blob: 89167da949561634166536ecefdf42a5fd7c46c6 [file] [log] [blame]
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -04001# Copyright 2018 The LUCI Authors. All rights reserved.
2# Use of this source code is governed under the Apache License, Version 2.0
3# that can be found in the LICENSE file.
4
5"""Define local cache policies."""
6
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04007import contextlib
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -04008import errno
9import io
10import logging
11import os
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040012import random
Takuto Ikuta6ba1d0c2019-08-01 05:37:00 +000013import stat
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040014import string
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040015import sys
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000016import time
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040017
18from utils import file_path
19from utils import fs
20from utils import lru
21from utils import threading_utils
22from utils import tools
Lei Leife202df2019-06-11 17:33:34 +000023tools.force_local_third_party()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040024
Lei Leife202df2019-06-11 17:33:34 +000025# third_party/
Takuto Ikutabf06ebf2019-08-02 07:28:23 +000026from scandir import scandir
Lei Leife202df2019-06-11 17:33:34 +000027import six
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040028
Junji Watanabe5e73aab2020-04-09 04:20:27 +000029import isolated_format
30
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040031# The file size to be used when we don't know the correct file size,
32# generally used for .isolated files.
33UNKNOWN_FILE_SIZE = None
34
35
36def file_write(path, content_generator):
37 """Writes file content as generated by content_generator.
38
39 Creates the intermediary directory as needed.
40
41 Returns the number of bytes written.
42
43 Meant to be mocked out in unit tests.
44 """
45 file_path.ensure_tree(os.path.dirname(path))
46 total = 0
47 with fs.open(path, 'wb') as f:
48 for d in content_generator:
49 total += len(d)
50 f.write(d)
51 return total
52
53
54def is_valid_file(path, size):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +000055 """Returns if the given files appears valid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040056
57 Currently it just checks the file exists and its size matches the expectation.
58 """
59 if size == UNKNOWN_FILE_SIZE:
60 return fs.isfile(path)
61 try:
62 actual_size = fs.stat(path).st_size
63 except OSError as e:
Junji Watanabe38b28b02020-04-23 10:23:30 +000064 logging.warning('Can\'t read item %s, assuming it\'s invalid: %s',
65 os.path.basename(path), e)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040066 return False
67 if size != actual_size:
68 logging.warning(
69 'Found invalid item %s; %d != %d',
70 os.path.basename(path), actual_size, size)
71 return False
72 return True
73
74
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000075def trim_caches(caches, path, min_free_space, max_age_secs):
76 """Trims multiple caches.
77
78 The goal here is to coherently trim all caches in a coherent LRU fashion,
79 deleting older items independent of which container they belong to.
80
81 Two policies are enforced first:
82 - max_age_secs
83 - min_free_space
84
85 Once that's done, then we enforce each cache's own policies.
86
87 Returns:
88 Slice containing the size of all items evicted.
89 """
90 min_ts = time.time() - max_age_secs if max_age_secs else 0
91 free_disk = file_path.get_free_space(path) if min_free_space else 0
92 total = []
93 if min_ts or free_disk:
94 while True:
95 oldest = [(c, c.get_oldest()) for c in caches if len(c) > 0]
96 if not oldest:
97 break
Lei Leife202df2019-06-11 17:33:34 +000098 oldest.sort(key=lambda k: k[1])
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000099 c, ts = oldest[0]
100 if ts >= min_ts and free_disk >= min_free_space:
101 break
102 total.append(c.remove_oldest())
103 if min_free_space:
104 free_disk = file_path.get_free_space(path)
105 # Evaluate each cache's own policies.
106 for c in caches:
107 total.extend(c.trim())
108 return total
109
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000110def _use_scandir():
111 # Use scandir in windows for faster execution.
112 # Do not use in other OS due to crbug.com/989409
113 return sys.platform == 'win32'
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000114
Junji Watanabe38b28b02020-04-23 10:23:30 +0000115
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000116def _get_recursive_size(path):
117 """Returns the total data size for the specified path.
118
119 This function can be surprisingly slow on OSX, so its output should be cached.
120 """
121 try:
122 total = 0
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000123 if _use_scandir():
124
125 if sys.platform == 'win32':
Junji Watanabe38b28b02020-04-23 10:23:30 +0000126
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000127 def direntIsJunction(entry):
128 # both st_file_attributes and FILE_ATTRIBUTE_REPARSE_POINT are
129 # windows-only symbols.
Junji Watanabe38b28b02020-04-23 10:23:30 +0000130 return bool(entry.stat().st_file_attributes & scandir
131 .FILE_ATTRIBUTE_REPARSE_POINT)
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000132 else:
Junji Watanabe38b28b02020-04-23 10:23:30 +0000133
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000134 def direntIsJunction(_entry):
135 return False
136
Takuto Ikutabf06ebf2019-08-02 07:28:23 +0000137 stack = [path]
138 while stack:
139 for entry in scandir.scandir(stack.pop()):
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000140 if entry.is_symlink() or direntIsJunction(entry):
Takuto Ikutabf06ebf2019-08-02 07:28:23 +0000141 continue
142 if entry.is_file():
143 total += entry.stat().st_size
144 elif entry.is_dir():
145 stack.append(entry.path)
146 else:
147 logging.warning('non directory/file entry: %s', entry)
148 return total
149
Takuto Ikuta54c8b062019-07-31 08:38:41 +0000150 for root, _, files in fs.walk(path):
151 for f in files:
Takuto Ikuta6ba1d0c2019-08-01 05:37:00 +0000152 st = fs.lstat(os.path.join(root, f))
153 if stat.S_ISLNK(st.st_mode):
154 continue
155 total += st.st_size
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000156 return total
Takuto Ikuta74b13b02020-06-08 07:13:50 +0000157 except (IOError, OSError, UnicodeEncodeError):
158 logging.exception('Exception while getting the size of %s', path)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000159 return None
160
161
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400162class NamedCacheError(Exception):
163 """Named cache specific error."""
164
165
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400166class NoMoreSpace(Exception):
167 """Not enough space to map the whole directory."""
168 pass
169
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -0400170
171class CachePolicies(object):
172 def __init__(self, max_cache_size, min_free_space, max_items, max_age_secs):
173 """Common caching policies for the multiple caches (isolated, named, cipd).
174
175 Arguments:
176 - max_cache_size: Trim if the cache gets larger than this value. If 0, the
177 cache is effectively a leak.
178 - min_free_space: Trim if disk free space becomes lower than this value. If
179 0, it will unconditionally fill the disk.
180 - max_items: Maximum number of items to keep in the cache. If 0, do not
181 enforce a limit.
182 - max_age_secs: Maximum age an item is kept in the cache until it is
183 automatically evicted. Having a lot of dead luggage slows
184 everything down.
185 """
186 self.max_cache_size = max_cache_size
187 self.min_free_space = min_free_space
188 self.max_items = max_items
189 self.max_age_secs = max_age_secs
190
191 def __str__(self):
Takuto Ikutaa953f272020-01-20 02:59:17 +0000192 return ('CachePolicies(max_cache_size=%s (%.3f GiB); max_items=%s; '
193 'min_free_space=%s (%.3f GiB); max_age_secs=%s)') % (
194 self.max_cache_size, float(self.max_cache_size) / 1024**3,
195 self.max_items, self.min_free_space,
196 float(self.min_free_space) / 1024**3, self.max_age_secs)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400197
198
199class CacheMiss(Exception):
200 """Raised when an item is not in cache."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400201 def __init__(self, digest):
202 self.digest = digest
Junji Watanabe38b28b02020-04-23 10:23:30 +0000203 super(CacheMiss,
204 self).__init__('Item with digest %r is not found in cache' % digest)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400205
206
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400207class Cache(object):
Junji Watanabe38b28b02020-04-23 10:23:30 +0000208
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400209 def __init__(self, cache_dir):
210 if cache_dir is not None:
Takuto Ikuta95459dd2019-10-29 12:39:47 +0000211 assert isinstance(cache_dir, six.text_type), cache_dir
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400212 assert file_path.isabs(cache_dir), cache_dir
213 self.cache_dir = cache_dir
214 self._lock = threading_utils.LockWithAssert()
215 # Profiling values.
216 self._added = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400217 self._used = []
218
Marc-Antoine Ruel6c3be5a2018-09-04 17:19:59 +0000219 def __nonzero__(self):
220 """A cache is always True.
221
222 Otherwise it falls back to __len__, which is surprising.
223 """
224 return True
225
Takuto Ikuta1c717d72020-06-29 10:15:09 +0000226 def __bool__(self):
227 """A cache is always True.
228
229 Otherwise it falls back to __len__, which is surprising.
230 """
231 return True
232
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000233 def __len__(self):
234 """Returns the number of entries in the cache."""
235 raise NotImplementedError()
236
237 def __iter__(self):
238 """Iterates over all the entries names."""
239 raise NotImplementedError()
240
241 def __contains__(self, name):
242 """Returns if an entry is in the cache."""
243 raise NotImplementedError()
244
245 @property
246 def total_size(self):
247 """Returns the total size of the cache in bytes."""
248 raise NotImplementedError()
249
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400250 @property
251 def added(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000252 """Returns a list of the size for each entry added."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400253 with self._lock:
254 return self._added[:]
255
256 @property
257 def used(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000258 """Returns a list of the size for each entry used."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400259 with self._lock:
260 return self._used[:]
261
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000262 def get_oldest(self):
263 """Returns timestamp of oldest cache entry or None.
264
265 Returns:
266 Timestamp of the oldest item.
267
268 Used for manual trimming.
269 """
270 raise NotImplementedError()
271
272 def remove_oldest(self):
273 """Removes the oldest item from the cache.
274
275 Returns:
276 Size of the oldest item.
277
278 Used for manual trimming.
279 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400280 raise NotImplementedError()
281
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000282 def save(self):
283 """Saves the current cache to disk."""
284 raise NotImplementedError()
285
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400286 def trim(self):
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000287 """Enforces cache policies, then calls save().
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400288
289 Returns:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000290 Slice with the size of evicted items.
291 """
292 raise NotImplementedError()
293
294 def cleanup(self):
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000295 """Deletes any corrupted item from the cache, then calls trim(), then
296 save().
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000297
298 It is assumed to take significantly more time than trim().
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400299 """
300 raise NotImplementedError()
301
302
303class ContentAddressedCache(Cache):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400304 """Content addressed cache that stores objects temporarily.
305
306 It can be accessed concurrently from multiple threads, so it should protect
307 its internal state with some lock.
308 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400309
310 def __enter__(self):
311 """Context manager interface."""
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000312 # TODO(maruel): Remove.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400313 return self
314
315 def __exit__(self, _exc_type, _exec_value, _traceback):
316 """Context manager interface."""
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000317 # TODO(maruel): Remove.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400318 return False
319
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400320 def touch(self, digest, size):
321 """Ensures item is not corrupted and updates its LRU position.
322
323 Arguments:
324 digest: hash digest of item to check.
325 size: expected size of this item.
326
327 Returns:
328 True if item is in cache and not corrupted.
329 """
330 raise NotImplementedError()
331
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400332 def getfileobj(self, digest):
333 """Returns a readable file like object.
334
335 If file exists on the file system it will have a .name attribute with an
336 absolute path to the file.
337 """
338 raise NotImplementedError()
339
340 def write(self, digest, content):
341 """Reads data from |content| generator and stores it in cache.
342
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000343 It is possible to write to an object that already exists. It may be
344 ignored (sent to /dev/null) but the timestamp is still updated.
345
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400346 Returns digest to simplify chaining.
347 """
348 raise NotImplementedError()
349
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400350
351class MemoryContentAddressedCache(ContentAddressedCache):
352 """ContentAddressedCache implementation that stores everything in memory."""
353
Lei Leife202df2019-06-11 17:33:34 +0000354 def __init__(self, file_mode_mask=0o500):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400355 """Args:
356 file_mode_mask: bit mask to AND file mode with. Default value will make
357 all mapped files to be read only.
358 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400359 super(MemoryContentAddressedCache, self).__init__(None)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400360 self._file_mode_mask = file_mode_mask
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000361 # Items in a LRU lookup dict(digest: size).
362 self._lru = lru.LRUDict()
363
364 # Cache interface implementation.
365
366 def __len__(self):
367 with self._lock:
368 return len(self._lru)
369
370 def __iter__(self):
371 # This is not thread-safe.
372 return self._lru.__iter__()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400373
374 def __contains__(self, digest):
375 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000376 return digest in self._lru
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400377
378 @property
379 def total_size(self):
380 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000381 return sum(len(i) for i in self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400382
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000383 def get_oldest(self):
384 with self._lock:
385 try:
386 # (key, (value, ts))
387 return self._lru.get_oldest()[1][1]
388 except KeyError:
389 return None
390
391 def remove_oldest(self):
392 with self._lock:
393 # TODO(maruel): Update self._added.
394 # (key, (value, ts))
395 return len(self._lru.pop_oldest()[1][0])
396
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000397 def save(self):
398 pass
399
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000400 def trim(self):
401 """Trimming is not implemented for MemoryContentAddressedCache."""
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000402 return []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400403
404 def cleanup(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000405 """Cleaning is irrelevant, as there's no stateful serialization."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400406 pass
407
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000408 # ContentAddressedCache interface implementation.
409
410 def __contains__(self, digest):
411 with self._lock:
412 return digest in self._lru
413
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400414 def touch(self, digest, size):
415 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000416 try:
417 self._lru.touch(digest)
418 except KeyError:
419 return False
420 return True
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400421
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400422 def getfileobj(self, digest):
423 with self._lock:
424 try:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000425 d = self._lru[digest]
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400426 except KeyError:
427 raise CacheMiss(digest)
428 self._used.append(len(d))
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000429 self._lru.touch(digest)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400430 return io.BytesIO(d)
431
432 def write(self, digest, content):
433 # Assemble whole stream before taking the lock.
Lei Lei73a5f732020-03-23 20:36:14 +0000434 data = six.b('').join(content)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400435 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000436 self._lru.add(digest, data)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400437 self._added.append(len(data))
438 return digest
439
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400440
441class DiskContentAddressedCache(ContentAddressedCache):
442 """Stateful LRU cache in a flat hash table in a directory.
443
444 Saves its state as json file.
445 """
446 STATE_FILE = u'state.json'
447
Marc-Antoine Ruel79d42192019-02-06 19:24:16 +0000448 def __init__(self, cache_dir, policies, trim, time_fn=None):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400449 """
450 Arguments:
451 cache_dir: directory where to place the cache.
452 policies: CachePolicies instance, cache retention policies.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400453 trim: if True to enforce |policies| right away.
Marc-Antoine Ruel79d42192019-02-06 19:24:16 +0000454 It can be done later by calling trim() explicitly.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400455 """
456 # All protected methods (starting with '_') except _path should be called
457 # with self._lock held.
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400458 super(DiskContentAddressedCache, self).__init__(cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400459 self.policies = policies
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400460 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
461 # Items in a LRU lookup dict(digest: size).
462 self._lru = lru.LRUDict()
463 # Current cached free disk space. It is updated by self._trim().
464 file_path.ensure_tree(self.cache_dir)
465 self._free_disk = file_path.get_free_space(self.cache_dir)
466 # The first item in the LRU cache that must not be evicted during this run
467 # since it was referenced. All items more recent that _protected in the LRU
468 # cache are also inherently protected. It could be a set() of all items
469 # referenced but this increases memory usage without a use case.
470 self._protected = None
471 # Cleanup operations done by self._load(), if any.
472 self._operations = []
473 with tools.Profiler('Setup'):
474 with self._lock:
475 self._load(trim, time_fn)
476
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000477 # Cache interface implementation.
478
479 def __len__(self):
480 with self._lock:
481 return len(self._lru)
482
483 def __iter__(self):
484 # This is not thread-safe.
485 return self._lru.__iter__()
486
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400487 def __contains__(self, digest):
488 with self._lock:
489 return digest in self._lru
490
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400491 @property
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400492 def total_size(self):
493 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000494 return sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400495
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000496 def get_oldest(self):
497 with self._lock:
498 try:
499 # (key, (value, ts))
500 return self._lru.get_oldest()[1][1]
501 except KeyError:
502 return None
503
504 def remove_oldest(self):
505 with self._lock:
506 # TODO(maruel): Update self._added.
507 return self._remove_lru_file(True)
508
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000509 def save(self):
510 with self._lock:
511 return self._save()
512
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000513 def trim(self):
514 """Forces retention policies."""
515 with self._lock:
516 return self._trim()
517
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400518 def cleanup(self):
519 """Cleans up the cache directory.
520
521 Ensures there is no unknown files in cache_dir.
522 Ensures the read-only bits are set correctly.
523
524 At that point, the cache was already loaded, trimmed to respect cache
525 policies.
526 """
527 with self._lock:
Lei Leife202df2019-06-11 17:33:34 +0000528 fs.chmod(self.cache_dir, 0o700)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400529 # Ensure that all files listed in the state still exist and add new ones.
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000530 previous = set(self._lru)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400531 # It'd be faster if there were a readdir() function.
532 for filename in fs.listdir(self.cache_dir):
533 if filename == self.STATE_FILE:
Lei Leife202df2019-06-11 17:33:34 +0000534 fs.chmod(os.path.join(self.cache_dir, filename), 0o600)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400535 continue
536 if filename in previous:
Lei Leife202df2019-06-11 17:33:34 +0000537 fs.chmod(os.path.join(self.cache_dir, filename), 0o400)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400538 previous.remove(filename)
539 continue
540
541 # An untracked file. Delete it.
542 logging.warning('Removing unknown file %s from cache', filename)
543 p = self._path(filename)
544 if fs.isdir(p):
545 try:
546 file_path.rmtree(p)
547 except OSError:
548 pass
549 else:
550 file_path.try_remove(p)
551 continue
552
553 if previous:
554 # Filter out entries that were not found.
555 logging.warning('Removed %d lost files', len(previous))
556 for filename in previous:
557 self._lru.pop(filename)
558 self._save()
559
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000560 # Verify hash of every single item to detect corruption. the corrupted
561 # files will be evicted.
562 with self._lock:
Takuto Ikuta1c717d72020-06-29 10:15:09 +0000563 for digest, (_, timestamp) in list(self._lru._items.items()):
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000564 # verify only if the mtime is grather than the timestamp in state.json
565 # to avoid take too long time.
566 if self._get_mtime(digest) <= timestamp:
Quinten Yearsley0bc84ce2020-04-09 22:38:08 +0000567 continue
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000568 logging.warning('Item has been modified. item: %s', digest)
569 if self._is_valid_hash(digest):
Quinten Yearsley0bc84ce2020-04-09 22:38:08 +0000570 # Update timestamp in state.json
571 self._lru.touch(digest)
572 continue
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000573 # remove corrupted file from LRU and file system
574 self._lru.pop(digest)
575 self._delete_file(digest, UNKNOWN_FILE_SIZE)
576 logging.error('Deleted corrupted item: %s', digest)
577 self._save()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400578
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000579 # ContentAddressedCache interface implementation.
580
581 def __contains__(self, digest):
582 with self._lock:
583 return digest in self._lru
584
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400585 def touch(self, digest, size):
586 """Verifies an actual file is valid and bumps its LRU position.
587
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000588 Returns False if the file is missing or invalid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400589
590 Note that is doesn't compute the hash so it could still be corrupted if the
591 file size didn't change.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400592 """
593 # Do the check outside the lock.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000594 looks_valid = is_valid_file(self._path(digest), size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400595
596 # Update its LRU position.
597 with self._lock:
598 if digest not in self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000599 if looks_valid:
600 # Exists but not in the LRU anymore.
601 self._delete_file(digest, size)
602 return False
603 if not looks_valid:
604 self._lru.pop(digest)
605 # Exists but not in the LRU anymore.
606 self._delete_file(digest, size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400607 return False
608 self._lru.touch(digest)
609 self._protected = self._protected or digest
610 return True
611
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400612 def getfileobj(self, digest):
613 try:
614 f = fs.open(self._path(digest), 'rb')
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400615 except IOError:
616 raise CacheMiss(digest)
Vadim Shtayura33054fa2018-11-01 12:47:59 +0000617 with self._lock:
618 try:
619 self._used.append(self._lru[digest])
620 except KeyError:
621 # If the digest is not actually in _lru, assume it is a cache miss.
622 # Existing file will be overwritten by whoever uses the cache and added
623 # to _lru.
624 f.close()
625 raise CacheMiss(digest)
626 return f
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400627
628 def write(self, digest, content):
629 assert content is not None
630 with self._lock:
631 self._protected = self._protected or digest
632 path = self._path(digest)
633 # A stale broken file may remain. It is possible for the file to have write
634 # access bit removed which would cause the file_write() call to fail to open
635 # in write mode. Take no chance here.
636 file_path.try_remove(path)
637 try:
638 size = file_write(path, content)
639 except:
640 # There are two possible places were an exception can occur:
641 # 1) Inside |content| generator in case of network or unzipping errors.
642 # 2) Inside file_write itself in case of disk IO errors.
643 # In any case delete an incomplete file and propagate the exception to
644 # caller, it will be logged there.
645 file_path.try_remove(path)
646 raise
647 # Make the file read-only in the cache. This has a few side-effects since
648 # the file node is modified, so every directory entries to this file becomes
649 # read-only. It's fine here because it is a new file.
650 file_path.set_read_only(path, True)
651 with self._lock:
652 self._add(digest, size)
653 return digest
654
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000655 # Internal functions.
656
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400657 def _load(self, trim, time_fn):
658 """Loads state of the cache from json file.
659
660 If cache_dir does not exist on disk, it is created.
661 """
662 self._lock.assert_locked()
663
664 if not fs.isfile(self.state_file):
665 if not fs.isdir(self.cache_dir):
666 fs.makedirs(self.cache_dir)
667 else:
668 # Load state of the cache.
669 try:
670 self._lru = lru.LRUDict.load(self.state_file)
671 except ValueError as err:
672 logging.error('Failed to load cache state: %s' % (err,))
Takuto Ikutaeccc88c2019-12-13 14:46:32 +0000673 # Don't want to keep broken cache dir.
674 file_path.rmtree(self.cache_dir)
675 fs.makedirs(self.cache_dir)
Matt Kotsenasefe30092020-03-19 01:12:55 +0000676 self._free_disk = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400677 if time_fn:
678 self._lru.time_fn = time_fn
679 if trim:
680 self._trim()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400681
682 def _save(self):
683 """Saves the LRU ordering."""
684 self._lock.assert_locked()
685 if sys.platform != 'win32':
686 d = os.path.dirname(self.state_file)
687 if fs.isdir(d):
688 # Necessary otherwise the file can't be created.
689 file_path.set_read_only(d, False)
690 if fs.isfile(self.state_file):
691 file_path.set_read_only(self.state_file, False)
692 self._lru.save(self.state_file)
693
694 def _trim(self):
695 """Trims anything we don't know, make sure enough free space exists."""
696 self._lock.assert_locked()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000697 evicted = []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400698
699 # Trim old items.
700 if self.policies.max_age_secs:
701 cutoff = self._lru.time_fn() - self.policies.max_age_secs
702 while self._lru:
703 oldest = self._lru.get_oldest()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000704 # (key, (data, ts)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400705 if oldest[1][1] >= cutoff:
706 break
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000707 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400708
709 # Ensure maximum cache size.
710 if self.policies.max_cache_size:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000711 total_size = sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400712 while total_size > self.policies.max_cache_size:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000713 e = self._remove_lru_file(True)
714 evicted.append(e)
715 total_size -= e
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400716
717 # Ensure maximum number of items in the cache.
718 if self.policies.max_items and len(self._lru) > self.policies.max_items:
Marc-Antoine Ruel0fdee222019-10-10 14:42:40 +0000719 for _ in range(len(self._lru) - self.policies.max_items):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000720 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400721
722 # Ensure enough free space.
723 self._free_disk = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400724 while (
725 self.policies.min_free_space and
726 self._lru and
727 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000728 # self._free_disk is updated by this call.
729 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400730
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000731 if evicted:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000732 total_usage = sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400733 usage_percent = 0.
734 if total_usage:
735 usage_percent = 100. * float(total_usage) / self.policies.max_cache_size
736
737 logging.warning(
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000738 'Trimmed %d file(s) (%.1fkb) due to not enough free disk space:'
739 ' %.1fkb free, %.1fkb cache (%.1f%% of its maximum capacity of '
Junji Watanabe38b28b02020-04-23 10:23:30 +0000740 '%.1fkb)', len(evicted),
741 sum(evicted) / 1024., self._free_disk / 1024., total_usage / 1024.,
742 usage_percent, self.policies.max_cache_size / 1024.)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400743 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000744 return evicted
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400745
746 def _path(self, digest):
747 """Returns the path to one item."""
748 return os.path.join(self.cache_dir, digest)
749
750 def _remove_lru_file(self, allow_protected):
Quinten Yearsley0bc84ce2020-04-09 22:38:08 +0000751 """Removes the latest recently used file and returns its size.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000752
753 Updates self._free_disk.
754 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400755 self._lock.assert_locked()
756 try:
Takuto Ikutae40f76a2020-01-20 01:22:17 +0000757 digest, _ = self._lru.get_oldest()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400758 if not allow_protected and digest == self._protected:
Takuto Ikutae40f76a2020-01-20 01:22:17 +0000759 total_size = sum(self._lru.values())
760 msg = ('Not enough space to fetch the whole isolated tree.\n'
Takuto Ikutaa953f272020-01-20 02:59:17 +0000761 ' %s\n cache=%d bytes (%.3f GiB), %d items; '
762 '%s bytes (%.3f GiB) free_space') % (
763 self.policies, total_size, float(total_size) / 1024**3,
764 len(self._lru), self._free_disk,
765 float(self._free_disk) / 1024**3)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400766 raise NoMoreSpace(msg)
767 except KeyError:
768 # That means an internal error.
769 raise NoMoreSpace('Nothing to remove, can\'t happend')
770 digest, (size, _) = self._lru.pop_oldest()
771 logging.debug('Removing LRU file %s', digest)
772 self._delete_file(digest, size)
773 return size
774
775 def _add(self, digest, size=UNKNOWN_FILE_SIZE):
776 """Adds an item into LRU cache marking it as a newest one."""
777 self._lock.assert_locked()
778 if size == UNKNOWN_FILE_SIZE:
779 size = fs.stat(self._path(digest)).st_size
780 self._added.append(size)
781 self._lru.add(digest, size)
782 self._free_disk -= size
783 # Do a quicker version of self._trim(). It only enforces free disk space,
784 # not cache size limits. It doesn't actually look at real free disk space,
785 # only uses its cache values. self._trim() will be called later to enforce
786 # real trimming but doing this quick version here makes it possible to map
787 # an isolated that is larger than the current amount of free disk space when
788 # the cache size is already large.
Junji Watanabe38b28b02020-04-23 10:23:30 +0000789 while (self.policies.min_free_space and self._lru and
790 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000791 # self._free_disk is updated by this call.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400792 if self._remove_lru_file(False) == -1:
793 break
794
795 def _delete_file(self, digest, size=UNKNOWN_FILE_SIZE):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000796 """Deletes cache file from the file system.
797
798 Updates self._free_disk.
799 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400800 self._lock.assert_locked()
801 try:
802 if size == UNKNOWN_FILE_SIZE:
803 try:
804 size = fs.stat(self._path(digest)).st_size
805 except OSError:
806 size = 0
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000807 if file_path.try_remove(self._path(digest)):
808 self._free_disk += size
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400809 except OSError as e:
810 if e.errno != errno.ENOENT:
811 logging.error('Error attempting to delete a file %s:\n%s' % (digest, e))
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400812
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000813 def _get_mtime(self, digest):
814 """Get mtime of cache file."""
815 return os.path.getmtime(self._path(digest))
816
817 def _is_valid_hash(self, digest):
818 """Verify digest with supported hash algos."""
819 for _, algo in isolated_format.SUPPORTED_ALGOS.items():
Quinten Yearsley0bc84ce2020-04-09 22:38:08 +0000820 if digest == isolated_format.hash_file(self._path(digest), algo):
821 return True
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000822 return False
823
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400824
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400825class NamedCache(Cache):
826 """Manages cache directories.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400827
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400828 A cache entry is a tuple (name, path), where
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400829 name is a short identifier that describes the contents of the cache, e.g.
830 "git_v8" could be all git repositories required by v8 builds, or
831 "build_chromium" could be build artefacts of the Chromium.
832 path is a directory path relative to the task run dir. Cache installation
833 puts the requested cache directory at the path.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400834 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400835 _DIR_ALPHABET = string.ascii_letters + string.digits
836 STATE_FILE = u'state.json'
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +0000837 NAMED_DIR = u'named'
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400838
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400839 def __init__(self, cache_dir, policies, time_fn=None):
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400840 """Initializes NamedCaches.
841
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400842 Arguments:
843 - cache_dir is a directory for persistent cache storage.
844 - policies is a CachePolicies instance.
845 - time_fn is a function that returns timestamp (float) and used to take
846 timestamps when new caches are requested. Used in unit tests.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400847 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400848 super(NamedCache, self).__init__(cache_dir)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400849 self._policies = policies
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000850 # LRU {cache_name -> tuple(cache_location, size)}
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400851 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
852 self._lru = lru.LRUDict()
853 if not fs.isdir(self.cache_dir):
854 fs.makedirs(self.cache_dir)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000855 elif fs.isfile(self.state_file):
Marc-Antoine Ruel3543e212018-05-23 01:04:34 +0000856 try:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400857 self._lru = lru.LRUDict.load(self.state_file)
Takuto Ikutac4b85ec2020-06-09 03:42:39 +0000858 for _, size in self._lru.values():
Takuto Ikuta1c717d72020-06-29 10:15:09 +0000859 if not isinstance(size, six.integer_types):
Junji Watanabeedcf47d2020-06-11 08:41:01 +0000860 raise ValueError("size is not integer: %s" % size)
Takuto Ikutac4b85ec2020-06-09 03:42:39 +0000861
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400862 except ValueError:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000863 logging.exception(
864 'NamedCache: failed to load named cache state file; obliterating')
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400865 file_path.rmtree(self.cache_dir)
Takuto Ikuta568ddb22020-01-20 23:24:16 +0000866 fs.makedirs(self.cache_dir)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000867 with self._lock:
868 self._try_upgrade()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400869 if time_fn:
870 self._lru.time_fn = time_fn
871
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400872 @property
873 def available(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000874 """Returns a set of names of available caches."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400875 with self._lock:
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000876 return set(self._lru)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400877
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000878 def install(self, dst, name):
879 """Creates the directory |dst| and moves a previous named cache |name| if it
880 was in the local named caches cache.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400881
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000882 dst must be absolute, unicode and must not exist.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400883
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000884 Returns the reused named cache size in bytes, or 0 if none was present.
885
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400886 Raises NamedCacheError if cannot install the cache.
887 """
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000888 logging.info('NamedCache.install(%r, %r)', dst, name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400889 with self._lock:
890 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000891 if fs.isdir(dst):
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400892 raise NamedCacheError(
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000893 'installation directory %r already exists' % dst)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400894
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000895 # Remove the named symlink if it exists.
896 link_name = self._get_named_path(name)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000897 if fs.exists(link_name):
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000898 # Remove the symlink itself, not its destination.
899 fs.remove(link_name)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000900
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000901 if name in self._lru:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000902 rel_cache, size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400903 abs_cache = os.path.join(self.cache_dir, rel_cache)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000904 if fs.isdir(abs_cache):
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000905 logging.info('- reusing %r; size was %d', rel_cache, size)
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000906 file_path.ensure_tree(os.path.dirname(dst))
907 fs.rename(abs_cache, dst)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400908 self._remove(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000909 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400910
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000911 logging.warning('- expected directory %r, does not exist', rel_cache)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400912 self._remove(name)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400913
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000914 # The named cache does not exist, create an empty directory. When
915 # uninstalling, we will move it back to the cache and create an an
916 # entry.
917 logging.info('- creating new directory')
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000918 file_path.ensure_tree(dst)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000919 return 0
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400920 except (IOError, OSError) as ex:
Marc-Antoine Ruel799bc4f2019-01-30 22:54:47 +0000921 # Raise using the original traceback.
922 exc = NamedCacheError(
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000923 'cannot install cache named %r at %r: %s' % (name, dst, ex))
Takuto Ikuta1c717d72020-06-29 10:15:09 +0000924 six.reraise(type(exc), exc, sys.exc_info()[2])
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000925 finally:
926 self._save()
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400927
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000928 def uninstall(self, src, name):
929 """Moves the cache directory back into the named cache hive for an eventual
930 reuse.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400931
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000932 The opposite of install().
933
934 src must be absolute and unicode. Its content is moved back into the local
935 named caches cache.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400936
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000937 Returns the named cache size in bytes.
938
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400939 Raises NamedCacheError if cannot uninstall the cache.
940 """
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000941 logging.info('NamedCache.uninstall(%r, %r)', src, name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400942 with self._lock:
943 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000944 if not fs.isdir(src):
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400945 logging.warning(
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000946 'NamedCache: Directory %r does not exist anymore. Cache lost.',
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000947 src)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400948 return
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400949
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000950 if name in self._lru:
951 # This shouldn't happen but just remove the preexisting one and move
952 # on.
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000953 logging.error('- overwriting existing cache!')
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000954 self._remove(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000955
Takuto Ikuta93483272020-06-05 09:06:34 +0000956 # Calculate the size of the named cache to keep.
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000957 size = _get_recursive_size(src)
958 logging.info('- Size is %d', size)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400959
960 # Move the dir and create an entry for the named cache.
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000961 rel_cache = self._allocate_dir()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400962 abs_cache = os.path.join(self.cache_dir, rel_cache)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000963 logging.info('- Moving to %r', rel_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400964 file_path.ensure_tree(os.path.dirname(abs_cache))
Takuto Ikuta17a27ac2020-06-24 08:11:55 +0000965
966 if sys.platform != 'win32':
967 uid = os.getuid()
968 if os.stat(src).st_uid != uid:
969 # Maybe owner of |src| is different from runner of this script. This
970 # is to make fs.rename work in that case.
971 # https://crbug.com/986676
972 subprocess.check_call(['sudo', '-n', 'chown', str(uid), src])
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000973 fs.rename(src, abs_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400974
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000975 self._lru.add(name, (rel_cache, size))
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000976 self._added.append(size)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000977
978 # Create symlink <cache_dir>/<named>/<name> -> <cache_dir>/<short name>
979 # for user convenience.
980 named_path = self._get_named_path(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000981 if fs.exists(named_path):
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000982 file_path.remove(named_path)
983 else:
984 file_path.ensure_tree(os.path.dirname(named_path))
985
986 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000987 fs.symlink(os.path.join(u'..', rel_cache), named_path)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000988 logging.info(
989 'NamedCache: Created symlink %r to %r', named_path, abs_cache)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000990 except OSError:
991 # Ignore on Windows. It happens when running as a normal user or when
992 # UAC is enabled and the user is a filtered administrator account.
993 if sys.platform != 'win32':
994 raise
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000995 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400996 except (IOError, OSError) as ex:
Marc-Antoine Ruel799bc4f2019-01-30 22:54:47 +0000997 # Raise using the original traceback.
998 exc = NamedCacheError(
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000999 'cannot uninstall cache named %r at %r: %s' % (name, src, ex))
Takuto Ikuta1c717d72020-06-29 10:15:09 +00001000 six.reraise(type(exc), exc, sys.exc_info()[2])
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001001 finally:
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +00001002 # Call save() at every uninstall. The assumptions are:
1003 # - The total the number of named caches is low, so the state.json file
1004 # is small, so the time it takes to write it to disk is short.
1005 # - The number of mapped named caches per task is low, so the number of
1006 # times save() is called on tear-down isn't high enough to be
1007 # significant.
1008 # - uninstall() sometimes throws due to file locking on Windows or
1009 # access rights on Linux. We want to keep as many as possible.
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001010 self._save()
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001011
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001012 # Cache interface implementation.
1013
1014 def __len__(self):
1015 with self._lock:
1016 return len(self._lru)
1017
1018 def __iter__(self):
1019 # This is not thread-safe.
1020 return self._lru.__iter__()
1021
John Budorickc6186972020-02-26 00:58:14 +00001022 def __contains__(self, name):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001023 with self._lock:
John Budorickc6186972020-02-26 00:58:14 +00001024 return name in self._lru
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001025
1026 @property
1027 def total_size(self):
1028 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001029 return sum(size for _rel_path, size in self._lru.values())
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001030
1031 def get_oldest(self):
1032 with self._lock:
1033 try:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001034 # (key, (value, ts))
1035 return self._lru.get_oldest()[1][1]
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001036 except KeyError:
1037 return None
1038
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001039 def remove_oldest(self):
1040 with self._lock:
1041 # TODO(maruel): Update self._added.
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001042 _name, size = self._remove_lru_item()
1043 return size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001044
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +00001045 def save(self):
1046 with self._lock:
1047 return self._save()
1048
John Budorickc6186972020-02-26 00:58:14 +00001049 def touch(self, *names):
1050 with self._lock:
1051 for name in names:
1052 if name in self._lru:
1053 self._lru.touch(name)
1054 self._save()
1055
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001056 def trim(self):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001057 evicted = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001058 with self._lock:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001059 if not fs.isdir(self.cache_dir):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001060 return evicted
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001061
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001062 # Trim according to maximum number of items.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001063 if self._policies.max_items:
1064 while len(self._lru) > self._policies.max_items:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001065 name, size = self._remove_lru_item()
1066 evicted.append(size)
1067 logging.info(
1068 'NamedCache.trim(): Removed %r(%d) due to max_items(%d)',
1069 name, size, self._policies.max_items)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001070
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001071 # Trim according to maximum age.
1072 if self._policies.max_age_secs:
1073 cutoff = self._lru.time_fn() - self._policies.max_age_secs
1074 while self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001075 _name, (_data, ts) = self._lru.get_oldest()
1076 if ts >= cutoff:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001077 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001078 name, size = self._remove_lru_item()
1079 evicted.append(size)
1080 logging.info(
1081 'NamedCache.trim(): Removed %r(%d) due to max_age_secs(%d)',
1082 name, size, self._policies.max_age_secs)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001083
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001084 # Trim according to minimum free space.
1085 if self._policies.min_free_space:
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001086 while self._lru:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001087 free_space = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001088 if free_space >= self._policies.min_free_space:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001089 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001090 name, size = self._remove_lru_item()
1091 evicted.append(size)
1092 logging.info(
1093 'NamedCache.trim(): Removed %r(%d) due to min_free_space(%d)',
1094 name, size, self._policies.min_free_space)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001095
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001096 # Trim according to maximum total size.
1097 if self._policies.max_cache_size:
1098 while self._lru:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001099 total = sum(size for _rel_cache, size in self._lru.values())
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001100 if total <= self._policies.max_cache_size:
1101 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001102 name, size = self._remove_lru_item()
1103 evicted.append(size)
1104 logging.info(
1105 'NamedCache.trim(): Removed %r(%d) due to max_cache_size(%d)',
1106 name, size, self._policies.max_cache_size)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001107
Marc-Antoine Ruele79ddbf2018-06-13 18:33:07 +00001108 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001109 return evicted
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001110
1111 def cleanup(self):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001112 """Removes unknown directories.
1113
1114 Does not recalculate the cache size since it's surprisingly slow on some
1115 OSes.
1116 """
1117 success = True
1118 with self._lock:
1119 try:
1120 actual = set(fs.listdir(self.cache_dir))
1121 actual.discard(self.NAMED_DIR)
1122 actual.discard(self.STATE_FILE)
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001123 expected = {v[0]: k for k, v in self._lru.items()}
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001124 # First, handle the actual cache content.
1125 # Remove missing entries.
1126 for missing in (set(expected) - actual):
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001127 name, size = self._lru.pop(expected[missing])
1128 logging.warning(
1129 'NamedCache.cleanup(): Missing on disk %r(%d)', name, size)
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001130 # Remove unexpected items.
1131 for unexpected in (actual - set(expected)):
1132 try:
1133 p = os.path.join(self.cache_dir, unexpected)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001134 logging.warning(
1135 'NamedCache.cleanup(): Unexpected %r', unexpected)
Marc-Antoine Ruel41362222018-06-28 14:52:34 +00001136 if fs.isdir(p) and not fs.islink(p):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001137 file_path.rmtree(p)
1138 else:
1139 fs.remove(p)
1140 except (IOError, OSError) as e:
1141 logging.error('Failed to remove %s: %s', unexpected, e)
1142 success = False
1143
1144 # Second, fix named cache links.
1145 named = os.path.join(self.cache_dir, self.NAMED_DIR)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001146 if fs.isdir(named):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001147 actual = set(fs.listdir(named))
1148 expected = set(self._lru)
1149 # Confirm entries. Do not add missing ones for now.
1150 for name in expected.intersection(actual):
1151 p = os.path.join(self.cache_dir, self.NAMED_DIR, name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001152 expected_link = os.path.join(u'..', self._lru[name][0])
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001153 if fs.islink(p):
1154 link = fs.readlink(p)
1155 if expected_link == link:
1156 continue
1157 logging.warning(
1158 'Unexpected symlink for cache %s: %s, expected %s',
1159 name, link, expected_link)
1160 else:
1161 logging.warning('Unexpected non symlink for cache %s', name)
Marc-Antoine Ruel41362222018-06-28 14:52:34 +00001162 if fs.isdir(p) and not fs.islink(p):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001163 file_path.rmtree(p)
1164 else:
1165 fs.remove(p)
1166 # Remove unexpected items.
1167 for unexpected in (actual - expected):
1168 try:
1169 p = os.path.join(self.cache_dir, self.NAMED_DIR, unexpected)
1170 if fs.isdir(p):
1171 file_path.rmtree(p)
1172 else:
1173 fs.remove(p)
1174 except (IOError, OSError) as e:
1175 logging.error('Failed to remove %s: %s', unexpected, e)
1176 success = False
1177 finally:
1178 self._save()
1179 return success
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001180
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001181 # Internal functions.
1182
1183 def _try_upgrade(self):
1184 """Upgrades from the old format to the new one if necessary.
1185
1186 This code can be removed so all bots are known to have the right new format.
1187 """
1188 if not self._lru:
1189 return
1190 _name, (data, _ts) = self._lru.get_oldest()
1191 if isinstance(data, (list, tuple)):
1192 return
1193 # Update to v2.
1194 def upgrade(_name, rel_cache):
1195 abs_cache = os.path.join(self.cache_dir, rel_cache)
1196 return rel_cache, _get_recursive_size(abs_cache)
1197 self._lru.transform(upgrade)
1198 self._save()
1199
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001200 def _remove_lru_item(self):
1201 """Removes the oldest LRU entry. LRU must not be empty."""
1202 name, ((_rel_path, size), _ts) = self._lru.get_oldest()
1203 logging.info('Removing named cache %r', name)
1204 self._remove(name)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001205 return name, size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001206
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001207 def _allocate_dir(self):
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001208 """Creates and returns relative path of a new cache directory.
1209
1210 In practice, it is a 2-letter string.
1211 """
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001212 # We randomly generate directory names that have two lower/upper case
1213 # letters or digits. Total number of possibilities is (26*2 + 10)^2 = 3844.
1214 abc_len = len(self._DIR_ALPHABET)
1215 tried = set()
1216 while len(tried) < 1000:
1217 i = random.randint(0, abc_len * abc_len - 1)
1218 rel_path = (
Takuto Ikuta1c717d72020-06-29 10:15:09 +00001219 self._DIR_ALPHABET[i // abc_len] + self._DIR_ALPHABET[i % abc_len])
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001220 if rel_path in tried:
1221 continue
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001222 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001223 if not fs.exists(abs_path):
1224 return rel_path
1225 tried.add(rel_path)
1226 raise NamedCacheError(
1227 'could not allocate a new cache dir, too many cache dirs')
1228
1229 def _remove(self, name):
1230 """Removes a cache directory and entry.
1231
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001232 Returns:
1233 Number of caches deleted.
1234 """
1235 self._lock.assert_locked()
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001236 # First try to remove the alias if it exists.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001237 named_dir = self._get_named_path(name)
1238 if fs.islink(named_dir):
1239 fs.unlink(named_dir)
1240
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001241 # Then remove the actual data.
1242 if name not in self._lru:
1243 return
1244 rel_path, _size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001245 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001246 if fs.isdir(abs_path):
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001247 file_path.rmtree(abs_path)
1248 self._lru.pop(name)
1249
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001250 def _save(self):
1251 self._lock.assert_locked()
1252 file_path.ensure_tree(self.cache_dir)
1253 self._lru.save(self.state_file)
1254
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001255 def _get_named_path(self, name):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001256 return os.path.join(self.cache_dir, self.NAMED_DIR, name)