blob: e7805e51444fcb5cd65f87239ca56cc693e2fd4c [file] [log] [blame]
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -04001# Copyright 2018 The LUCI Authors. All rights reserved.
2# Use of this source code is governed under the Apache License, Version 2.0
3# that can be found in the LICENSE file.
4
5"""Define local cache policies."""
6
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04007import contextlib
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -04008import errno
9import io
10import logging
11import os
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040012import random
Takuto Ikuta6ba1d0c2019-08-01 05:37:00 +000013import stat
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040014import string
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040015import sys
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000016import time
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040017
18from utils import file_path
19from utils import fs
20from utils import lru
21from utils import threading_utils
22from utils import tools
Lei Leife202df2019-06-11 17:33:34 +000023tools.force_local_third_party()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040024
Lei Leife202df2019-06-11 17:33:34 +000025# third_party/
Takuto Ikutabf06ebf2019-08-02 07:28:23 +000026from scandir import scandir
Lei Leife202df2019-06-11 17:33:34 +000027import six
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040028
Junji Watanabe5e73aab2020-04-09 04:20:27 +000029import isolated_format
30
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040031# The file size to be used when we don't know the correct file size,
32# generally used for .isolated files.
33UNKNOWN_FILE_SIZE = None
34
35
36def file_write(path, content_generator):
37 """Writes file content as generated by content_generator.
38
39 Creates the intermediary directory as needed.
40
41 Returns the number of bytes written.
42
43 Meant to be mocked out in unit tests.
44 """
45 file_path.ensure_tree(os.path.dirname(path))
46 total = 0
47 with fs.open(path, 'wb') as f:
48 for d in content_generator:
49 total += len(d)
50 f.write(d)
51 return total
52
53
54def is_valid_file(path, size):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +000055 """Returns if the given files appears valid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040056
57 Currently it just checks the file exists and its size matches the expectation.
58 """
59 if size == UNKNOWN_FILE_SIZE:
60 return fs.isfile(path)
61 try:
62 actual_size = fs.stat(path).st_size
63 except OSError as e:
Junji Watanabe38b28b02020-04-23 10:23:30 +000064 logging.warning('Can\'t read item %s, assuming it\'s invalid: %s',
65 os.path.basename(path), e)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040066 return False
67 if size != actual_size:
68 logging.warning(
69 'Found invalid item %s; %d != %d',
70 os.path.basename(path), actual_size, size)
71 return False
72 return True
73
74
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000075def trim_caches(caches, path, min_free_space, max_age_secs):
76 """Trims multiple caches.
77
78 The goal here is to coherently trim all caches in a coherent LRU fashion,
79 deleting older items independent of which container they belong to.
80
81 Two policies are enforced first:
82 - max_age_secs
83 - min_free_space
84
85 Once that's done, then we enforce each cache's own policies.
86
87 Returns:
88 Slice containing the size of all items evicted.
89 """
90 min_ts = time.time() - max_age_secs if max_age_secs else 0
91 free_disk = file_path.get_free_space(path) if min_free_space else 0
92 total = []
93 if min_ts or free_disk:
94 while True:
95 oldest = [(c, c.get_oldest()) for c in caches if len(c) > 0]
96 if not oldest:
97 break
Lei Leife202df2019-06-11 17:33:34 +000098 oldest.sort(key=lambda k: k[1])
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000099 c, ts = oldest[0]
100 if ts >= min_ts and free_disk >= min_free_space:
101 break
102 total.append(c.remove_oldest())
103 if min_free_space:
104 free_disk = file_path.get_free_space(path)
105 # Evaluate each cache's own policies.
106 for c in caches:
107 total.extend(c.trim())
108 return total
109
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000110def _use_scandir():
111 # Use scandir in windows for faster execution.
112 # Do not use in other OS due to crbug.com/989409
113 return sys.platform == 'win32'
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000114
Junji Watanabe38b28b02020-04-23 10:23:30 +0000115
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000116def _get_recursive_size(path):
117 """Returns the total data size for the specified path.
118
119 This function can be surprisingly slow on OSX, so its output should be cached.
120 """
121 try:
122 total = 0
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000123 if _use_scandir():
124
125 if sys.platform == 'win32':
Junji Watanabe38b28b02020-04-23 10:23:30 +0000126
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000127 def direntIsJunction(entry):
128 # both st_file_attributes and FILE_ATTRIBUTE_REPARSE_POINT are
129 # windows-only symbols.
Junji Watanabe38b28b02020-04-23 10:23:30 +0000130 return bool(entry.stat().st_file_attributes & scandir
131 .FILE_ATTRIBUTE_REPARSE_POINT)
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000132 else:
Junji Watanabe38b28b02020-04-23 10:23:30 +0000133
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000134 def direntIsJunction(_entry):
135 return False
136
Takuto Ikutabf06ebf2019-08-02 07:28:23 +0000137 stack = [path]
138 while stack:
139 for entry in scandir.scandir(stack.pop()):
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000140 if entry.is_symlink() or direntIsJunction(entry):
Takuto Ikutabf06ebf2019-08-02 07:28:23 +0000141 continue
142 if entry.is_file():
143 total += entry.stat().st_size
144 elif entry.is_dir():
145 stack.append(entry.path)
146 else:
147 logging.warning('non directory/file entry: %s', entry)
148 return total
149
Takuto Ikuta54c8b062019-07-31 08:38:41 +0000150 for root, _, files in fs.walk(path):
151 for f in files:
Takuto Ikuta6ba1d0c2019-08-01 05:37:00 +0000152 st = fs.lstat(os.path.join(root, f))
153 if stat.S_ISLNK(st.st_mode):
154 continue
155 total += st.st_size
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000156 return total
Takuto Ikuta74b13b02020-06-08 07:13:50 +0000157 except (IOError, OSError, UnicodeEncodeError):
158 logging.exception('Exception while getting the size of %s', path)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000159 return None
160
161
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400162class NamedCacheError(Exception):
163 """Named cache specific error."""
164
165
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400166class NoMoreSpace(Exception):
167 """Not enough space to map the whole directory."""
168 pass
169
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -0400170
171class CachePolicies(object):
172 def __init__(self, max_cache_size, min_free_space, max_items, max_age_secs):
173 """Common caching policies for the multiple caches (isolated, named, cipd).
174
175 Arguments:
176 - max_cache_size: Trim if the cache gets larger than this value. If 0, the
177 cache is effectively a leak.
178 - min_free_space: Trim if disk free space becomes lower than this value. If
179 0, it will unconditionally fill the disk.
180 - max_items: Maximum number of items to keep in the cache. If 0, do not
181 enforce a limit.
182 - max_age_secs: Maximum age an item is kept in the cache until it is
183 automatically evicted. Having a lot of dead luggage slows
184 everything down.
185 """
186 self.max_cache_size = max_cache_size
187 self.min_free_space = min_free_space
188 self.max_items = max_items
189 self.max_age_secs = max_age_secs
190
191 def __str__(self):
Takuto Ikutaa953f272020-01-20 02:59:17 +0000192 return ('CachePolicies(max_cache_size=%s (%.3f GiB); max_items=%s; '
193 'min_free_space=%s (%.3f GiB); max_age_secs=%s)') % (
194 self.max_cache_size, float(self.max_cache_size) / 1024**3,
195 self.max_items, self.min_free_space,
196 float(self.min_free_space) / 1024**3, self.max_age_secs)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400197
198
199class CacheMiss(Exception):
200 """Raised when an item is not in cache."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400201 def __init__(self, digest):
202 self.digest = digest
Junji Watanabe38b28b02020-04-23 10:23:30 +0000203 super(CacheMiss,
204 self).__init__('Item with digest %r is not found in cache' % digest)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400205
206
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400207class Cache(object):
Junji Watanabe38b28b02020-04-23 10:23:30 +0000208
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400209 def __init__(self, cache_dir):
210 if cache_dir is not None:
Takuto Ikuta95459dd2019-10-29 12:39:47 +0000211 assert isinstance(cache_dir, six.text_type), cache_dir
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400212 assert file_path.isabs(cache_dir), cache_dir
213 self.cache_dir = cache_dir
214 self._lock = threading_utils.LockWithAssert()
215 # Profiling values.
216 self._added = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400217 self._used = []
218
Marc-Antoine Ruel6c3be5a2018-09-04 17:19:59 +0000219 def __nonzero__(self):
220 """A cache is always True.
221
222 Otherwise it falls back to __len__, which is surprising.
223 """
224 return True
225
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000226 def __len__(self):
227 """Returns the number of entries in the cache."""
228 raise NotImplementedError()
229
230 def __iter__(self):
231 """Iterates over all the entries names."""
232 raise NotImplementedError()
233
234 def __contains__(self, name):
235 """Returns if an entry is in the cache."""
236 raise NotImplementedError()
237
238 @property
239 def total_size(self):
240 """Returns the total size of the cache in bytes."""
241 raise NotImplementedError()
242
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400243 @property
244 def added(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000245 """Returns a list of the size for each entry added."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400246 with self._lock:
247 return self._added[:]
248
249 @property
250 def used(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000251 """Returns a list of the size for each entry used."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400252 with self._lock:
253 return self._used[:]
254
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000255 def get_oldest(self):
256 """Returns timestamp of oldest cache entry or None.
257
258 Returns:
259 Timestamp of the oldest item.
260
261 Used for manual trimming.
262 """
263 raise NotImplementedError()
264
265 def remove_oldest(self):
266 """Removes the oldest item from the cache.
267
268 Returns:
269 Size of the oldest item.
270
271 Used for manual trimming.
272 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400273 raise NotImplementedError()
274
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000275 def save(self):
276 """Saves the current cache to disk."""
277 raise NotImplementedError()
278
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400279 def trim(self):
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000280 """Enforces cache policies, then calls save().
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400281
282 Returns:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000283 Slice with the size of evicted items.
284 """
285 raise NotImplementedError()
286
287 def cleanup(self):
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000288 """Deletes any corrupted item from the cache, then calls trim(), then
289 save().
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000290
291 It is assumed to take significantly more time than trim().
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400292 """
293 raise NotImplementedError()
294
295
296class ContentAddressedCache(Cache):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400297 """Content addressed cache that stores objects temporarily.
298
299 It can be accessed concurrently from multiple threads, so it should protect
300 its internal state with some lock.
301 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400302
303 def __enter__(self):
304 """Context manager interface."""
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000305 # TODO(maruel): Remove.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400306 return self
307
308 def __exit__(self, _exc_type, _exec_value, _traceback):
309 """Context manager interface."""
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000310 # TODO(maruel): Remove.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400311 return False
312
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400313 def touch(self, digest, size):
314 """Ensures item is not corrupted and updates its LRU position.
315
316 Arguments:
317 digest: hash digest of item to check.
318 size: expected size of this item.
319
320 Returns:
321 True if item is in cache and not corrupted.
322 """
323 raise NotImplementedError()
324
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400325 def getfileobj(self, digest):
326 """Returns a readable file like object.
327
328 If file exists on the file system it will have a .name attribute with an
329 absolute path to the file.
330 """
331 raise NotImplementedError()
332
333 def write(self, digest, content):
334 """Reads data from |content| generator and stores it in cache.
335
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000336 It is possible to write to an object that already exists. It may be
337 ignored (sent to /dev/null) but the timestamp is still updated.
338
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400339 Returns digest to simplify chaining.
340 """
341 raise NotImplementedError()
342
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400343
344class MemoryContentAddressedCache(ContentAddressedCache):
345 """ContentAddressedCache implementation that stores everything in memory."""
346
Lei Leife202df2019-06-11 17:33:34 +0000347 def __init__(self, file_mode_mask=0o500):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400348 """Args:
349 file_mode_mask: bit mask to AND file mode with. Default value will make
350 all mapped files to be read only.
351 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400352 super(MemoryContentAddressedCache, self).__init__(None)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400353 self._file_mode_mask = file_mode_mask
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000354 # Items in a LRU lookup dict(digest: size).
355 self._lru = lru.LRUDict()
356
357 # Cache interface implementation.
358
359 def __len__(self):
360 with self._lock:
361 return len(self._lru)
362
363 def __iter__(self):
364 # This is not thread-safe.
365 return self._lru.__iter__()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400366
367 def __contains__(self, digest):
368 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000369 return digest in self._lru
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400370
371 @property
372 def total_size(self):
373 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000374 return sum(len(i) for i in self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400375
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000376 def get_oldest(self):
377 with self._lock:
378 try:
379 # (key, (value, ts))
380 return self._lru.get_oldest()[1][1]
381 except KeyError:
382 return None
383
384 def remove_oldest(self):
385 with self._lock:
386 # TODO(maruel): Update self._added.
387 # (key, (value, ts))
388 return len(self._lru.pop_oldest()[1][0])
389
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000390 def save(self):
391 pass
392
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000393 def trim(self):
394 """Trimming is not implemented for MemoryContentAddressedCache."""
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000395 return []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400396
397 def cleanup(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000398 """Cleaning is irrelevant, as there's no stateful serialization."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400399 pass
400
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000401 # ContentAddressedCache interface implementation.
402
403 def __contains__(self, digest):
404 with self._lock:
405 return digest in self._lru
406
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400407 def touch(self, digest, size):
408 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000409 try:
410 self._lru.touch(digest)
411 except KeyError:
412 return False
413 return True
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400414
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400415 def getfileobj(self, digest):
416 with self._lock:
417 try:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000418 d = self._lru[digest]
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400419 except KeyError:
420 raise CacheMiss(digest)
421 self._used.append(len(d))
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000422 self._lru.touch(digest)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400423 return io.BytesIO(d)
424
425 def write(self, digest, content):
426 # Assemble whole stream before taking the lock.
Lei Lei73a5f732020-03-23 20:36:14 +0000427 data = six.b('').join(content)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400428 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000429 self._lru.add(digest, data)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400430 self._added.append(len(data))
431 return digest
432
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400433
434class DiskContentAddressedCache(ContentAddressedCache):
435 """Stateful LRU cache in a flat hash table in a directory.
436
437 Saves its state as json file.
438 """
439 STATE_FILE = u'state.json'
440
Marc-Antoine Ruel79d42192019-02-06 19:24:16 +0000441 def __init__(self, cache_dir, policies, trim, time_fn=None):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400442 """
443 Arguments:
444 cache_dir: directory where to place the cache.
445 policies: CachePolicies instance, cache retention policies.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400446 trim: if True to enforce |policies| right away.
Marc-Antoine Ruel79d42192019-02-06 19:24:16 +0000447 It can be done later by calling trim() explicitly.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400448 """
449 # All protected methods (starting with '_') except _path should be called
450 # with self._lock held.
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400451 super(DiskContentAddressedCache, self).__init__(cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400452 self.policies = policies
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400453 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
454 # Items in a LRU lookup dict(digest: size).
455 self._lru = lru.LRUDict()
456 # Current cached free disk space. It is updated by self._trim().
457 file_path.ensure_tree(self.cache_dir)
458 self._free_disk = file_path.get_free_space(self.cache_dir)
459 # The first item in the LRU cache that must not be evicted during this run
460 # since it was referenced. All items more recent that _protected in the LRU
461 # cache are also inherently protected. It could be a set() of all items
462 # referenced but this increases memory usage without a use case.
463 self._protected = None
464 # Cleanup operations done by self._load(), if any.
465 self._operations = []
466 with tools.Profiler('Setup'):
467 with self._lock:
468 self._load(trim, time_fn)
469
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000470 # Cache interface implementation.
471
472 def __len__(self):
473 with self._lock:
474 return len(self._lru)
475
476 def __iter__(self):
477 # This is not thread-safe.
478 return self._lru.__iter__()
479
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400480 def __contains__(self, digest):
481 with self._lock:
482 return digest in self._lru
483
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400484 @property
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400485 def total_size(self):
486 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000487 return sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400488
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000489 def get_oldest(self):
490 with self._lock:
491 try:
492 # (key, (value, ts))
493 return self._lru.get_oldest()[1][1]
494 except KeyError:
495 return None
496
497 def remove_oldest(self):
498 with self._lock:
499 # TODO(maruel): Update self._added.
500 return self._remove_lru_file(True)
501
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000502 def save(self):
503 with self._lock:
504 return self._save()
505
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000506 def trim(self):
507 """Forces retention policies."""
508 with self._lock:
509 return self._trim()
510
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400511 def cleanup(self):
512 """Cleans up the cache directory.
513
514 Ensures there is no unknown files in cache_dir.
515 Ensures the read-only bits are set correctly.
516
517 At that point, the cache was already loaded, trimmed to respect cache
518 policies.
519 """
520 with self._lock:
Lei Leife202df2019-06-11 17:33:34 +0000521 fs.chmod(self.cache_dir, 0o700)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400522 # Ensure that all files listed in the state still exist and add new ones.
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000523 previous = set(self._lru)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400524 # It'd be faster if there were a readdir() function.
525 for filename in fs.listdir(self.cache_dir):
526 if filename == self.STATE_FILE:
Lei Leife202df2019-06-11 17:33:34 +0000527 fs.chmod(os.path.join(self.cache_dir, filename), 0o600)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400528 continue
529 if filename in previous:
Lei Leife202df2019-06-11 17:33:34 +0000530 fs.chmod(os.path.join(self.cache_dir, filename), 0o400)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400531 previous.remove(filename)
532 continue
533
534 # An untracked file. Delete it.
535 logging.warning('Removing unknown file %s from cache', filename)
536 p = self._path(filename)
537 if fs.isdir(p):
538 try:
539 file_path.rmtree(p)
540 except OSError:
541 pass
542 else:
543 file_path.try_remove(p)
544 continue
545
546 if previous:
547 # Filter out entries that were not found.
548 logging.warning('Removed %d lost files', len(previous))
549 for filename in previous:
550 self._lru.pop(filename)
551 self._save()
552
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000553 # Verify hash of every single item to detect corruption. the corrupted
554 # files will be evicted.
555 with self._lock:
556 for digest, (_, timestamp) in self._lru._items.items():
557 # verify only if the mtime is grather than the timestamp in state.json
558 # to avoid take too long time.
559 if self._get_mtime(digest) <= timestamp:
Quinten Yearsley0bc84ce2020-04-09 22:38:08 +0000560 continue
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000561 logging.warning('Item has been modified. item: %s', digest)
562 if self._is_valid_hash(digest):
Quinten Yearsley0bc84ce2020-04-09 22:38:08 +0000563 # Update timestamp in state.json
564 self._lru.touch(digest)
565 continue
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000566 # remove corrupted file from LRU and file system
567 self._lru.pop(digest)
568 self._delete_file(digest, UNKNOWN_FILE_SIZE)
569 logging.error('Deleted corrupted item: %s', digest)
570 self._save()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400571
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000572 # ContentAddressedCache interface implementation.
573
574 def __contains__(self, digest):
575 with self._lock:
576 return digest in self._lru
577
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400578 def touch(self, digest, size):
579 """Verifies an actual file is valid and bumps its LRU position.
580
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000581 Returns False if the file is missing or invalid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400582
583 Note that is doesn't compute the hash so it could still be corrupted if the
584 file size didn't change.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400585 """
586 # Do the check outside the lock.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000587 looks_valid = is_valid_file(self._path(digest), size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400588
589 # Update its LRU position.
590 with self._lock:
591 if digest not in self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000592 if looks_valid:
593 # Exists but not in the LRU anymore.
594 self._delete_file(digest, size)
595 return False
596 if not looks_valid:
597 self._lru.pop(digest)
598 # Exists but not in the LRU anymore.
599 self._delete_file(digest, size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400600 return False
601 self._lru.touch(digest)
602 self._protected = self._protected or digest
603 return True
604
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400605 def getfileobj(self, digest):
606 try:
607 f = fs.open(self._path(digest), 'rb')
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400608 except IOError:
609 raise CacheMiss(digest)
Vadim Shtayura33054fa2018-11-01 12:47:59 +0000610 with self._lock:
611 try:
612 self._used.append(self._lru[digest])
613 except KeyError:
614 # If the digest is not actually in _lru, assume it is a cache miss.
615 # Existing file will be overwritten by whoever uses the cache and added
616 # to _lru.
617 f.close()
618 raise CacheMiss(digest)
619 return f
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400620
621 def write(self, digest, content):
622 assert content is not None
623 with self._lock:
624 self._protected = self._protected or digest
625 path = self._path(digest)
626 # A stale broken file may remain. It is possible for the file to have write
627 # access bit removed which would cause the file_write() call to fail to open
628 # in write mode. Take no chance here.
629 file_path.try_remove(path)
630 try:
631 size = file_write(path, content)
632 except:
633 # There are two possible places were an exception can occur:
634 # 1) Inside |content| generator in case of network or unzipping errors.
635 # 2) Inside file_write itself in case of disk IO errors.
636 # In any case delete an incomplete file and propagate the exception to
637 # caller, it will be logged there.
638 file_path.try_remove(path)
639 raise
640 # Make the file read-only in the cache. This has a few side-effects since
641 # the file node is modified, so every directory entries to this file becomes
642 # read-only. It's fine here because it is a new file.
643 file_path.set_read_only(path, True)
644 with self._lock:
645 self._add(digest, size)
646 return digest
647
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000648 # Internal functions.
649
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400650 def _load(self, trim, time_fn):
651 """Loads state of the cache from json file.
652
653 If cache_dir does not exist on disk, it is created.
654 """
655 self._lock.assert_locked()
656
657 if not fs.isfile(self.state_file):
658 if not fs.isdir(self.cache_dir):
659 fs.makedirs(self.cache_dir)
660 else:
661 # Load state of the cache.
662 try:
663 self._lru = lru.LRUDict.load(self.state_file)
664 except ValueError as err:
665 logging.error('Failed to load cache state: %s' % (err,))
Takuto Ikutaeccc88c2019-12-13 14:46:32 +0000666 # Don't want to keep broken cache dir.
667 file_path.rmtree(self.cache_dir)
668 fs.makedirs(self.cache_dir)
Matt Kotsenasefe30092020-03-19 01:12:55 +0000669 self._free_disk = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400670 if time_fn:
671 self._lru.time_fn = time_fn
672 if trim:
673 self._trim()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400674
675 def _save(self):
676 """Saves the LRU ordering."""
677 self._lock.assert_locked()
678 if sys.platform != 'win32':
679 d = os.path.dirname(self.state_file)
680 if fs.isdir(d):
681 # Necessary otherwise the file can't be created.
682 file_path.set_read_only(d, False)
683 if fs.isfile(self.state_file):
684 file_path.set_read_only(self.state_file, False)
685 self._lru.save(self.state_file)
686
687 def _trim(self):
688 """Trims anything we don't know, make sure enough free space exists."""
689 self._lock.assert_locked()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000690 evicted = []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400691
692 # Trim old items.
693 if self.policies.max_age_secs:
694 cutoff = self._lru.time_fn() - self.policies.max_age_secs
695 while self._lru:
696 oldest = self._lru.get_oldest()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000697 # (key, (data, ts)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400698 if oldest[1][1] >= cutoff:
699 break
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000700 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400701
702 # Ensure maximum cache size.
703 if self.policies.max_cache_size:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000704 total_size = sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400705 while total_size > self.policies.max_cache_size:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000706 e = self._remove_lru_file(True)
707 evicted.append(e)
708 total_size -= e
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400709
710 # Ensure maximum number of items in the cache.
711 if self.policies.max_items and len(self._lru) > self.policies.max_items:
Marc-Antoine Ruel0fdee222019-10-10 14:42:40 +0000712 for _ in range(len(self._lru) - self.policies.max_items):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000713 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400714
715 # Ensure enough free space.
716 self._free_disk = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400717 while (
718 self.policies.min_free_space and
719 self._lru and
720 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000721 # self._free_disk is updated by this call.
722 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400723
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000724 if evicted:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000725 total_usage = sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400726 usage_percent = 0.
727 if total_usage:
728 usage_percent = 100. * float(total_usage) / self.policies.max_cache_size
729
730 logging.warning(
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000731 'Trimmed %d file(s) (%.1fkb) due to not enough free disk space:'
732 ' %.1fkb free, %.1fkb cache (%.1f%% of its maximum capacity of '
Junji Watanabe38b28b02020-04-23 10:23:30 +0000733 '%.1fkb)', len(evicted),
734 sum(evicted) / 1024., self._free_disk / 1024., total_usage / 1024.,
735 usage_percent, self.policies.max_cache_size / 1024.)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400736 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000737 return evicted
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400738
739 def _path(self, digest):
740 """Returns the path to one item."""
741 return os.path.join(self.cache_dir, digest)
742
743 def _remove_lru_file(self, allow_protected):
Quinten Yearsley0bc84ce2020-04-09 22:38:08 +0000744 """Removes the latest recently used file and returns its size.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000745
746 Updates self._free_disk.
747 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400748 self._lock.assert_locked()
749 try:
Takuto Ikutae40f76a2020-01-20 01:22:17 +0000750 digest, _ = self._lru.get_oldest()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400751 if not allow_protected and digest == self._protected:
Takuto Ikutae40f76a2020-01-20 01:22:17 +0000752 total_size = sum(self._lru.values())
753 msg = ('Not enough space to fetch the whole isolated tree.\n'
Takuto Ikutaa953f272020-01-20 02:59:17 +0000754 ' %s\n cache=%d bytes (%.3f GiB), %d items; '
755 '%s bytes (%.3f GiB) free_space') % (
756 self.policies, total_size, float(total_size) / 1024**3,
757 len(self._lru), self._free_disk,
758 float(self._free_disk) / 1024**3)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400759 raise NoMoreSpace(msg)
760 except KeyError:
761 # That means an internal error.
762 raise NoMoreSpace('Nothing to remove, can\'t happend')
763 digest, (size, _) = self._lru.pop_oldest()
764 logging.debug('Removing LRU file %s', digest)
765 self._delete_file(digest, size)
766 return size
767
768 def _add(self, digest, size=UNKNOWN_FILE_SIZE):
769 """Adds an item into LRU cache marking it as a newest one."""
770 self._lock.assert_locked()
771 if size == UNKNOWN_FILE_SIZE:
772 size = fs.stat(self._path(digest)).st_size
773 self._added.append(size)
774 self._lru.add(digest, size)
775 self._free_disk -= size
776 # Do a quicker version of self._trim(). It only enforces free disk space,
777 # not cache size limits. It doesn't actually look at real free disk space,
778 # only uses its cache values. self._trim() will be called later to enforce
779 # real trimming but doing this quick version here makes it possible to map
780 # an isolated that is larger than the current amount of free disk space when
781 # the cache size is already large.
Junji Watanabe38b28b02020-04-23 10:23:30 +0000782 while (self.policies.min_free_space and self._lru and
783 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000784 # self._free_disk is updated by this call.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400785 if self._remove_lru_file(False) == -1:
786 break
787
788 def _delete_file(self, digest, size=UNKNOWN_FILE_SIZE):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000789 """Deletes cache file from the file system.
790
791 Updates self._free_disk.
792 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400793 self._lock.assert_locked()
794 try:
795 if size == UNKNOWN_FILE_SIZE:
796 try:
797 size = fs.stat(self._path(digest)).st_size
798 except OSError:
799 size = 0
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000800 if file_path.try_remove(self._path(digest)):
801 self._free_disk += size
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400802 except OSError as e:
803 if e.errno != errno.ENOENT:
804 logging.error('Error attempting to delete a file %s:\n%s' % (digest, e))
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400805
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000806 def _get_mtime(self, digest):
807 """Get mtime of cache file."""
808 return os.path.getmtime(self._path(digest))
809
810 def _is_valid_hash(self, digest):
811 """Verify digest with supported hash algos."""
812 for _, algo in isolated_format.SUPPORTED_ALGOS.items():
Quinten Yearsley0bc84ce2020-04-09 22:38:08 +0000813 if digest == isolated_format.hash_file(self._path(digest), algo):
814 return True
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000815 return False
816
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400817
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400818class NamedCache(Cache):
819 """Manages cache directories.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400820
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400821 A cache entry is a tuple (name, path), where
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400822 name is a short identifier that describes the contents of the cache, e.g.
823 "git_v8" could be all git repositories required by v8 builds, or
824 "build_chromium" could be build artefacts of the Chromium.
825 path is a directory path relative to the task run dir. Cache installation
826 puts the requested cache directory at the path.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400827 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400828 _DIR_ALPHABET = string.ascii_letters + string.digits
829 STATE_FILE = u'state.json'
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +0000830 NAMED_DIR = u'named'
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400831
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400832 def __init__(self, cache_dir, policies, time_fn=None):
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400833 """Initializes NamedCaches.
834
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400835 Arguments:
836 - cache_dir is a directory for persistent cache storage.
837 - policies is a CachePolicies instance.
838 - time_fn is a function that returns timestamp (float) and used to take
839 timestamps when new caches are requested. Used in unit tests.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400840 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400841 super(NamedCache, self).__init__(cache_dir)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400842 self._policies = policies
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000843 # LRU {cache_name -> tuple(cache_location, size)}
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400844 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
845 self._lru = lru.LRUDict()
846 if not fs.isdir(self.cache_dir):
847 fs.makedirs(self.cache_dir)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000848 elif fs.isfile(self.state_file):
Marc-Antoine Ruel3543e212018-05-23 01:04:34 +0000849 try:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400850 self._lru = lru.LRUDict.load(self.state_file)
Takuto Ikutac4b85ec2020-06-09 03:42:39 +0000851 for _, size in self._lru.values():
852 if not isinstance(size, (int, long)):
Junji Watanabeedcf47d2020-06-11 08:41:01 +0000853 raise ValueError("size is not integer: %s" % size)
Takuto Ikutac4b85ec2020-06-09 03:42:39 +0000854
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400855 except ValueError:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000856 logging.exception(
857 'NamedCache: failed to load named cache state file; obliterating')
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400858 file_path.rmtree(self.cache_dir)
Takuto Ikuta568ddb22020-01-20 23:24:16 +0000859 fs.makedirs(self.cache_dir)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000860 with self._lock:
861 self._try_upgrade()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400862 if time_fn:
863 self._lru.time_fn = time_fn
864
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400865 @property
866 def available(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000867 """Returns a set of names of available caches."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400868 with self._lock:
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000869 return set(self._lru)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400870
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000871 def install(self, dst, name):
872 """Creates the directory |dst| and moves a previous named cache |name| if it
873 was in the local named caches cache.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400874
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000875 dst must be absolute, unicode and must not exist.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400876
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000877 Returns the reused named cache size in bytes, or 0 if none was present.
878
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400879 Raises NamedCacheError if cannot install the cache.
880 """
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000881 logging.info('NamedCache.install(%r, %r)', dst, name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400882 with self._lock:
883 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000884 if fs.isdir(dst):
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400885 raise NamedCacheError(
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000886 'installation directory %r already exists' % dst)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400887
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000888 # Remove the named symlink if it exists.
889 link_name = self._get_named_path(name)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000890 if fs.exists(link_name):
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000891 # Remove the symlink itself, not its destination.
892 fs.remove(link_name)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000893
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000894 if name in self._lru:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000895 rel_cache, size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400896 abs_cache = os.path.join(self.cache_dir, rel_cache)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000897 if fs.isdir(abs_cache):
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000898 logging.info('- reusing %r; size was %d', rel_cache, size)
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000899 file_path.ensure_tree(os.path.dirname(dst))
900 fs.rename(abs_cache, dst)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400901 self._remove(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000902 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400903
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000904 logging.warning('- expected directory %r, does not exist', rel_cache)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400905 self._remove(name)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400906
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000907 # The named cache does not exist, create an empty directory. When
908 # uninstalling, we will move it back to the cache and create an an
909 # entry.
910 logging.info('- creating new directory')
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000911 file_path.ensure_tree(dst)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000912 return 0
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400913 except (IOError, OSError) as ex:
Marc-Antoine Ruel799bc4f2019-01-30 22:54:47 +0000914 # Raise using the original traceback.
915 exc = NamedCacheError(
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000916 'cannot install cache named %r at %r: %s' % (name, dst, ex))
Lei Leife202df2019-06-11 17:33:34 +0000917 six.reraise(exc, None, sys.exc_info()[2])
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000918 finally:
919 self._save()
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400920
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000921 def uninstall(self, src, name):
922 """Moves the cache directory back into the named cache hive for an eventual
923 reuse.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400924
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000925 The opposite of install().
926
927 src must be absolute and unicode. Its content is moved back into the local
928 named caches cache.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400929
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000930 Returns the named cache size in bytes.
931
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400932 Raises NamedCacheError if cannot uninstall the cache.
933 """
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000934 logging.info('NamedCache.uninstall(%r, %r)', src, name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400935 with self._lock:
936 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000937 if not fs.isdir(src):
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400938 logging.warning(
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000939 'NamedCache: Directory %r does not exist anymore. Cache lost.',
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000940 src)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400941 return
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400942
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000943 if name in self._lru:
944 # This shouldn't happen but just remove the preexisting one and move
945 # on.
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000946 logging.error('- overwriting existing cache!')
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000947 self._remove(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000948
Takuto Ikuta93483272020-06-05 09:06:34 +0000949 # Calculate the size of the named cache to keep.
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000950 size = _get_recursive_size(src)
951 logging.info('- Size is %d', size)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400952
953 # Move the dir and create an entry for the named cache.
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000954 rel_cache = self._allocate_dir()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400955 abs_cache = os.path.join(self.cache_dir, rel_cache)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000956 logging.info('- Moving to %r', rel_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400957 file_path.ensure_tree(os.path.dirname(abs_cache))
Takuto Ikuta17a27ac2020-06-24 08:11:55 +0000958
959 if sys.platform != 'win32':
960 uid = os.getuid()
961 if os.stat(src).st_uid != uid:
962 # Maybe owner of |src| is different from runner of this script. This
963 # is to make fs.rename work in that case.
964 # https://crbug.com/986676
965 subprocess.check_call(['sudo', '-n', 'chown', str(uid), src])
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000966 fs.rename(src, abs_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400967
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000968 self._lru.add(name, (rel_cache, size))
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000969 self._added.append(size)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000970
971 # Create symlink <cache_dir>/<named>/<name> -> <cache_dir>/<short name>
972 # for user convenience.
973 named_path = self._get_named_path(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000974 if fs.exists(named_path):
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000975 file_path.remove(named_path)
976 else:
977 file_path.ensure_tree(os.path.dirname(named_path))
978
979 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000980 fs.symlink(os.path.join(u'..', rel_cache), named_path)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000981 logging.info(
982 'NamedCache: Created symlink %r to %r', named_path, abs_cache)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000983 except OSError:
984 # Ignore on Windows. It happens when running as a normal user or when
985 # UAC is enabled and the user is a filtered administrator account.
986 if sys.platform != 'win32':
987 raise
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000988 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400989 except (IOError, OSError) as ex:
Marc-Antoine Ruel799bc4f2019-01-30 22:54:47 +0000990 # Raise using the original traceback.
991 exc = NamedCacheError(
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000992 'cannot uninstall cache named %r at %r: %s' % (name, src, ex))
Lei Leife202df2019-06-11 17:33:34 +0000993 six.reraise(exc, None, sys.exc_info()[2])
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000994 finally:
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000995 # Call save() at every uninstall. The assumptions are:
996 # - The total the number of named caches is low, so the state.json file
997 # is small, so the time it takes to write it to disk is short.
998 # - The number of mapped named caches per task is low, so the number of
999 # times save() is called on tear-down isn't high enough to be
1000 # significant.
1001 # - uninstall() sometimes throws due to file locking on Windows or
1002 # access rights on Linux. We want to keep as many as possible.
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001003 self._save()
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001004
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001005 # Cache interface implementation.
1006
1007 def __len__(self):
1008 with self._lock:
1009 return len(self._lru)
1010
1011 def __iter__(self):
1012 # This is not thread-safe.
1013 return self._lru.__iter__()
1014
John Budorickc6186972020-02-26 00:58:14 +00001015 def __contains__(self, name):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001016 with self._lock:
John Budorickc6186972020-02-26 00:58:14 +00001017 return name in self._lru
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001018
1019 @property
1020 def total_size(self):
1021 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001022 return sum(size for _rel_path, size in self._lru.values())
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001023
1024 def get_oldest(self):
1025 with self._lock:
1026 try:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001027 # (key, (value, ts))
1028 return self._lru.get_oldest()[1][1]
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001029 except KeyError:
1030 return None
1031
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001032 def remove_oldest(self):
1033 with self._lock:
1034 # TODO(maruel): Update self._added.
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001035 _name, size = self._remove_lru_item()
1036 return size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001037
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +00001038 def save(self):
1039 with self._lock:
1040 return self._save()
1041
John Budorickc6186972020-02-26 00:58:14 +00001042 def touch(self, *names):
1043 with self._lock:
1044 for name in names:
1045 if name in self._lru:
1046 self._lru.touch(name)
1047 self._save()
1048
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001049 def trim(self):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001050 evicted = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001051 with self._lock:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001052 if not fs.isdir(self.cache_dir):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001053 return evicted
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001054
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001055 # Trim according to maximum number of items.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001056 if self._policies.max_items:
1057 while len(self._lru) > self._policies.max_items:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001058 name, size = self._remove_lru_item()
1059 evicted.append(size)
1060 logging.info(
1061 'NamedCache.trim(): Removed %r(%d) due to max_items(%d)',
1062 name, size, self._policies.max_items)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001063
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001064 # Trim according to maximum age.
1065 if self._policies.max_age_secs:
1066 cutoff = self._lru.time_fn() - self._policies.max_age_secs
1067 while self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001068 _name, (_data, ts) = self._lru.get_oldest()
1069 if ts >= cutoff:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001070 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001071 name, size = self._remove_lru_item()
1072 evicted.append(size)
1073 logging.info(
1074 'NamedCache.trim(): Removed %r(%d) due to max_age_secs(%d)',
1075 name, size, self._policies.max_age_secs)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001076
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001077 # Trim according to minimum free space.
1078 if self._policies.min_free_space:
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001079 while self._lru:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001080 free_space = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001081 if free_space >= self._policies.min_free_space:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001082 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001083 name, size = self._remove_lru_item()
1084 evicted.append(size)
1085 logging.info(
1086 'NamedCache.trim(): Removed %r(%d) due to min_free_space(%d)',
1087 name, size, self._policies.min_free_space)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001088
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001089 # Trim according to maximum total size.
1090 if self._policies.max_cache_size:
1091 while self._lru:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001092 total = sum(size for _rel_cache, size in self._lru.values())
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001093 if total <= self._policies.max_cache_size:
1094 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001095 name, size = self._remove_lru_item()
1096 evicted.append(size)
1097 logging.info(
1098 'NamedCache.trim(): Removed %r(%d) due to max_cache_size(%d)',
1099 name, size, self._policies.max_cache_size)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001100
Marc-Antoine Ruele79ddbf2018-06-13 18:33:07 +00001101 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001102 return evicted
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001103
1104 def cleanup(self):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001105 """Removes unknown directories.
1106
1107 Does not recalculate the cache size since it's surprisingly slow on some
1108 OSes.
1109 """
1110 success = True
1111 with self._lock:
1112 try:
1113 actual = set(fs.listdir(self.cache_dir))
1114 actual.discard(self.NAMED_DIR)
1115 actual.discard(self.STATE_FILE)
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001116 expected = {v[0]: k for k, v in self._lru.items()}
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001117 # First, handle the actual cache content.
1118 # Remove missing entries.
1119 for missing in (set(expected) - actual):
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001120 name, size = self._lru.pop(expected[missing])
1121 logging.warning(
1122 'NamedCache.cleanup(): Missing on disk %r(%d)', name, size)
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001123 # Remove unexpected items.
1124 for unexpected in (actual - set(expected)):
1125 try:
1126 p = os.path.join(self.cache_dir, unexpected)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001127 logging.warning(
1128 'NamedCache.cleanup(): Unexpected %r', unexpected)
Marc-Antoine Ruel41362222018-06-28 14:52:34 +00001129 if fs.isdir(p) and not fs.islink(p):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001130 file_path.rmtree(p)
1131 else:
1132 fs.remove(p)
1133 except (IOError, OSError) as e:
1134 logging.error('Failed to remove %s: %s', unexpected, e)
1135 success = False
1136
1137 # Second, fix named cache links.
1138 named = os.path.join(self.cache_dir, self.NAMED_DIR)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001139 if fs.isdir(named):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001140 actual = set(fs.listdir(named))
1141 expected = set(self._lru)
1142 # Confirm entries. Do not add missing ones for now.
1143 for name in expected.intersection(actual):
1144 p = os.path.join(self.cache_dir, self.NAMED_DIR, name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001145 expected_link = os.path.join(u'..', self._lru[name][0])
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001146 if fs.islink(p):
1147 link = fs.readlink(p)
1148 if expected_link == link:
1149 continue
1150 logging.warning(
1151 'Unexpected symlink for cache %s: %s, expected %s',
1152 name, link, expected_link)
1153 else:
1154 logging.warning('Unexpected non symlink for cache %s', name)
Marc-Antoine Ruel41362222018-06-28 14:52:34 +00001155 if fs.isdir(p) and not fs.islink(p):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001156 file_path.rmtree(p)
1157 else:
1158 fs.remove(p)
1159 # Remove unexpected items.
1160 for unexpected in (actual - expected):
1161 try:
1162 p = os.path.join(self.cache_dir, self.NAMED_DIR, unexpected)
1163 if fs.isdir(p):
1164 file_path.rmtree(p)
1165 else:
1166 fs.remove(p)
1167 except (IOError, OSError) as e:
1168 logging.error('Failed to remove %s: %s', unexpected, e)
1169 success = False
1170 finally:
1171 self._save()
1172 return success
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001173
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001174 # Internal functions.
1175
1176 def _try_upgrade(self):
1177 """Upgrades from the old format to the new one if necessary.
1178
1179 This code can be removed so all bots are known to have the right new format.
1180 """
1181 if not self._lru:
1182 return
1183 _name, (data, _ts) = self._lru.get_oldest()
1184 if isinstance(data, (list, tuple)):
1185 return
1186 # Update to v2.
1187 def upgrade(_name, rel_cache):
1188 abs_cache = os.path.join(self.cache_dir, rel_cache)
1189 return rel_cache, _get_recursive_size(abs_cache)
1190 self._lru.transform(upgrade)
1191 self._save()
1192
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001193 def _remove_lru_item(self):
1194 """Removes the oldest LRU entry. LRU must not be empty."""
1195 name, ((_rel_path, size), _ts) = self._lru.get_oldest()
1196 logging.info('Removing named cache %r', name)
1197 self._remove(name)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001198 return name, size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001199
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001200 def _allocate_dir(self):
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001201 """Creates and returns relative path of a new cache directory.
1202
1203 In practice, it is a 2-letter string.
1204 """
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001205 # We randomly generate directory names that have two lower/upper case
1206 # letters or digits. Total number of possibilities is (26*2 + 10)^2 = 3844.
1207 abc_len = len(self._DIR_ALPHABET)
1208 tried = set()
1209 while len(tried) < 1000:
1210 i = random.randint(0, abc_len * abc_len - 1)
1211 rel_path = (
1212 self._DIR_ALPHABET[i / abc_len] +
1213 self._DIR_ALPHABET[i % abc_len])
1214 if rel_path in tried:
1215 continue
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001216 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001217 if not fs.exists(abs_path):
1218 return rel_path
1219 tried.add(rel_path)
1220 raise NamedCacheError(
1221 'could not allocate a new cache dir, too many cache dirs')
1222
1223 def _remove(self, name):
1224 """Removes a cache directory and entry.
1225
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001226 Returns:
1227 Number of caches deleted.
1228 """
1229 self._lock.assert_locked()
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001230 # First try to remove the alias if it exists.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001231 named_dir = self._get_named_path(name)
1232 if fs.islink(named_dir):
1233 fs.unlink(named_dir)
1234
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001235 # Then remove the actual data.
1236 if name not in self._lru:
1237 return
1238 rel_path, _size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001239 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001240 if fs.isdir(abs_path):
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001241 file_path.rmtree(abs_path)
1242 self._lru.pop(name)
1243
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001244 def _save(self):
1245 self._lock.assert_locked()
1246 file_path.ensure_tree(self.cache_dir)
1247 self._lru.save(self.state_file)
1248
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001249 def _get_named_path(self, name):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001250 return os.path.join(self.cache_dir, self.NAMED_DIR, name)