blob: 1cd516361d61e08e062f19c03a16ebb4f850daae [file] [log] [blame]
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -04001# Copyright 2018 The LUCI Authors. All rights reserved.
2# Use of this source code is governed under the Apache License, Version 2.0
3# that can be found in the LICENSE file.
4
5"""Define local cache policies."""
6
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -04007import errno
8import io
9import logging
10import os
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040011import random
12import string
Junji Watanabe7b720782020-07-01 01:51:07 +000013import subprocess
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040014import sys
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000015import time
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040016
17from utils import file_path
18from utils import fs
19from utils import lru
20from utils import threading_utils
21from utils import tools
Lei Leife202df2019-06-11 17:33:34 +000022tools.force_local_third_party()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040023
Lei Leife202df2019-06-11 17:33:34 +000024# third_party/
25import six
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040026
Junji Watanabe5e73aab2020-04-09 04:20:27 +000027import isolated_format
28
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040029# The file size to be used when we don't know the correct file size,
30# generally used for .isolated files.
31UNKNOWN_FILE_SIZE = None
32
33
34def file_write(path, content_generator):
35 """Writes file content as generated by content_generator.
36
37 Creates the intermediary directory as needed.
38
39 Returns the number of bytes written.
40
41 Meant to be mocked out in unit tests.
42 """
43 file_path.ensure_tree(os.path.dirname(path))
44 total = 0
45 with fs.open(path, 'wb') as f:
46 for d in content_generator:
47 total += len(d)
48 f.write(d)
49 return total
50
51
52def is_valid_file(path, size):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +000053 """Returns if the given files appears valid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040054
55 Currently it just checks the file exists and its size matches the expectation.
56 """
57 if size == UNKNOWN_FILE_SIZE:
58 return fs.isfile(path)
59 try:
60 actual_size = fs.stat(path).st_size
61 except OSError as e:
Junji Watanabe38b28b02020-04-23 10:23:30 +000062 logging.warning('Can\'t read item %s, assuming it\'s invalid: %s',
63 os.path.basename(path), e)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040064 return False
65 if size != actual_size:
66 logging.warning(
67 'Found invalid item %s; %d != %d',
68 os.path.basename(path), actual_size, size)
69 return False
70 return True
71
72
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000073def trim_caches(caches, path, min_free_space, max_age_secs):
74 """Trims multiple caches.
75
76 The goal here is to coherently trim all caches in a coherent LRU fashion,
77 deleting older items independent of which container they belong to.
78
79 Two policies are enforced first:
80 - max_age_secs
81 - min_free_space
82
83 Once that's done, then we enforce each cache's own policies.
84
85 Returns:
86 Slice containing the size of all items evicted.
87 """
88 min_ts = time.time() - max_age_secs if max_age_secs else 0
89 free_disk = file_path.get_free_space(path) if min_free_space else 0
90 total = []
91 if min_ts or free_disk:
92 while True:
93 oldest = [(c, c.get_oldest()) for c in caches if len(c) > 0]
94 if not oldest:
95 break
Lei Leife202df2019-06-11 17:33:34 +000096 oldest.sort(key=lambda k: k[1])
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000097 c, ts = oldest[0]
98 if ts >= min_ts and free_disk >= min_free_space:
99 break
100 total.append(c.remove_oldest())
101 if min_free_space:
102 free_disk = file_path.get_free_space(path)
103 # Evaluate each cache's own policies.
104 for c in caches:
105 total.extend(c.trim())
106 return total
107
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000108
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400109class NamedCacheError(Exception):
110 """Named cache specific error."""
111
112
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400113class NoMoreSpace(Exception):
114 """Not enough space to map the whole directory."""
115 pass
116
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -0400117
118class CachePolicies(object):
119 def __init__(self, max_cache_size, min_free_space, max_items, max_age_secs):
120 """Common caching policies for the multiple caches (isolated, named, cipd).
121
122 Arguments:
123 - max_cache_size: Trim if the cache gets larger than this value. If 0, the
124 cache is effectively a leak.
125 - min_free_space: Trim if disk free space becomes lower than this value. If
126 0, it will unconditionally fill the disk.
127 - max_items: Maximum number of items to keep in the cache. If 0, do not
128 enforce a limit.
129 - max_age_secs: Maximum age an item is kept in the cache until it is
130 automatically evicted. Having a lot of dead luggage slows
131 everything down.
132 """
133 self.max_cache_size = max_cache_size
134 self.min_free_space = min_free_space
135 self.max_items = max_items
136 self.max_age_secs = max_age_secs
137
138 def __str__(self):
Takuto Ikutaa953f272020-01-20 02:59:17 +0000139 return ('CachePolicies(max_cache_size=%s (%.3f GiB); max_items=%s; '
140 'min_free_space=%s (%.3f GiB); max_age_secs=%s)') % (
141 self.max_cache_size, float(self.max_cache_size) / 1024**3,
142 self.max_items, self.min_free_space,
143 float(self.min_free_space) / 1024**3, self.max_age_secs)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400144
145
146class CacheMiss(Exception):
147 """Raised when an item is not in cache."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400148 def __init__(self, digest):
149 self.digest = digest
Junji Watanabe38b28b02020-04-23 10:23:30 +0000150 super(CacheMiss,
151 self).__init__('Item with digest %r is not found in cache' % digest)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400152
153
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400154class Cache(object):
Junji Watanabe38b28b02020-04-23 10:23:30 +0000155
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400156 def __init__(self, cache_dir):
157 if cache_dir is not None:
Takuto Ikuta95459dd2019-10-29 12:39:47 +0000158 assert isinstance(cache_dir, six.text_type), cache_dir
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400159 assert file_path.isabs(cache_dir), cache_dir
160 self.cache_dir = cache_dir
161 self._lock = threading_utils.LockWithAssert()
162 # Profiling values.
163 self._added = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400164 self._used = []
165
Marc-Antoine Ruel6c3be5a2018-09-04 17:19:59 +0000166 def __nonzero__(self):
167 """A cache is always True.
168
169 Otherwise it falls back to __len__, which is surprising.
170 """
171 return True
172
Takuto Ikuta1c717d72020-06-29 10:15:09 +0000173 def __bool__(self):
174 """A cache is always True.
175
176 Otherwise it falls back to __len__, which is surprising.
177 """
178 return True
179
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000180 def __len__(self):
181 """Returns the number of entries in the cache."""
182 raise NotImplementedError()
183
184 def __iter__(self):
185 """Iterates over all the entries names."""
186 raise NotImplementedError()
187
188 def __contains__(self, name):
189 """Returns if an entry is in the cache."""
190 raise NotImplementedError()
191
192 @property
193 def total_size(self):
194 """Returns the total size of the cache in bytes."""
195 raise NotImplementedError()
196
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400197 @property
198 def added(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000199 """Returns a list of the size for each entry added."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400200 with self._lock:
201 return self._added[:]
202
203 @property
204 def used(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000205 """Returns a list of the size for each entry used."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400206 with self._lock:
207 return self._used[:]
208
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000209 def get_oldest(self):
210 """Returns timestamp of oldest cache entry or None.
211
212 Returns:
213 Timestamp of the oldest item.
214
215 Used for manual trimming.
216 """
217 raise NotImplementedError()
218
219 def remove_oldest(self):
220 """Removes the oldest item from the cache.
221
222 Returns:
223 Size of the oldest item.
224
225 Used for manual trimming.
226 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400227 raise NotImplementedError()
228
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000229 def save(self):
230 """Saves the current cache to disk."""
231 raise NotImplementedError()
232
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400233 def trim(self):
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000234 """Enforces cache policies, then calls save().
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400235
236 Returns:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000237 Slice with the size of evicted items.
238 """
239 raise NotImplementedError()
240
241 def cleanup(self):
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000242 """Deletes any corrupted item from the cache, then calls trim(), then
243 save().
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000244
245 It is assumed to take significantly more time than trim().
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400246 """
247 raise NotImplementedError()
248
249
250class ContentAddressedCache(Cache):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400251 """Content addressed cache that stores objects temporarily.
252
253 It can be accessed concurrently from multiple threads, so it should protect
254 its internal state with some lock.
255 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400256
257 def __enter__(self):
258 """Context manager interface."""
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000259 # TODO(maruel): Remove.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400260 return self
261
262 def __exit__(self, _exc_type, _exec_value, _traceback):
263 """Context manager interface."""
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000264 # TODO(maruel): Remove.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400265 return False
266
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400267 def touch(self, digest, size):
268 """Ensures item is not corrupted and updates its LRU position.
269
270 Arguments:
271 digest: hash digest of item to check.
272 size: expected size of this item.
273
274 Returns:
275 True if item is in cache and not corrupted.
276 """
277 raise NotImplementedError()
278
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400279 def getfileobj(self, digest):
280 """Returns a readable file like object.
281
282 If file exists on the file system it will have a .name attribute with an
283 absolute path to the file.
284 """
285 raise NotImplementedError()
286
287 def write(self, digest, content):
288 """Reads data from |content| generator and stores it in cache.
289
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000290 It is possible to write to an object that already exists. It may be
291 ignored (sent to /dev/null) but the timestamp is still updated.
292
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400293 Returns digest to simplify chaining.
294 """
295 raise NotImplementedError()
296
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400297
298class MemoryContentAddressedCache(ContentAddressedCache):
299 """ContentAddressedCache implementation that stores everything in memory."""
300
Lei Leife202df2019-06-11 17:33:34 +0000301 def __init__(self, file_mode_mask=0o500):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400302 """Args:
303 file_mode_mask: bit mask to AND file mode with. Default value will make
304 all mapped files to be read only.
305 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400306 super(MemoryContentAddressedCache, self).__init__(None)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400307 self._file_mode_mask = file_mode_mask
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000308 # Items in a LRU lookup dict(digest: size).
309 self._lru = lru.LRUDict()
310
311 # Cache interface implementation.
312
313 def __len__(self):
314 with self._lock:
315 return len(self._lru)
316
317 def __iter__(self):
318 # This is not thread-safe.
319 return self._lru.__iter__()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400320
321 def __contains__(self, digest):
322 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000323 return digest in self._lru
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400324
325 @property
326 def total_size(self):
327 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000328 return sum(len(i) for i in self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400329
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000330 def get_oldest(self):
331 with self._lock:
332 try:
333 # (key, (value, ts))
334 return self._lru.get_oldest()[1][1]
335 except KeyError:
336 return None
337
338 def remove_oldest(self):
339 with self._lock:
340 # TODO(maruel): Update self._added.
341 # (key, (value, ts))
342 return len(self._lru.pop_oldest()[1][0])
343
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000344 def save(self):
345 pass
346
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000347 def trim(self):
348 """Trimming is not implemented for MemoryContentAddressedCache."""
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000349 return []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400350
351 def cleanup(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000352 """Cleaning is irrelevant, as there's no stateful serialization."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400353 pass
354
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000355 # ContentAddressedCache interface implementation.
356
357 def __contains__(self, digest):
358 with self._lock:
359 return digest in self._lru
360
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400361 def touch(self, digest, size):
362 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000363 try:
364 self._lru.touch(digest)
365 except KeyError:
366 return False
367 return True
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400368
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400369 def getfileobj(self, digest):
370 with self._lock:
371 try:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000372 d = self._lru[digest]
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400373 except KeyError:
374 raise CacheMiss(digest)
375 self._used.append(len(d))
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000376 self._lru.touch(digest)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400377 return io.BytesIO(d)
378
379 def write(self, digest, content):
380 # Assemble whole stream before taking the lock.
Lei Lei73a5f732020-03-23 20:36:14 +0000381 data = six.b('').join(content)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400382 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000383 self._lru.add(digest, data)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400384 self._added.append(len(data))
385 return digest
386
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400387
388class DiskContentAddressedCache(ContentAddressedCache):
389 """Stateful LRU cache in a flat hash table in a directory.
390
391 Saves its state as json file.
392 """
393 STATE_FILE = u'state.json'
394
Marc-Antoine Ruel79d42192019-02-06 19:24:16 +0000395 def __init__(self, cache_dir, policies, trim, time_fn=None):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400396 """
397 Arguments:
398 cache_dir: directory where to place the cache.
399 policies: CachePolicies instance, cache retention policies.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400400 trim: if True to enforce |policies| right away.
Marc-Antoine Ruel79d42192019-02-06 19:24:16 +0000401 It can be done later by calling trim() explicitly.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400402 """
403 # All protected methods (starting with '_') except _path should be called
404 # with self._lock held.
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400405 super(DiskContentAddressedCache, self).__init__(cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400406 self.policies = policies
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400407 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
408 # Items in a LRU lookup dict(digest: size).
409 self._lru = lru.LRUDict()
410 # Current cached free disk space. It is updated by self._trim().
411 file_path.ensure_tree(self.cache_dir)
412 self._free_disk = file_path.get_free_space(self.cache_dir)
413 # The first item in the LRU cache that must not be evicted during this run
414 # since it was referenced. All items more recent that _protected in the LRU
415 # cache are also inherently protected. It could be a set() of all items
416 # referenced but this increases memory usage without a use case.
417 self._protected = None
418 # Cleanup operations done by self._load(), if any.
419 self._operations = []
420 with tools.Profiler('Setup'):
421 with self._lock:
422 self._load(trim, time_fn)
423
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000424 # Cache interface implementation.
425
426 def __len__(self):
427 with self._lock:
428 return len(self._lru)
429
430 def __iter__(self):
431 # This is not thread-safe.
432 return self._lru.__iter__()
433
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400434 def __contains__(self, digest):
435 with self._lock:
436 return digest in self._lru
437
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400438 @property
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400439 def total_size(self):
440 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000441 return sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400442
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000443 def get_oldest(self):
444 with self._lock:
445 try:
446 # (key, (value, ts))
447 return self._lru.get_oldest()[1][1]
448 except KeyError:
449 return None
450
451 def remove_oldest(self):
452 with self._lock:
453 # TODO(maruel): Update self._added.
454 return self._remove_lru_file(True)
455
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000456 def save(self):
457 with self._lock:
458 return self._save()
459
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000460 def trim(self):
461 """Forces retention policies."""
462 with self._lock:
463 return self._trim()
464
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400465 def cleanup(self):
466 """Cleans up the cache directory.
467
468 Ensures there is no unknown files in cache_dir.
469 Ensures the read-only bits are set correctly.
470
471 At that point, the cache was already loaded, trimmed to respect cache
472 policies.
473 """
474 with self._lock:
Lei Leife202df2019-06-11 17:33:34 +0000475 fs.chmod(self.cache_dir, 0o700)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400476 # Ensure that all files listed in the state still exist and add new ones.
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000477 previous = set(self._lru)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400478 # It'd be faster if there were a readdir() function.
479 for filename in fs.listdir(self.cache_dir):
480 if filename == self.STATE_FILE:
Lei Leife202df2019-06-11 17:33:34 +0000481 fs.chmod(os.path.join(self.cache_dir, filename), 0o600)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400482 continue
483 if filename in previous:
Lei Leife202df2019-06-11 17:33:34 +0000484 fs.chmod(os.path.join(self.cache_dir, filename), 0o400)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400485 previous.remove(filename)
486 continue
487
488 # An untracked file. Delete it.
489 logging.warning('Removing unknown file %s from cache', filename)
490 p = self._path(filename)
491 if fs.isdir(p):
492 try:
493 file_path.rmtree(p)
494 except OSError:
495 pass
496 else:
497 file_path.try_remove(p)
498 continue
499
500 if previous:
501 # Filter out entries that were not found.
502 logging.warning('Removed %d lost files', len(previous))
503 for filename in previous:
504 self._lru.pop(filename)
505 self._save()
506
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000507 # Verify hash of every single item to detect corruption. the corrupted
508 # files will be evicted.
509 with self._lock:
Takuto Ikuta1c717d72020-06-29 10:15:09 +0000510 for digest, (_, timestamp) in list(self._lru._items.items()):
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000511 # verify only if the mtime is grather than the timestamp in state.json
512 # to avoid take too long time.
513 if self._get_mtime(digest) <= timestamp:
Quinten Yearsley0bc84ce2020-04-09 22:38:08 +0000514 continue
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000515 logging.warning('Item has been modified. item: %s', digest)
516 if self._is_valid_hash(digest):
Quinten Yearsley0bc84ce2020-04-09 22:38:08 +0000517 # Update timestamp in state.json
518 self._lru.touch(digest)
519 continue
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000520 # remove corrupted file from LRU and file system
521 self._lru.pop(digest)
522 self._delete_file(digest, UNKNOWN_FILE_SIZE)
523 logging.error('Deleted corrupted item: %s', digest)
524 self._save()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400525
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000526 # ContentAddressedCache interface implementation.
527
528 def __contains__(self, digest):
529 with self._lock:
530 return digest in self._lru
531
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400532 def touch(self, digest, size):
533 """Verifies an actual file is valid and bumps its LRU position.
534
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000535 Returns False if the file is missing or invalid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400536
537 Note that is doesn't compute the hash so it could still be corrupted if the
538 file size didn't change.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400539 """
540 # Do the check outside the lock.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000541 looks_valid = is_valid_file(self._path(digest), size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400542
543 # Update its LRU position.
544 with self._lock:
545 if digest not in self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000546 if looks_valid:
547 # Exists but not in the LRU anymore.
548 self._delete_file(digest, size)
549 return False
550 if not looks_valid:
551 self._lru.pop(digest)
552 # Exists but not in the LRU anymore.
553 self._delete_file(digest, size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400554 return False
555 self._lru.touch(digest)
556 self._protected = self._protected or digest
557 return True
558
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400559 def getfileobj(self, digest):
560 try:
561 f = fs.open(self._path(digest), 'rb')
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400562 except IOError:
563 raise CacheMiss(digest)
Vadim Shtayura33054fa2018-11-01 12:47:59 +0000564 with self._lock:
565 try:
566 self._used.append(self._lru[digest])
567 except KeyError:
568 # If the digest is not actually in _lru, assume it is a cache miss.
569 # Existing file will be overwritten by whoever uses the cache and added
570 # to _lru.
571 f.close()
572 raise CacheMiss(digest)
573 return f
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400574
575 def write(self, digest, content):
576 assert content is not None
577 with self._lock:
578 self._protected = self._protected or digest
579 path = self._path(digest)
580 # A stale broken file may remain. It is possible for the file to have write
581 # access bit removed which would cause the file_write() call to fail to open
582 # in write mode. Take no chance here.
583 file_path.try_remove(path)
584 try:
585 size = file_write(path, content)
586 except:
587 # There are two possible places were an exception can occur:
588 # 1) Inside |content| generator in case of network or unzipping errors.
589 # 2) Inside file_write itself in case of disk IO errors.
590 # In any case delete an incomplete file and propagate the exception to
591 # caller, it will be logged there.
592 file_path.try_remove(path)
593 raise
594 # Make the file read-only in the cache. This has a few side-effects since
595 # the file node is modified, so every directory entries to this file becomes
596 # read-only. It's fine here because it is a new file.
597 file_path.set_read_only(path, True)
598 with self._lock:
599 self._add(digest, size)
600 return digest
601
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000602 # Internal functions.
603
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400604 def _load(self, trim, time_fn):
605 """Loads state of the cache from json file.
606
607 If cache_dir does not exist on disk, it is created.
608 """
609 self._lock.assert_locked()
610
611 if not fs.isfile(self.state_file):
612 if not fs.isdir(self.cache_dir):
613 fs.makedirs(self.cache_dir)
614 else:
615 # Load state of the cache.
616 try:
617 self._lru = lru.LRUDict.load(self.state_file)
618 except ValueError as err:
619 logging.error('Failed to load cache state: %s' % (err,))
Takuto Ikutaeccc88c2019-12-13 14:46:32 +0000620 # Don't want to keep broken cache dir.
621 file_path.rmtree(self.cache_dir)
622 fs.makedirs(self.cache_dir)
Matt Kotsenasefe30092020-03-19 01:12:55 +0000623 self._free_disk = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400624 if time_fn:
625 self._lru.time_fn = time_fn
626 if trim:
627 self._trim()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400628
629 def _save(self):
630 """Saves the LRU ordering."""
631 self._lock.assert_locked()
632 if sys.platform != 'win32':
633 d = os.path.dirname(self.state_file)
634 if fs.isdir(d):
635 # Necessary otherwise the file can't be created.
636 file_path.set_read_only(d, False)
637 if fs.isfile(self.state_file):
638 file_path.set_read_only(self.state_file, False)
639 self._lru.save(self.state_file)
640
641 def _trim(self):
642 """Trims anything we don't know, make sure enough free space exists."""
643 self._lock.assert_locked()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000644 evicted = []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400645
646 # Trim old items.
647 if self.policies.max_age_secs:
648 cutoff = self._lru.time_fn() - self.policies.max_age_secs
649 while self._lru:
650 oldest = self._lru.get_oldest()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000651 # (key, (data, ts)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400652 if oldest[1][1] >= cutoff:
653 break
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000654 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400655
656 # Ensure maximum cache size.
657 if self.policies.max_cache_size:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000658 total_size = sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400659 while total_size > self.policies.max_cache_size:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000660 e = self._remove_lru_file(True)
661 evicted.append(e)
662 total_size -= e
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400663
664 # Ensure maximum number of items in the cache.
665 if self.policies.max_items and len(self._lru) > self.policies.max_items:
Marc-Antoine Ruel0fdee222019-10-10 14:42:40 +0000666 for _ in range(len(self._lru) - self.policies.max_items):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000667 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400668
669 # Ensure enough free space.
670 self._free_disk = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400671 while (
672 self.policies.min_free_space and
673 self._lru and
674 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000675 # self._free_disk is updated by this call.
676 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400677
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000678 if evicted:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000679 total_usage = sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400680 usage_percent = 0.
681 if total_usage:
682 usage_percent = 100. * float(total_usage) / self.policies.max_cache_size
683
684 logging.warning(
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000685 'Trimmed %d file(s) (%.1fkb) due to not enough free disk space:'
686 ' %.1fkb free, %.1fkb cache (%.1f%% of its maximum capacity of '
Junji Watanabe38b28b02020-04-23 10:23:30 +0000687 '%.1fkb)', len(evicted),
688 sum(evicted) / 1024., self._free_disk / 1024., total_usage / 1024.,
689 usage_percent, self.policies.max_cache_size / 1024.)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400690 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000691 return evicted
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400692
693 def _path(self, digest):
694 """Returns the path to one item."""
695 return os.path.join(self.cache_dir, digest)
696
697 def _remove_lru_file(self, allow_protected):
Quinten Yearsley0bc84ce2020-04-09 22:38:08 +0000698 """Removes the latest recently used file and returns its size.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000699
700 Updates self._free_disk.
701 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400702 self._lock.assert_locked()
703 try:
Takuto Ikutae40f76a2020-01-20 01:22:17 +0000704 digest, _ = self._lru.get_oldest()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400705 if not allow_protected and digest == self._protected:
Takuto Ikutae40f76a2020-01-20 01:22:17 +0000706 total_size = sum(self._lru.values())
707 msg = ('Not enough space to fetch the whole isolated tree.\n'
Takuto Ikutaa953f272020-01-20 02:59:17 +0000708 ' %s\n cache=%d bytes (%.3f GiB), %d items; '
709 '%s bytes (%.3f GiB) free_space') % (
710 self.policies, total_size, float(total_size) / 1024**3,
711 len(self._lru), self._free_disk,
712 float(self._free_disk) / 1024**3)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400713 raise NoMoreSpace(msg)
714 except KeyError:
715 # That means an internal error.
716 raise NoMoreSpace('Nothing to remove, can\'t happend')
717 digest, (size, _) = self._lru.pop_oldest()
Takuto Ikuta8d8ca9b2021-02-26 02:31:43 +0000718 logging.debug('Removing LRU file %s with size %s bytes', digest, size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400719 self._delete_file(digest, size)
720 return size
721
722 def _add(self, digest, size=UNKNOWN_FILE_SIZE):
723 """Adds an item into LRU cache marking it as a newest one."""
724 self._lock.assert_locked()
725 if size == UNKNOWN_FILE_SIZE:
726 size = fs.stat(self._path(digest)).st_size
727 self._added.append(size)
728 self._lru.add(digest, size)
729 self._free_disk -= size
730 # Do a quicker version of self._trim(). It only enforces free disk space,
731 # not cache size limits. It doesn't actually look at real free disk space,
732 # only uses its cache values. self._trim() will be called later to enforce
733 # real trimming but doing this quick version here makes it possible to map
734 # an isolated that is larger than the current amount of free disk space when
735 # the cache size is already large.
Junji Watanabe38b28b02020-04-23 10:23:30 +0000736 while (self.policies.min_free_space and self._lru and
737 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000738 # self._free_disk is updated by this call.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400739 if self._remove_lru_file(False) == -1:
740 break
741
742 def _delete_file(self, digest, size=UNKNOWN_FILE_SIZE):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000743 """Deletes cache file from the file system.
744
745 Updates self._free_disk.
746 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400747 self._lock.assert_locked()
748 try:
749 if size == UNKNOWN_FILE_SIZE:
750 try:
751 size = fs.stat(self._path(digest)).st_size
752 except OSError:
753 size = 0
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000754 if file_path.try_remove(self._path(digest)):
755 self._free_disk += size
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400756 except OSError as e:
757 if e.errno != errno.ENOENT:
758 logging.error('Error attempting to delete a file %s:\n%s' % (digest, e))
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400759
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000760 def _get_mtime(self, digest):
761 """Get mtime of cache file."""
762 return os.path.getmtime(self._path(digest))
763
764 def _is_valid_hash(self, digest):
765 """Verify digest with supported hash algos."""
766 for _, algo in isolated_format.SUPPORTED_ALGOS.items():
Quinten Yearsley0bc84ce2020-04-09 22:38:08 +0000767 if digest == isolated_format.hash_file(self._path(digest), algo):
768 return True
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000769 return False
770
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400771
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400772class NamedCache(Cache):
773 """Manages cache directories.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400774
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400775 A cache entry is a tuple (name, path), where
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400776 name is a short identifier that describes the contents of the cache, e.g.
777 "git_v8" could be all git repositories required by v8 builds, or
778 "build_chromium" could be build artefacts of the Chromium.
779 path is a directory path relative to the task run dir. Cache installation
780 puts the requested cache directory at the path.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400781 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400782 _DIR_ALPHABET = string.ascii_letters + string.digits
783 STATE_FILE = u'state.json'
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +0000784 NAMED_DIR = u'named'
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400785
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400786 def __init__(self, cache_dir, policies, time_fn=None):
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400787 """Initializes NamedCaches.
788
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400789 Arguments:
790 - cache_dir is a directory for persistent cache storage.
791 - policies is a CachePolicies instance.
792 - time_fn is a function that returns timestamp (float) and used to take
793 timestamps when new caches are requested. Used in unit tests.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400794 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400795 super(NamedCache, self).__init__(cache_dir)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400796 self._policies = policies
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000797 # LRU {cache_name -> tuple(cache_location, size)}
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400798 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
799 self._lru = lru.LRUDict()
800 if not fs.isdir(self.cache_dir):
801 fs.makedirs(self.cache_dir)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000802 elif fs.isfile(self.state_file):
Marc-Antoine Ruel3543e212018-05-23 01:04:34 +0000803 try:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400804 self._lru = lru.LRUDict.load(self.state_file)
Takuto Ikutac4b85ec2020-06-09 03:42:39 +0000805 for _, size in self._lru.values():
Takuto Ikuta1c717d72020-06-29 10:15:09 +0000806 if not isinstance(size, six.integer_types):
Takuto Ikuta6acf8f92020-07-02 02:06:42 +0000807 with open(self.state_file, 'r') as f:
808 logging.info('named cache state file: %s\n%s', self.state_file,
809 f.read())
Junji Watanabeedcf47d2020-06-11 08:41:01 +0000810 raise ValueError("size is not integer: %s" % size)
Takuto Ikutac4b85ec2020-06-09 03:42:39 +0000811
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400812 except ValueError:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000813 logging.exception(
814 'NamedCache: failed to load named cache state file; obliterating')
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400815 file_path.rmtree(self.cache_dir)
Takuto Ikuta568ddb22020-01-20 23:24:16 +0000816 fs.makedirs(self.cache_dir)
Takuto Ikutadadfbb02020-07-10 03:31:26 +0000817 self._lru = lru.LRUDict()
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000818 with self._lock:
819 self._try_upgrade()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400820 if time_fn:
821 self._lru.time_fn = time_fn
822
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400823 @property
824 def available(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000825 """Returns a set of names of available caches."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400826 with self._lock:
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000827 return set(self._lru)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400828
Takuto Ikutaeab23172020-07-02 03:50:02 +0000829 def _sudo_chown(self, path):
830 if sys.platform == 'win32':
831 return
832 uid = os.getuid()
833 if os.stat(path).st_uid == uid:
834 return
835 # Maybe owner of |path| is different from runner of this script. This is to
836 # make fs.rename work in that case.
837 # https://crbug.com/986676
838 subprocess.check_call(['sudo', '-n', 'chown', str(uid), path])
839
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000840 def install(self, dst, name):
841 """Creates the directory |dst| and moves a previous named cache |name| if it
842 was in the local named caches cache.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400843
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000844 dst must be absolute, unicode and must not exist.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400845
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000846 Returns the reused named cache size in bytes, or 0 if none was present.
847
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400848 Raises NamedCacheError if cannot install the cache.
849 """
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000850 logging.info('NamedCache.install(%r, %r)', dst, name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400851 with self._lock:
852 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000853 if fs.isdir(dst):
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400854 raise NamedCacheError(
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000855 'installation directory %r already exists' % dst)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400856
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000857 # Remove the named symlink if it exists.
858 link_name = self._get_named_path(name)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000859 if fs.exists(link_name):
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000860 # Remove the symlink itself, not its destination.
861 fs.remove(link_name)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000862
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000863 if name in self._lru:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000864 rel_cache, size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400865 abs_cache = os.path.join(self.cache_dir, rel_cache)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000866 if fs.isdir(abs_cache):
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000867 logging.info('- reusing %r; size was %d', rel_cache, size)
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000868 file_path.ensure_tree(os.path.dirname(dst))
Takuto Ikutaeab23172020-07-02 03:50:02 +0000869 self._sudo_chown(abs_cache)
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000870 fs.rename(abs_cache, dst)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400871 self._remove(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000872 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400873
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000874 logging.warning('- expected directory %r, does not exist', rel_cache)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400875 self._remove(name)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400876
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000877 # The named cache does not exist, create an empty directory. When
878 # uninstalling, we will move it back to the cache and create an an
879 # entry.
880 logging.info('- creating new directory')
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000881 file_path.ensure_tree(dst)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000882 return 0
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400883 except (IOError, OSError) as ex:
Marc-Antoine Ruel799bc4f2019-01-30 22:54:47 +0000884 # Raise using the original traceback.
885 exc = NamedCacheError(
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000886 'cannot install cache named %r at %r: %s' % (name, dst, ex))
Takuto Ikuta1c717d72020-06-29 10:15:09 +0000887 six.reraise(type(exc), exc, sys.exc_info()[2])
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000888 finally:
889 self._save()
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400890
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000891 def uninstall(self, src, name):
892 """Moves the cache directory back into the named cache hive for an eventual
893 reuse.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400894
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000895 The opposite of install().
896
897 src must be absolute and unicode. Its content is moved back into the local
898 named caches cache.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400899
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000900 Returns the named cache size in bytes.
901
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400902 Raises NamedCacheError if cannot uninstall the cache.
903 """
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000904 logging.info('NamedCache.uninstall(%r, %r)', src, name)
Junji Watanabe9cdfff52021-01-08 07:20:35 +0000905 start = time.time()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400906 with self._lock:
907 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000908 if not fs.isdir(src):
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400909 logging.warning(
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000910 'NamedCache: Directory %r does not exist anymore. Cache lost.',
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000911 src)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400912 return
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400913
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000914 if name in self._lru:
915 # This shouldn't happen but just remove the preexisting one and move
916 # on.
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000917 logging.error('- overwriting existing cache!')
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000918 self._remove(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000919
Takuto Ikuta93483272020-06-05 09:06:34 +0000920 # Calculate the size of the named cache to keep.
Takuto Ikuta995da062021-03-17 05:01:59 +0000921 size = file_path.get_recursive_size(src)
Takuto Ikuta262f8292020-08-26 01:54:22 +0000922 logging.info('- Size is %s', size)
923 if size is None:
924 # Do not save a named cache that was deleted.
925 return
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400926
927 # Move the dir and create an entry for the named cache.
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000928 rel_cache = self._allocate_dir()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400929 abs_cache = os.path.join(self.cache_dir, rel_cache)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000930 logging.info('- Moving to %r', rel_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400931 file_path.ensure_tree(os.path.dirname(abs_cache))
Takuto Ikutaeab23172020-07-02 03:50:02 +0000932 self._sudo_chown(src)
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000933 fs.rename(src, abs_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400934
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000935 self._lru.add(name, (rel_cache, size))
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000936 self._added.append(size)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000937
938 # Create symlink <cache_dir>/<named>/<name> -> <cache_dir>/<short name>
939 # for user convenience.
940 named_path = self._get_named_path(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000941 if fs.exists(named_path):
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000942 file_path.remove(named_path)
943 else:
944 file_path.ensure_tree(os.path.dirname(named_path))
945
946 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000947 fs.symlink(os.path.join(u'..', rel_cache), named_path)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000948 logging.info(
949 'NamedCache: Created symlink %r to %r', named_path, abs_cache)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000950 except OSError:
951 # Ignore on Windows. It happens when running as a normal user or when
952 # UAC is enabled and the user is a filtered administrator account.
953 if sys.platform != 'win32':
954 raise
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000955 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400956 except (IOError, OSError) as ex:
Marc-Antoine Ruel799bc4f2019-01-30 22:54:47 +0000957 # Raise using the original traceback.
958 exc = NamedCacheError(
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000959 'cannot uninstall cache named %r at %r: %s' % (name, src, ex))
Takuto Ikuta1c717d72020-06-29 10:15:09 +0000960 six.reraise(type(exc), exc, sys.exc_info()[2])
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000961 finally:
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000962 # Call save() at every uninstall. The assumptions are:
963 # - The total the number of named caches is low, so the state.json file
964 # is small, so the time it takes to write it to disk is short.
965 # - The number of mapped named caches per task is low, so the number of
966 # times save() is called on tear-down isn't high enough to be
967 # significant.
968 # - uninstall() sometimes throws due to file locking on Windows or
969 # access rights on Linux. We want to keep as many as possible.
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000970 self._save()
Junji Watanabe9cdfff52021-01-08 07:20:35 +0000971 logging.info('NamedCache.uninstall(%r, %r) took %d seconds', src, name,
972 time.time() - start)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400973
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000974 # Cache interface implementation.
975
976 def __len__(self):
977 with self._lock:
978 return len(self._lru)
979
980 def __iter__(self):
981 # This is not thread-safe.
982 return self._lru.__iter__()
983
John Budorickc6186972020-02-26 00:58:14 +0000984 def __contains__(self, name):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000985 with self._lock:
John Budorickc6186972020-02-26 00:58:14 +0000986 return name in self._lru
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000987
988 @property
989 def total_size(self):
990 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000991 return sum(size for _rel_path, size in self._lru.values())
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000992
993 def get_oldest(self):
994 with self._lock:
995 try:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000996 # (key, (value, ts))
997 return self._lru.get_oldest()[1][1]
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000998 except KeyError:
999 return None
1000
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001001 def remove_oldest(self):
1002 with self._lock:
1003 # TODO(maruel): Update self._added.
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001004 _name, size = self._remove_lru_item()
1005 return size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001006
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +00001007 def save(self):
1008 with self._lock:
1009 return self._save()
1010
John Budorickc6186972020-02-26 00:58:14 +00001011 def touch(self, *names):
1012 with self._lock:
1013 for name in names:
1014 if name in self._lru:
1015 self._lru.touch(name)
1016 self._save()
1017
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001018 def trim(self):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001019 evicted = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001020 with self._lock:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001021 if not fs.isdir(self.cache_dir):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001022 return evicted
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001023
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001024 # Trim according to maximum number of items.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001025 if self._policies.max_items:
1026 while len(self._lru) > self._policies.max_items:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001027 name, size = self._remove_lru_item()
1028 evicted.append(size)
1029 logging.info(
1030 'NamedCache.trim(): Removed %r(%d) due to max_items(%d)',
1031 name, size, self._policies.max_items)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001032
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001033 # Trim according to maximum age.
1034 if self._policies.max_age_secs:
1035 cutoff = self._lru.time_fn() - self._policies.max_age_secs
1036 while self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001037 _name, (_data, ts) = self._lru.get_oldest()
1038 if ts >= cutoff:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001039 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001040 name, size = self._remove_lru_item()
1041 evicted.append(size)
1042 logging.info(
1043 'NamedCache.trim(): Removed %r(%d) due to max_age_secs(%d)',
1044 name, size, self._policies.max_age_secs)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001045
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001046 # Trim according to minimum free space.
1047 if self._policies.min_free_space:
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001048 while self._lru:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001049 free_space = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001050 if free_space >= self._policies.min_free_space:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001051 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001052 name, size = self._remove_lru_item()
1053 evicted.append(size)
1054 logging.info(
1055 'NamedCache.trim(): Removed %r(%d) due to min_free_space(%d)',
1056 name, size, self._policies.min_free_space)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001057
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001058 # Trim according to maximum total size.
1059 if self._policies.max_cache_size:
1060 while self._lru:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001061 total = sum(size for _rel_cache, size in self._lru.values())
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001062 if total <= self._policies.max_cache_size:
1063 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001064 name, size = self._remove_lru_item()
1065 evicted.append(size)
1066 logging.info(
1067 'NamedCache.trim(): Removed %r(%d) due to max_cache_size(%d)',
1068 name, size, self._policies.max_cache_size)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001069
Marc-Antoine Ruele79ddbf2018-06-13 18:33:07 +00001070 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001071 return evicted
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001072
1073 def cleanup(self):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001074 """Removes unknown directories.
1075
1076 Does not recalculate the cache size since it's surprisingly slow on some
1077 OSes.
1078 """
1079 success = True
1080 with self._lock:
1081 try:
1082 actual = set(fs.listdir(self.cache_dir))
1083 actual.discard(self.NAMED_DIR)
1084 actual.discard(self.STATE_FILE)
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001085 expected = {v[0]: k for k, v in self._lru.items()}
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001086 # First, handle the actual cache content.
1087 # Remove missing entries.
1088 for missing in (set(expected) - actual):
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001089 name, size = self._lru.pop(expected[missing])
1090 logging.warning(
1091 'NamedCache.cleanup(): Missing on disk %r(%d)', name, size)
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001092 # Remove unexpected items.
1093 for unexpected in (actual - set(expected)):
1094 try:
1095 p = os.path.join(self.cache_dir, unexpected)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001096 logging.warning(
1097 'NamedCache.cleanup(): Unexpected %r', unexpected)
Marc-Antoine Ruel41362222018-06-28 14:52:34 +00001098 if fs.isdir(p) and not fs.islink(p):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001099 file_path.rmtree(p)
1100 else:
1101 fs.remove(p)
1102 except (IOError, OSError) as e:
1103 logging.error('Failed to remove %s: %s', unexpected, e)
1104 success = False
1105
1106 # Second, fix named cache links.
1107 named = os.path.join(self.cache_dir, self.NAMED_DIR)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001108 if fs.isdir(named):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001109 actual = set(fs.listdir(named))
1110 expected = set(self._lru)
1111 # Confirm entries. Do not add missing ones for now.
1112 for name in expected.intersection(actual):
1113 p = os.path.join(self.cache_dir, self.NAMED_DIR, name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001114 expected_link = os.path.join(u'..', self._lru[name][0])
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001115 if fs.islink(p):
1116 link = fs.readlink(p)
1117 if expected_link == link:
1118 continue
1119 logging.warning(
1120 'Unexpected symlink for cache %s: %s, expected %s',
1121 name, link, expected_link)
1122 else:
1123 logging.warning('Unexpected non symlink for cache %s', name)
Marc-Antoine Ruel41362222018-06-28 14:52:34 +00001124 if fs.isdir(p) and not fs.islink(p):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001125 file_path.rmtree(p)
1126 else:
1127 fs.remove(p)
1128 # Remove unexpected items.
1129 for unexpected in (actual - expected):
1130 try:
1131 p = os.path.join(self.cache_dir, self.NAMED_DIR, unexpected)
1132 if fs.isdir(p):
1133 file_path.rmtree(p)
1134 else:
1135 fs.remove(p)
1136 except (IOError, OSError) as e:
1137 logging.error('Failed to remove %s: %s', unexpected, e)
1138 success = False
1139 finally:
1140 self._save()
1141 return success
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001142
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001143 # Internal functions.
1144
1145 def _try_upgrade(self):
1146 """Upgrades from the old format to the new one if necessary.
1147
1148 This code can be removed so all bots are known to have the right new format.
1149 """
1150 if not self._lru:
1151 return
1152 _name, (data, _ts) = self._lru.get_oldest()
1153 if isinstance(data, (list, tuple)):
1154 return
1155 # Update to v2.
1156 def upgrade(_name, rel_cache):
1157 abs_cache = os.path.join(self.cache_dir, rel_cache)
Takuto Ikuta995da062021-03-17 05:01:59 +00001158 return rel_cache, file_path.get_recursive_size(abs_cache)
1159
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001160 self._lru.transform(upgrade)
1161 self._save()
1162
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001163 def _remove_lru_item(self):
1164 """Removes the oldest LRU entry. LRU must not be empty."""
1165 name, ((_rel_path, size), _ts) = self._lru.get_oldest()
1166 logging.info('Removing named cache %r', name)
1167 self._remove(name)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001168 return name, size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001169
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001170 def _allocate_dir(self):
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001171 """Creates and returns relative path of a new cache directory.
1172
1173 In practice, it is a 2-letter string.
1174 """
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001175 # We randomly generate directory names that have two lower/upper case
1176 # letters or digits. Total number of possibilities is (26*2 + 10)^2 = 3844.
1177 abc_len = len(self._DIR_ALPHABET)
1178 tried = set()
1179 while len(tried) < 1000:
1180 i = random.randint(0, abc_len * abc_len - 1)
1181 rel_path = (
Takuto Ikuta1c717d72020-06-29 10:15:09 +00001182 self._DIR_ALPHABET[i // abc_len] + self._DIR_ALPHABET[i % abc_len])
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001183 if rel_path in tried:
1184 continue
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001185 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001186 if not fs.exists(abs_path):
1187 return rel_path
1188 tried.add(rel_path)
1189 raise NamedCacheError(
1190 'could not allocate a new cache dir, too many cache dirs')
1191
1192 def _remove(self, name):
1193 """Removes a cache directory and entry.
1194
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001195 Returns:
1196 Number of caches deleted.
1197 """
1198 self._lock.assert_locked()
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001199 # First try to remove the alias if it exists.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001200 named_dir = self._get_named_path(name)
1201 if fs.islink(named_dir):
1202 fs.unlink(named_dir)
1203
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001204 # Then remove the actual data.
1205 if name not in self._lru:
1206 return
1207 rel_path, _size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001208 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001209 if fs.isdir(abs_path):
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001210 file_path.rmtree(abs_path)
1211 self._lru.pop(name)
1212
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001213 def _save(self):
1214 self._lock.assert_locked()
1215 file_path.ensure_tree(self.cache_dir)
1216 self._lru.save(self.state_file)
1217
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001218 def _get_named_path(self, name):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001219 return os.path.join(self.cache_dir, self.NAMED_DIR, name)