blob: 54ed0c9cce621289f758db15c4796f93de7fd930 [file] [log] [blame]
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -04001# Copyright 2018 The LUCI Authors. All rights reserved.
2# Use of this source code is governed under the Apache License, Version 2.0
3# that can be found in the LICENSE file.
4
5"""Define local cache policies."""
6
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -04007import errno
8import io
9import logging
10import os
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040011import random
12import string
Junji Watanabe7b720782020-07-01 01:51:07 +000013import subprocess
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040014import sys
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000015import time
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040016
17from utils import file_path
18from utils import fs
19from utils import lru
20from utils import threading_utils
21from utils import tools
Lei Leife202df2019-06-11 17:33:34 +000022tools.force_local_third_party()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040023
Lei Leife202df2019-06-11 17:33:34 +000024# third_party/
25import six
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040026
Junji Watanabe5e73aab2020-04-09 04:20:27 +000027import isolated_format
28
# Sentinel passed as a file size when the real size is not known; this is
# generally the case for .isolated files.
UNKNOWN_FILE_SIZE = None
32
33
def file_write(path, content_generator):
  """Streams the chunks yielded by content_generator into the file at path.

  The intermediary directory is created as needed before the file is opened.

  Returns the total number of bytes written.

  Kept as a free function so unit tests can mock it out.
  """
  parent_dir = os.path.dirname(path)
  file_path.ensure_tree(parent_dir)
  written = 0
  with fs.open(path, 'wb') as out:
    for chunk in content_generator:
      written += len(chunk)
      out.write(chunk)
  return written
50
51
def is_valid_file(path, size):
  """Tells whether the file at path looks like a valid item.

  Only two things are checked: that the file exists and, when the expected
  size is known, that the size on disk matches it. A mismatch or a failure to
  stat the file is logged as a warning.
  """
  if size == UNKNOWN_FILE_SIZE:
    # Nothing to compare against; existence is the only criterion.
    return fs.isfile(path)
  try:
    on_disk = fs.stat(path).st_size
  except OSError as exc:
    logging.warning('Can\'t read item %s, assuming it\'s invalid: %s',
                    os.path.basename(path), exc)
    return False
  if size == on_disk:
    return True
  logging.warning(
      'Found invalid item %s; %d != %d',
      os.path.basename(path), on_disk, size)
  return False
71
72
def trim_caches(caches, path, min_free_space, max_age_secs):
  """Trims multiple caches.

  The goal here is to trim all caches in a coherent LRU fashion, deleting
  older items independent of which container they belong to.

  Two policies are enforced first, across all caches:
  - max_age_secs
  - min_free_space

  Once that's done, each cache's own policies are enforced via its trim().

  Arguments:
    caches: cache objects exposing __len__(), get_oldest(), remove_oldest()
            and trim().
    path: directory whose free disk space is measured.
    min_free_space: free space in bytes to maintain; 0 or None disables the
                    check.
    max_age_secs: maximum age of an item in seconds; 0 or None disables the
                  check.

  Returns:
    List containing the size of all items evicted.
  """
  # Treat None like 0 ("policy disabled") so that the numeric comparison and
  # the %d formatting below cannot fail.
  min_free_space = min_free_space or 0
  min_ts = time.time() - max_age_secs if max_age_secs else 0
  free_disk = file_path.get_free_space(path) if min_free_space else 0
  logging.info("min_ts: %d, free_disk: %d, min_free_space: %d", min_ts,
               free_disk, min_free_space)
  total = []
  # Gate on the policies themselves, not on the measured free space: a
  # completely full disk reports 0 bytes free and must still be trimmed.
  if min_ts or min_free_space:
    while True:
      oldest = [(c, c.get_oldest()) for c in caches if len(c) > 0]
      if not oldest:
        break
      # Only the single oldest entry is needed; no need to sort everything.
      c, ts = min(oldest, key=lambda k: k[1])
      if ts >= min_ts and free_disk >= min_free_space:
        break
      total.append(c.remove_oldest())
      if min_free_space:
        free_disk = file_path.get_free_space(path)
    logging.info("free_disk after removing oldest entries: %d", free_disk)
  # Evaluate each cache's own policies.
  for c in caches:
    total.extend(c.trim())
  return total
110
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000111
class NamedCacheError(Exception):
  """Error that is specific to named caches."""
114
115
class NoMoreSpace(Exception):
  """Raised when there is not enough space to map the whole directory."""
119
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -0400120
class CachePolicies(object):
  """Plain container for the retention knobs shared by the caches."""

  def __init__(self, max_cache_size, min_free_space, max_items, max_age_secs):
    """Common caching policies for the multiple caches (isolated, named, cipd).

    Arguments:
    - max_cache_size: Trim if the cache gets larger than this value. If 0, the
                      cache is effectively a leak.
    - min_free_space: Trim if disk free space becomes lower than this value. If
                      0, it will unconditionally fill the disk.
    - max_items: Maximum number of items to keep in the cache. If 0, do not
                 enforce a limit.
    - max_age_secs: Maximum age an item is kept in the cache until it is
                    automatically evicted. Having a lot of dead luggage slows
                    everything down.
    """
    self.max_cache_size = max_cache_size
    self.min_free_space = min_free_space
    self.max_items = max_items
    self.max_age_secs = max_age_secs

  def __str__(self):
    """Renders every policy; the two byte sizes are also shown in GiB."""
    to_gib = lambda num_bytes: float(num_bytes) / 1024**3
    fields = (
        self.max_cache_size,
        to_gib(self.max_cache_size),
        self.max_items,
        self.min_free_space,
        to_gib(self.min_free_space),
        self.max_age_secs,
    )
    template = ('CachePolicies(max_cache_size=%s (%.3f GiB); max_items=%s; '
                'min_free_space=%s (%.3f GiB); max_age_secs=%s)')
    return template % fields
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400147
148
class CacheMiss(Exception):
  """Raised when an item is not in cache.

  The digest that was looked up is kept on the instance as .digest.
  """

  def __init__(self, digest):
    message = 'Item with digest %r is not found in cache' % digest
    super(CacheMiss, self).__init__(message)
    self.digest = digest
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400155
156
class Cache(object):
  """Abstract base class for the local caches.

  It only owns the cache directory, a lock and the 'added'/'used' profiling
  lists; every other member must be implemented by subclasses.
  """

  def __init__(self, cache_dir):
    """
    Arguments:
      cache_dir: absolute path as a text (unicode) string, or None.
    """
    if cache_dir is not None:
      assert isinstance(cache_dir, six.text_type), cache_dir
      assert file_path.isabs(cache_dir), cache_dir
    self.cache_dir = cache_dir
    self._lock = threading_utils.LockWithAssert()
    # Profiling values: sizes of the entries added and used.
    self._added = []
    self._used = []

  def __bool__(self):
    """A cache is always True.

    Otherwise truthiness falls back to __len__, which is surprising.
    """
    return True

  # Python 2 looks up __nonzero__ instead of __bool__.
  __nonzero__ = __bool__

  def __len__(self):
    """Returns the number of entries in the cache."""
    raise NotImplementedError()

  def __iter__(self):
    """Iterates over the names of all the entries."""
    raise NotImplementedError()

  def __contains__(self, name):
    """Returns whether an entry is in the cache."""
    raise NotImplementedError()

  @property
  def total_size(self):
    """Returns the total size of the cache in bytes."""
    raise NotImplementedError()

  @property
  def added(self):
    """Returns a copy of the list of sizes of each entry added."""
    with self._lock:
      return list(self._added)

  @property
  def used(self):
    """Returns a copy of the list of sizes of each entry used."""
    with self._lock:
      return list(self._used)

  def get_oldest(self):
    """Returns the timestamp of the oldest cache entry, or None.

    Used for manual trimming.
    """
    raise NotImplementedError()

  def remove_oldest(self):
    """Removes the oldest item from the cache.

    Returns:
      Size of the oldest item.

    Used for manual trimming.
    """
    raise NotImplementedError()

  def save(self):
    """Saves the current cache to disk."""
    raise NotImplementedError()

  def trim(self):
    """Enforces cache policies, then calls save().

    Returns:
      List with the size of evicted items.
    """
    raise NotImplementedError()

  def cleanup(self):
    """Deletes any corrupted item from the cache, then calls trim(), then
    save().

    It is assumed to take significantly more time than trim().
    """
    raise NotImplementedError()
251
252
class ContentAddressedCache(Cache):
  """Content addressed cache that stores objects temporarily.

  Instances may be used from several threads at once, so implementations
  should guard their internal state with a lock.
  """

  def __enter__(self):
    """Context manager interface; yields the cache itself."""
    # TODO(maruel): Remove.
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    """Context manager interface; never suppresses an exception."""
    # TODO(maruel): Remove.
    return False

  def touch(self, digest, size):
    """Ensures item is not corrupted and updates its LRU position.

    Arguments:
      digest: hash digest of item to check.
      size: expected size of this item.

    Returns:
      True if item is in cache and not corrupted.
    """
    raise NotImplementedError()

  def getfileobj(self, digest):
    """Returns a readable file like object.

    If the file exists on the file system, the object has a .name attribute
    holding the absolute path to the file.
    """
    raise NotImplementedError()

  def write(self, digest, content):
    """Reads data from |content| generator and stores it in cache.

    Writing to an object that already exists is allowed. The data may be
    ignored (sent to /dev/null) but the timestamp is still updated.

    Returns digest to simplify chaining.
    """
    raise NotImplementedError()
299
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400300
class MemoryContentAddressedCache(ContentAddressedCache):
  """ContentAddressedCache implementation that stores everything in memory."""

  def __init__(self, file_mode_mask=0o500):
    """Args:
      file_mode_mask: bit mask to AND file mode with. Default value will make
          all mapped files to be read only.
    """
    super(MemoryContentAddressedCache, self).__init__(None)
    self._file_mode_mask = file_mode_mask
    # Items in a LRU lookup dict(digest: content bytes).
    self._lru = lru.LRUDict()

  # Cache interface implementation.

  def __len__(self):
    with self._lock:
      return len(self._lru)

  def __iter__(self):
    # This is not thread-safe.
    return self._lru.__iter__()

  def __contains__(self, digest):
    with self._lock:
      return digest in self._lru

  @property
  def total_size(self):
    """Returns the summed length of every stored content blob."""
    with self._lock:
      return sum(len(i) for i in self._lru.values())

  def get_oldest(self):
    """Returns the timestamp of the oldest entry, or None when empty."""
    with self._lock:
      try:
        # (key, (value, ts))
        return self._lru.get_oldest()[1][1]
      except KeyError:
        return None

  def remove_oldest(self):
    """Drops the oldest entry and returns the length of its content."""
    with self._lock:
      # TODO(maruel): Update self._added.
      # (key, (value, ts))
      return len(self._lru.pop_oldest()[1][0])

  def save(self):
    """Nothing to persist; the cache only lives in memory."""
    pass

  def trim(self):
    """Trimming is not implemented for MemoryContentAddressedCache."""
    return []

  def cleanup(self):
    """Cleaning is irrelevant, as there's no stateful serialization."""
    pass

  # ContentAddressedCache interface implementation.

  def touch(self, digest, size):
    """Bumps the LRU position of digest; |size| is not checked.

    Returns:
      True if the item is in the cache, False otherwise.
    """
    with self._lock:
      try:
        self._lru.touch(digest)
      except KeyError:
        return False
    return True

  def getfileobj(self, digest):
    """Returns an io.BytesIO over the stored content.

    Records the access in the 'used' profiling list and bumps the LRU
    position of the item.

    Raises:
      CacheMiss: if digest is not in the cache.
    """
    with self._lock:
      try:
        d = self._lru[digest]
      except KeyError:
        raise CacheMiss(digest)
      self._used.append(len(d))
      self._lru.touch(digest)
    return io.BytesIO(d)

  def write(self, digest, content):
    """Joins the chunks of |content| and stores them under digest.

    Returns digest to simplify chaining.
    """
    # Assemble whole stream before taking the lock.
    data = b''.join(content)
    with self._lock:
      self._lru.add(digest, data)
      self._added.append(len(data))
    return digest
388 return digest
389
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400390
391class DiskContentAddressedCache(ContentAddressedCache):
392 """Stateful LRU cache in a flat hash table in a directory.
393
394 Saves its state as json file.
395 """
396 STATE_FILE = u'state.json'
397
Marc-Antoine Ruel79d42192019-02-06 19:24:16 +0000398 def __init__(self, cache_dir, policies, trim, time_fn=None):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400399 """
400 Arguments:
401 cache_dir: directory where to place the cache.
402 policies: CachePolicies instance, cache retention policies.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400403 trim: if True to enforce |policies| right away.
Marc-Antoine Ruel79d42192019-02-06 19:24:16 +0000404 It can be done later by calling trim() explicitly.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400405 """
406 # All protected methods (starting with '_') except _path should be called
407 # with self._lock held.
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400408 super(DiskContentAddressedCache, self).__init__(cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400409 self.policies = policies
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400410 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
411 # Items in a LRU lookup dict(digest: size).
412 self._lru = lru.LRUDict()
413 # Current cached free disk space. It is updated by self._trim().
414 file_path.ensure_tree(self.cache_dir)
415 self._free_disk = file_path.get_free_space(self.cache_dir)
416 # The first item in the LRU cache that must not be evicted during this run
417 # since it was referenced. All items more recent that _protected in the LRU
418 # cache are also inherently protected. It could be a set() of all items
419 # referenced but this increases memory usage without a use case.
420 self._protected = None
421 # Cleanup operations done by self._load(), if any.
422 self._operations = []
423 with tools.Profiler('Setup'):
424 with self._lock:
425 self._load(trim, time_fn)
426
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000427 # Cache interface implementation.
428
429 def __len__(self):
430 with self._lock:
431 return len(self._lru)
432
433 def __iter__(self):
434 # This is not thread-safe.
435 return self._lru.__iter__()
436
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400437 def __contains__(self, digest):
438 with self._lock:
439 return digest in self._lru
440
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400441 @property
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400442 def total_size(self):
443 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000444 return sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400445
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000446 def get_oldest(self):
447 with self._lock:
448 try:
449 # (key, (value, ts))
450 return self._lru.get_oldest()[1][1]
451 except KeyError:
452 return None
453
454 def remove_oldest(self):
455 with self._lock:
456 # TODO(maruel): Update self._added.
457 return self._remove_lru_file(True)
458
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000459 def save(self):
460 with self._lock:
461 return self._save()
462
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000463 def trim(self):
464 """Forces retention policies."""
465 with self._lock:
466 return self._trim()
467
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400468 def cleanup(self):
469 """Cleans up the cache directory.
470
471 Ensures there is no unknown files in cache_dir.
472 Ensures the read-only bits are set correctly.
473
474 At that point, the cache was already loaded, trimmed to respect cache
475 policies.
476 """
477 with self._lock:
Lei Leife202df2019-06-11 17:33:34 +0000478 fs.chmod(self.cache_dir, 0o700)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400479 # Ensure that all files listed in the state still exist and add new ones.
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000480 previous = set(self._lru)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400481 # It'd be faster if there were a readdir() function.
482 for filename in fs.listdir(self.cache_dir):
483 if filename == self.STATE_FILE:
Lei Leife202df2019-06-11 17:33:34 +0000484 fs.chmod(os.path.join(self.cache_dir, filename), 0o600)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400485 continue
486 if filename in previous:
Lei Leife202df2019-06-11 17:33:34 +0000487 fs.chmod(os.path.join(self.cache_dir, filename), 0o400)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400488 previous.remove(filename)
489 continue
490
491 # An untracked file. Delete it.
492 logging.warning('Removing unknown file %s from cache', filename)
493 p = self._path(filename)
494 if fs.isdir(p):
495 try:
496 file_path.rmtree(p)
497 except OSError:
498 pass
499 else:
500 file_path.try_remove(p)
501 continue
502
503 if previous:
504 # Filter out entries that were not found.
505 logging.warning('Removed %d lost files', len(previous))
506 for filename in previous:
507 self._lru.pop(filename)
508 self._save()
509
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000510 # Verify hash of every single item to detect corruption. the corrupted
511 # files will be evicted.
512 with self._lock:
Takuto Ikuta1c717d72020-06-29 10:15:09 +0000513 for digest, (_, timestamp) in list(self._lru._items.items()):
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000514 # verify only if the mtime is grather than the timestamp in state.json
515 # to avoid take too long time.
516 if self._get_mtime(digest) <= timestamp:
Quinten Yearsley0bc84ce2020-04-09 22:38:08 +0000517 continue
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000518 logging.warning('Item has been modified. item: %s', digest)
519 if self._is_valid_hash(digest):
Quinten Yearsley0bc84ce2020-04-09 22:38:08 +0000520 # Update timestamp in state.json
521 self._lru.touch(digest)
522 continue
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000523 # remove corrupted file from LRU and file system
524 self._lru.pop(digest)
525 self._delete_file(digest, UNKNOWN_FILE_SIZE)
526 logging.error('Deleted corrupted item: %s', digest)
527 self._save()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400528
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000529 # ContentAddressedCache interface implementation.
530
531 def __contains__(self, digest):
532 with self._lock:
533 return digest in self._lru
534
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400535 def touch(self, digest, size):
536 """Verifies an actual file is valid and bumps its LRU position.
537
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000538 Returns False if the file is missing or invalid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400539
540 Note that is doesn't compute the hash so it could still be corrupted if the
541 file size didn't change.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400542 """
543 # Do the check outside the lock.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000544 looks_valid = is_valid_file(self._path(digest), size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400545
546 # Update its LRU position.
547 with self._lock:
548 if digest not in self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000549 if looks_valid:
550 # Exists but not in the LRU anymore.
551 self._delete_file(digest, size)
552 return False
553 if not looks_valid:
554 self._lru.pop(digest)
555 # Exists but not in the LRU anymore.
556 self._delete_file(digest, size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400557 return False
558 self._lru.touch(digest)
559 self._protected = self._protected or digest
560 return True
561
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400562 def getfileobj(self, digest):
563 try:
564 f = fs.open(self._path(digest), 'rb')
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400565 except IOError:
566 raise CacheMiss(digest)
Vadim Shtayura33054fa2018-11-01 12:47:59 +0000567 with self._lock:
568 try:
569 self._used.append(self._lru[digest])
570 except KeyError:
571 # If the digest is not actually in _lru, assume it is a cache miss.
572 # Existing file will be overwritten by whoever uses the cache and added
573 # to _lru.
574 f.close()
575 raise CacheMiss(digest)
576 return f
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400577
578 def write(self, digest, content):
579 assert content is not None
580 with self._lock:
581 self._protected = self._protected or digest
582 path = self._path(digest)
583 # A stale broken file may remain. It is possible for the file to have write
584 # access bit removed which would cause the file_write() call to fail to open
585 # in write mode. Take no chance here.
586 file_path.try_remove(path)
587 try:
588 size = file_write(path, content)
589 except:
590 # There are two possible places were an exception can occur:
591 # 1) Inside |content| generator in case of network or unzipping errors.
592 # 2) Inside file_write itself in case of disk IO errors.
593 # In any case delete an incomplete file and propagate the exception to
594 # caller, it will be logged there.
595 file_path.try_remove(path)
596 raise
597 # Make the file read-only in the cache. This has a few side-effects since
598 # the file node is modified, so every directory entries to this file becomes
599 # read-only. It's fine here because it is a new file.
600 file_path.set_read_only(path, True)
601 with self._lock:
602 self._add(digest, size)
603 return digest
604
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000605 # Internal functions.
606
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400607 def _load(self, trim, time_fn):
608 """Loads state of the cache from json file.
609
610 If cache_dir does not exist on disk, it is created.
611 """
612 self._lock.assert_locked()
613
614 if not fs.isfile(self.state_file):
615 if not fs.isdir(self.cache_dir):
616 fs.makedirs(self.cache_dir)
617 else:
618 # Load state of the cache.
619 try:
620 self._lru = lru.LRUDict.load(self.state_file)
621 except ValueError as err:
622 logging.error('Failed to load cache state: %s' % (err,))
Takuto Ikutaeccc88c2019-12-13 14:46:32 +0000623 # Don't want to keep broken cache dir.
624 file_path.rmtree(self.cache_dir)
625 fs.makedirs(self.cache_dir)
Matt Kotsenasefe30092020-03-19 01:12:55 +0000626 self._free_disk = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400627 if time_fn:
628 self._lru.time_fn = time_fn
629 if trim:
630 self._trim()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400631
632 def _save(self):
633 """Saves the LRU ordering."""
634 self._lock.assert_locked()
635 if sys.platform != 'win32':
636 d = os.path.dirname(self.state_file)
637 if fs.isdir(d):
638 # Necessary otherwise the file can't be created.
639 file_path.set_read_only(d, False)
640 if fs.isfile(self.state_file):
641 file_path.set_read_only(self.state_file, False)
642 self._lru.save(self.state_file)
643
644 def _trim(self):
645 """Trims anything we don't know, make sure enough free space exists."""
646 self._lock.assert_locked()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000647 evicted = []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400648
649 # Trim old items.
650 if self.policies.max_age_secs:
651 cutoff = self._lru.time_fn() - self.policies.max_age_secs
652 while self._lru:
653 oldest = self._lru.get_oldest()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000654 # (key, (data, ts)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400655 if oldest[1][1] >= cutoff:
656 break
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000657 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400658
659 # Ensure maximum cache size.
660 if self.policies.max_cache_size:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000661 total_size = sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400662 while total_size > self.policies.max_cache_size:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000663 e = self._remove_lru_file(True)
664 evicted.append(e)
665 total_size -= e
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400666
667 # Ensure maximum number of items in the cache.
668 if self.policies.max_items and len(self._lru) > self.policies.max_items:
Marc-Antoine Ruel0fdee222019-10-10 14:42:40 +0000669 for _ in range(len(self._lru) - self.policies.max_items):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000670 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400671
672 # Ensure enough free space.
673 self._free_disk = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400674 while (
675 self.policies.min_free_space and
676 self._lru and
677 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000678 # self._free_disk is updated by this call.
679 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400680
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000681 if evicted:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000682 total_usage = sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400683 usage_percent = 0.
684 if total_usage:
685 usage_percent = 100. * float(total_usage) / self.policies.max_cache_size
686
687 logging.warning(
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000688 'Trimmed %d file(s) (%.1fkb) due to not enough free disk space:'
689 ' %.1fkb free, %.1fkb cache (%.1f%% of its maximum capacity of '
Junji Watanabe38b28b02020-04-23 10:23:30 +0000690 '%.1fkb)', len(evicted),
691 sum(evicted) / 1024., self._free_disk / 1024., total_usage / 1024.,
692 usage_percent, self.policies.max_cache_size / 1024.)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400693 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000694 return evicted
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400695
696 def _path(self, digest):
697 """Returns the path to one item."""
698 return os.path.join(self.cache_dir, digest)
699
700 def _remove_lru_file(self, allow_protected):
Quinten Yearsley0bc84ce2020-04-09 22:38:08 +0000701 """Removes the latest recently used file and returns its size.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000702
703 Updates self._free_disk.
704 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400705 self._lock.assert_locked()
706 try:
Takuto Ikutae40f76a2020-01-20 01:22:17 +0000707 digest, _ = self._lru.get_oldest()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400708 if not allow_protected and digest == self._protected:
Takuto Ikutae40f76a2020-01-20 01:22:17 +0000709 total_size = sum(self._lru.values())
710 msg = ('Not enough space to fetch the whole isolated tree.\n'
Takuto Ikutaa953f272020-01-20 02:59:17 +0000711 ' %s\n cache=%d bytes (%.3f GiB), %d items; '
712 '%s bytes (%.3f GiB) free_space') % (
713 self.policies, total_size, float(total_size) / 1024**3,
714 len(self._lru), self._free_disk,
715 float(self._free_disk) / 1024**3)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400716 raise NoMoreSpace(msg)
717 except KeyError:
718 # That means an internal error.
719 raise NoMoreSpace('Nothing to remove, can\'t happend')
720 digest, (size, _) = self._lru.pop_oldest()
Takuto Ikuta8d8ca9b2021-02-26 02:31:43 +0000721 logging.debug('Removing LRU file %s with size %s bytes', digest, size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400722 self._delete_file(digest, size)
723 return size
724
725 def _add(self, digest, size=UNKNOWN_FILE_SIZE):
726 """Adds an item into LRU cache marking it as a newest one."""
727 self._lock.assert_locked()
728 if size == UNKNOWN_FILE_SIZE:
729 size = fs.stat(self._path(digest)).st_size
730 self._added.append(size)
731 self._lru.add(digest, size)
732 self._free_disk -= size
733 # Do a quicker version of self._trim(). It only enforces free disk space,
734 # not cache size limits. It doesn't actually look at real free disk space,
735 # only uses its cache values. self._trim() will be called later to enforce
736 # real trimming but doing this quick version here makes it possible to map
737 # an isolated that is larger than the current amount of free disk space when
738 # the cache size is already large.
Junji Watanabe38b28b02020-04-23 10:23:30 +0000739 while (self.policies.min_free_space and self._lru and
740 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000741 # self._free_disk is updated by this call.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400742 if self._remove_lru_file(False) == -1:
743 break
744
def _delete_file(self, digest, size=UNKNOWN_FILE_SIZE):
  """Deletes the cache file for |digest| from the file system.

  Adds |size| back to self._free_disk when file_path.try_remove() reports
  that the file was removed. When |size| is UNKNOWN_FILE_SIZE it is read from
  the file first; if that stat fails, 0 is used.

  An OSError other than ENOENT is logged, never raised.

  The caller must hold self._lock.
  """
  self._lock.assert_locked()
  try:
    # Resolve the path once; it is needed for both the stat and the removal.
    path = self._path(digest)
    if size == UNKNOWN_FILE_SIZE:
      try:
        size = fs.stat(path).st_size
      except OSError:
        size = 0
    if file_path.try_remove(path):
      self._free_disk += size
  except OSError as e:
    # A file that is already gone is not worth reporting.
    if e.errno != errno.ENOENT:
      # Let logging do the formatting so it only happens if the record is
      # actually emitted.
      logging.error('Error attempting to delete a file %s:\n%s', digest, e)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400762
def _get_mtime(self, digest):
  """Returns the modification time of the cache file for |digest|."""
  cache_file = self._path(digest)
  return os.path.getmtime(cache_file)
766
def _is_valid_hash(self, digest):
  """Returns True if the cached file hashes to |digest| with any supported
  algorithm.

  The file is hashed once per algorithm in isolated_format.SUPPORTED_ALGOS
  until one of them matches.
  """
  return any(
      digest == isolated_format.hash_file(self._path(digest), algo)
      for algo in isolated_format.SUPPORTED_ALGOS.values())
773
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400774
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400775class NamedCache(Cache):
776 """Manages cache directories.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400777
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400778 A cache entry is a tuple (name, path), where
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400779 name is a short identifier that describes the contents of the cache, e.g.
780 "git_v8" could be all git repositories required by v8 builds, or
781 "build_chromium" could be build artefacts of the Chromium.
782 path is a directory path relative to the task run dir. Cache installation
783 puts the requested cache directory at the path.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400784 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400785 _DIR_ALPHABET = string.ascii_letters + string.digits
786 STATE_FILE = u'state.json'
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +0000787 NAMED_DIR = u'named'
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400788
def __init__(self, cache_dir, policies, time_fn=None):
  """Initializes NamedCaches.

  Arguments:
  - cache_dir is a directory for persistent cache storage. It is created if
    missing.
  - policies is a CachePolicies instance.
  - time_fn is a function that returns timestamp (float) and used to take
    timestamps when new caches are requested. Used in unit tests.

  If the state file exists but cannot be loaded, or contains a non-integer
  size, the whole cache directory is wiped and recreated empty.
  """
  super(NamedCache, self).__init__(cache_dir)
  self._policies = policies
  self.state_file = os.path.join(cache_dir, self.STATE_FILE)
  # LRU {cache_name -> tuple(cache_location, size)}
  self._lru = lru.LRUDict()

  if not fs.isdir(self.cache_dir):
    fs.makedirs(self.cache_dir)
  elif fs.isfile(self.state_file):
    try:
      self._lru = lru.LRUDict.load(self.state_file)
      for _, size in self._lru.values():
        if isinstance(size, six.integer_types):
          continue
        # Dump the raw state file to help diagnose how it got corrupted
        # before it is deleted below.
        with open(self.state_file, 'r') as f:
          logging.info('named cache state file: %s\n%s', self.state_file,
                       f.read())
        raise ValueError("size is not integer: %s" % size)
    except ValueError:
      logging.exception(
          'NamedCache: failed to load named cache state file; obliterating')
      file_path.rmtree(self.cache_dir)
      fs.makedirs(self.cache_dir)
      self._lru = lru.LRUDict()
    with self._lock:
      self._try_upgrade()

  if time_fn:
    self._lru.time_fn = time_fn
825
@property
def available(self):
  """Returns a set with the names of the caches currently tracked."""
  with self._lock:
    names = set(self._lru)
  return names
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400831
def _sudo_chown(self, path):
  """Makes the current user the owner of |path| via sudo, if it is not already.

  Does nothing on Windows or when |path| is already owned by the current
  user. Raises subprocess.CalledProcessError if the chown command fails.
  """
  if sys.platform != 'win32':
    me = os.getuid()
    if os.stat(path).st_uid != me:
      # Maybe owner of |path| is different from runner of this script. This
      # is to make fs.rename work in that case.
      # https://crbug.com/986676
      subprocess.check_call(['sudo', '-n', 'chown', str(me), path])
842
def install(self, dst, name):
  """Creates the directory |dst| and moves a previous named cache |name| if it
  was in the local named caches cache.

  dst must be absolute, unicode and must not exist.

  The cache entry is removed from the LRU while it is installed; uninstall()
  registers it again. The state file is saved on every exit path.

  Returns the reused named cache size in bytes, or 0 if none was present.

  Raises NamedCacheError if cannot install the cache.
  """
  logging.info('NamedCache.install(%r, %r)', dst, name)
  with self._lock:
    try:
      if fs.isdir(dst):
        raise NamedCacheError(
            'installation directory %r already exists' % dst)

      # Remove the named symlink if it exists. Only the symlink itself is
      # removed, not its destination.
      alias = self._get_named_path(name)
      if fs.exists(alias):
        fs.remove(alias)

      if name in self._lru:
        rel_cache, size = self._lru.get(name)
        abs_cache = os.path.join(self.cache_dir, rel_cache)
        if not fs.isdir(abs_cache):
          # The LRU is out of sync with the disk: drop the stale entry and
          # fall through to creating a fresh directory.
          logging.warning(
              '- expected directory %r, does not exist', rel_cache)
          self._remove(name)
        else:
          logging.info('- reusing %r; size was %d', rel_cache, size)
          file_path.ensure_tree(os.path.dirname(dst))
          self._sudo_chown(abs_cache)
          fs.rename(abs_cache, dst)
          # The directory was moved away, so this only drops the LRU entry.
          self._remove(name)
          return size

      # The named cache does not exist, create an empty directory. When
      # uninstalling, we will move it back to the cache and create an entry.
      logging.info('- creating new directory')
      file_path.ensure_tree(dst)
      return 0
    except (IOError, OSError) as ex:
      # Raise using the original traceback.
      wrapped = NamedCacheError(
          'cannot install cache named %r at %r: %s' % (name, dst, ex))
      six.reraise(type(wrapped), wrapped, sys.exc_info()[2])
    finally:
      self._save()
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400893
def uninstall(self, src, name):
  """Moves the cache directory back into the named cache hive for an eventual
  reuse.

  The opposite of install().

  src must be absolute and unicode. Its content is moved back into the local
  named caches cache.

  Returns the named cache size in bytes, or None when |src| does not exist
  anymore or its size could not be determined; in both of these cases nothing
  is stored.

  Raises NamedCacheError if cannot uninstall the cache.
  """
  logging.info('NamedCache.uninstall(%r, %r)', src, name)
  start = time.time()
  with self._lock:
    try:
      if not fs.isdir(src):
        logging.warning(
            'NamedCache: Directory %r does not exist anymore. Cache lost.',
            src)
        return None

      if name in self._lru:
        # This shouldn't happen but just remove the preexisting one and move
        # on.
        logging.error('- overwriting existing cache!')
        self._remove(name)

      # Calculate the size of the named cache to keep.
      size = file_path.get_recursive_size(src)
      logging.info('- Size is %s', size)
      if size is None:
        # Do not save a named cache that was deleted.
        return None

      # Move the dir and create an entry for the named cache.
      rel_cache = self._allocate_dir()
      abs_cache = os.path.join(self.cache_dir, rel_cache)
      logging.info('- Moving to %r', rel_cache)
      file_path.ensure_tree(os.path.dirname(abs_cache))
      self._sudo_chown(src)
      fs.rename(src, abs_cache)

      self._lru.add(name, (rel_cache, size))
      self._added.append(size)

      # Create symlink <cache_dir>/<named>/<name> -> <cache_dir>/<short name>
      # for user convenience.
      alias = self._get_named_path(name)
      if not fs.exists(alias):
        file_path.ensure_tree(os.path.dirname(alias))
      else:
        file_path.remove(alias)

      try:
        fs.symlink(os.path.join(u'..', rel_cache), alias)
        logging.info(
            'NamedCache: Created symlink %r to %r', alias, abs_cache)
      except OSError:
        # Ignore on Windows. It happens when running as a normal user or when
        # UAC is enabled and the user is a filtered administrator account.
        if sys.platform != 'win32':
          raise
      return size
    except (IOError, OSError) as ex:
      # Raise using the original traceback.
      wrapped = NamedCacheError(
          'cannot uninstall cache named %r at %r: %s' % (name, src, ex))
      six.reraise(type(wrapped), wrapped, sys.exc_info()[2])
    finally:
      # Call save() at every uninstall. The assumptions are:
      # - The total number of named caches is low, so the state.json file
      #   is small, so the time it takes to write it to disk is short.
      # - The number of mapped named caches per task is low, so the number of
      #   times save() is called on tear-down isn't high enough to be
      #   significant.
      # - uninstall() sometimes throws due to file locking on Windows or
      #   access rights on Linux. We want to keep as many as possible.
      self._save()
      elapsed = time.time() - start
      logging.info('NamedCache.uninstall(%r, %r) took %d seconds', src, name,
                   elapsed)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400976
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000977 # Cache interface implementation.
978
def __len__(self):
  """Returns the number of named caches tracked in the LRU."""
  with self._lock:
    count = len(self._lru)
  return count
982
def __iter__(self):
  """Iterates over the underlying LRU.

  This is not thread-safe: the lock is not taken.
  """
  return iter(self._lru)
986
def __contains__(self, name):
  """Returns whether a cache called |name| is tracked in the LRU."""
  with self._lock:
    present = name in self._lru
  return present
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000990
@property
def total_size(self):
  """Returns the sum of the sizes recorded in the LRU for every cache."""
  with self._lock:
    total = 0
    for _rel_path, size in self._lru.values():
      total += size
    return total
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000995
def get_oldest(self):
  """Returns the timestamp of the oldest LRU entry, or None if it is empty."""
  with self._lock:
    try:
      # (key, (value, ts))
      _name, (_value, ts) = self._lru.get_oldest()
    except KeyError:
      return None
    return ts
1003
def remove_oldest(self):
  """Evicts the oldest named cache and returns its recorded size."""
  with self._lock:
    # TODO(maruel): Update self._added.
    removed = self._remove_lru_item()
    return removed[1]
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001009
def save(self):
  """Writes the LRU state to disk while holding the lock."""
  with self._lock:
    result = self._save()
  return result
1013
def touch(self, *names):
  """Touches the LRU entry of every known cache in |names|, then saves.

  Names that are not in the LRU are silently ignored.
  """
  with self._lock:
    known = [name for name in names if name in self._lru]
    for name in known:
      self._lru.touch(name)
    self._save()
1020
def trim(self):
  """Evicts the oldest named caches until every configured policy is met.

  The policies are applied in this order: max_items, max_age_secs,
  min_free_space, max_cache_size. A policy with a falsy value is skipped.

  Returns the list of sizes of the evicted caches, in eviction order. Nothing
  is done, and the state is not saved, when the cache directory is missing.
  """
  evicted = []

  def evict(message, limit):
    # Drops the oldest entry, records its size and logs which policy caused
    # the eviction.
    name, size = self._remove_lru_item()
    evicted.append(size)
    logging.info(message, name, size, limit)

  with self._lock:
    if not fs.isdir(self.cache_dir):
      return evicted

    policies = self._policies

    # Trim according to maximum number of items.
    if policies.max_items:
      while len(self._lru) > policies.max_items:
        evict(
            'NamedCache.trim(): Removed %r(%d) due to max_items(%d)',
            policies.max_items)

    # Trim according to maximum age.
    if policies.max_age_secs:
      cutoff = self._lru.time_fn() - policies.max_age_secs
      while self._lru:
        _name, (_data, ts) = self._lru.get_oldest()
        if ts >= cutoff:
          break
        evict(
            'NamedCache.trim(): Removed %r(%d) due to max_age_secs(%d)',
            policies.max_age_secs)

    # Trim according to minimum free space. The real free space is queried
    # again after each eviction.
    if policies.min_free_space:
      while self._lru:
        free_space = file_path.get_free_space(self.cache_dir)
        if free_space >= policies.min_free_space:
          break
        evict(
            'NamedCache.trim(): Removed %r(%d) due to min_free_space(%d)',
            policies.min_free_space)

    # Trim according to maximum total size.
    if policies.max_cache_size:
      while self._lru:
        total = sum(size for _rel_cache, size in self._lru.values())
        if total <= policies.max_cache_size:
          break
        evict(
            'NamedCache.trim(): Removed %r(%d) due to max_cache_size(%d)',
            policies.max_cache_size)

    self._save()
  return evicted
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001075
def cleanup(self):
  """Removes unknown directories.

  Reconciles the cache directory with the LRU in two passes:
  - LRU entries whose directory is missing on disk are dropped, and items in
    the cache directory that the LRU does not know about are deleted.
  - Under the named directory, entries that are not the expected symlink for
    a known cache, or that belong to no known cache, are deleted. Missing
    symlinks are not recreated.

  Does not recalculate the cache size since it's surprisingly slow on some
  OSes.

  The state file is saved on every exit path.

  Returns False if removing an unexpected item failed, True otherwise.
  """
  def remove_path(p):
    # A symlink must be removed as a link and never handed to rmtree(), even
    # when it points to a directory. Presumably fs.isdir() follows symlinks
    # like os.path.isdir() does, hence the explicit fs.islink() guard.
    if fs.isdir(p) and not fs.islink(p):
      file_path.rmtree(p)
    else:
      fs.remove(p)

  success = True
  with self._lock:
    try:
      actual = set(fs.listdir(self.cache_dir))
      actual.discard(self.NAMED_DIR)
      actual.discard(self.STATE_FILE)
      # {cache_location -> cache_name}
      expected = {v[0]: k for k, v in self._lru.items()}
      # First, handle the actual cache content.
      # Remove missing entries.
      for missing in (set(expected) - actual):
        # The popped value is the (cache_location, size) tuple.
        rel_cache, size = self._lru.pop(expected[missing])
        logging.warning(
            'NamedCache.cleanup(): Missing on disk %r(%d)', rel_cache, size)
      # Remove unexpected items.
      for unexpected in (actual - set(expected)):
        try:
          p = os.path.join(self.cache_dir, unexpected)
          logging.warning(
              'NamedCache.cleanup(): Unexpected %r', unexpected)
          remove_path(p)
        except (IOError, OSError) as e:
          logging.error('Failed to remove %s: %s', unexpected, e)
          success = False

      # Second, fix named cache links.
      named = os.path.join(self.cache_dir, self.NAMED_DIR)
      if fs.isdir(named):
        actual = set(fs.listdir(named))
        expected = set(self._lru)
        # Confirm entries. Do not add missing ones for now.
        for name in expected.intersection(actual):
          p = os.path.join(self.cache_dir, self.NAMED_DIR, name)
          expected_link = os.path.join(u'..', self._lru[name][0])
          if fs.islink(p):
            link = fs.readlink(p)
            if expected_link == link:
              continue
            logging.warning(
                'Unexpected symlink for cache %s: %s, expected %s',
                name, link, expected_link)
          else:
            logging.warning('Unexpected non symlink for cache %s', name)
          # Errors here are deliberately not caught, as before.
          remove_path(p)
        # Remove unexpected items. Stale entries here are typically symlinks,
        # possibly pointing at a directory that is still in use by another
        # cache, so they get the same symlink guard as the removals above.
        for unexpected in (actual - expected):
          try:
            p = os.path.join(self.cache_dir, self.NAMED_DIR, unexpected)
            remove_path(p)
          except (IOError, OSError) as e:
            logging.error('Failed to remove %s: %s', unexpected, e)
            success = False
    finally:
      self._save()
  return success
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001145
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001146 # Internal functions.
1147
def _try_upgrade(self):
  """Upgrades from the old format to the new one if necessary.

  The format is detected from the oldest entry: when its value is not a list
  or tuple, every value is treated as a bare relative cache path and is
  replaced by (relative path, recursive size on disk), then the state is
  saved.

  This code can be removed once all bots are known to have the right new
  format.
  """
  if self._lru:
    _name, (data, _ts) = self._lru.get_oldest()
    if not isinstance(data, (list, tuple)):
      # Update to v2.
      def to_v2(_name, rel_cache):
        location = os.path.join(self.cache_dir, rel_cache)
        return rel_cache, file_path.get_recursive_size(location)

      self._lru.transform(to_v2)
      self._save()
1165
def _remove_lru_item(self):
  """Removes the oldest LRU entry. LRU must not be empty.

  Returns a tuple (name, size) describing the removed named cache.
  """
  name, (entry, _ts) = self._lru.get_oldest()
  _rel_path, size = entry
  logging.info('Removing named cache %r, %d', name, size)
  self._remove(name)
  return name, size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001172
def _allocate_dir(self):
  """Returns the relative path of a cache directory that does not exist yet.

  The directory itself is not created here. In practice, the path is a
  2-letter string.

  Raises NamedCacheError when 1000 distinct candidates were all taken.
  """
  # We randomly generate directory names that have two lower/upper case
  # letters or digits. Total number of possibilities is (26*2 + 10)^2 = 3844.
  alphabet = self._DIR_ALPHABET
  base = len(alphabet)
  taken = set()
  while len(taken) < 1000:
    high, low = divmod(random.randint(0, base * base - 1), base)
    candidate = alphabet[high] + alphabet[low]
    if candidate in taken:
      continue
    if not fs.exists(os.path.join(self.cache_dir, candidate)):
      return candidate
    taken.add(candidate)
  raise NamedCacheError(
      'could not allocate a new cache dir, too many cache dirs')
1194
def _remove(self, name):
  """Removes the alias, the cache directory and the LRU entry of |name|.

  The LRU entry is dropped even when its directory is already gone from disk.
  Nothing beyond the alias is touched when |name| is not in the LRU.

  The caller must hold self._lock. Returns None.
  """
  self._lock.assert_locked()
  # First try to remove the alias if it exists.
  alias = self._get_named_path(name)
  if fs.islink(alias):
    fs.unlink(alias)

  # Then remove the actual data.
  if name in self._lru:
    rel_path, _size = self._lru.get(name)
    target = os.path.join(self.cache_dir, rel_path)
    if fs.isdir(target):
      file_path.rmtree(target)
    self._lru.pop(name)
1215
def _save(self):
  """Writes the LRU to the state file, creating the cache directory if needed.

  The caller must hold self._lock.
  """
  self._lock.assert_locked()
  root, target = self.cache_dir, self.state_file
  file_path.ensure_tree(root)
  self._lru.save(target)
1220
def _get_named_path(self, name):
  """Returns the path of the alias for cache |name| under the named directory."""
  parts = (self.cache_dir, self.NAMED_DIR, name)
  return os.path.join(*parts)