blob: e87636715b9890906fe88379f12e853c1d515594 [file] [log] [blame]
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -04001# Copyright 2018 The LUCI Authors. All rights reserved.
2# Use of this source code is governed under the Apache License, Version 2.0
3# that can be found in the LICENSE file.
4
5"""Define local cache policies."""
6
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -04007import errno
8import io
9import logging
10import os
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040011import random
12import string
Junji Watanabe7b720782020-07-01 01:51:07 +000013import subprocess
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040014import sys
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000015import time
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040016
17from utils import file_path
18from utils import fs
19from utils import lru
20from utils import threading_utils
21from utils import tools
Lei Leife202df2019-06-11 17:33:34 +000022tools.force_local_third_party()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040023
Lei Leife202df2019-06-11 17:33:34 +000024# third_party/
25import six
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040026
Junji Watanabe5e73aab2020-04-09 04:20:27 +000027import isolated_format
28
# Sentinel used as a size when the real file size is not known, which is
# generally the case for .isolated files. Code in this module that receives it
# either skips the size comparison or stats the file to learn the real size.
UNKNOWN_FILE_SIZE = None
32
33
def file_write(path, content_generator):
  """Streams the chunks yielded by content_generator into the file at path.

  The parent directory is created first when needed.

  Arguments:
    path: destination file; it is opened in binary write mode.
    content_generator: iterable yielding the chunks to write, in order.

  Returns:
    The total number of bytes written.

  Kept as a standalone function so unit tests can mock it out.
  """
  parent = os.path.dirname(path)
  file_path.ensure_tree(parent)
  written = 0
  with fs.open(path, 'wb') as out:
    for chunk in content_generator:
      written += len(chunk)
      out.write(chunk)
  return written
50
51
def is_valid_file(path, size):
  """Tells whether the file at path looks like a valid cache item.

  Only the existence of the file and, when the size is known, its size on disk
  are checked. The content itself is not inspected.

  Arguments:
    path: path of the file to check.
    size: expected size in bytes, or UNKNOWN_FILE_SIZE to only check that the
        file exists.

  Returns:
    True if the file exists and has the expected size, False otherwise.
  """
  if size == UNKNOWN_FILE_SIZE:
    # Nothing to compare against; existence is the only signal available.
    return fs.isfile(path)

  try:
    stat_result = fs.stat(path)
  except OSError as e:
    logging.warning('Can\'t read item %s, assuming it\'s invalid: %s',
                    os.path.basename(path), e)
    return False

  actual_size = stat_result.st_size
  if size != actual_size:
    logging.warning(
        'Found invalid item %s; %d != %d',
        os.path.basename(path), actual_size, size)
    return False
  return True
71
72
def trim_caches(caches, path, min_free_space, max_age_secs):
  """Trims multiple caches together, oldest item first.

  The goal is to trim all the caches in one coherent LRU fashion, deleting the
  older items independent of which container they belong to.

  Two global policies are enforced first:
  - max_age_secs
  - min_free_space

  Once that's done, each cache enforces its own policies via its trim().

  Arguments:
    caches: cache objects supporting len(), get_oldest(), remove_oldest() and
        trim().
    path: path given to file_path.get_free_space() to measure free disk space.
    min_free_space: evict until at least this much disk space is free. A falsy
        value disables the check.
    max_age_secs: evict items older than this many seconds. A falsy value
        disables the check.

  Returns:
    List containing the size of all items evicted.
  """
  min_ts = time.time() - max_age_secs if max_age_secs else 0
  free_disk = file_path.get_free_space(path) if min_free_space else 0
  logging.info("Trimming caches. min_ts: %d, free_disk: %d, min_free_space: %d",
               min_ts, free_disk, min_free_space)
  total = []
  # Gate on the policies themselves rather than on the derived values: a
  # completely full disk reports free_disk == 0, which must not be mistaken
  # for "no free space policy".
  if max_age_secs or min_free_space:
    while True:
      candidates = [(c, c.get_oldest()) for c in caches if len(c) > 0]
      if not candidates:
        break
      # min() returns the first of equal timestamps, like a stable sort would.
      c, ts = min(candidates, key=lambda k: k[1])
      if ts >= min_ts and free_disk >= min_free_space:
        break
      total.append(c.remove_oldest())
      if min_free_space:
        free_disk = file_path.get_free_space(path)
    logging.info("free_disk after removing oldest entries: %d", free_disk)
  # Evaluate each cache's own policies.
  for c in caches:
    total.extend(c.trim())
  return total
110
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000111
class NamedCacheError(Exception):
  """Error specific to named caches."""
114
115
class NoMoreSpace(Exception):
  """Raised when there is not enough space to map the whole directory."""
119
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -0400120
class CachePolicies(object):
  """Retention limits shared by the multiple caches (isolated, named, cipd)."""

  def __init__(self, max_cache_size, min_free_space, max_items, max_age_secs):
    """Stores the caching policies.

    Arguments:
    - max_cache_size: Trim if the cache gets larger than this value. If 0, the
                      cache is effectively a leak.
    - min_free_space: Trim if disk free space becomes lower than this value. If
                      0, it will unconditionally fill the disk.
    - max_items: Maximum number of items to keep in the cache. If 0, do not
                 enforce a limit.
    - max_age_secs: Maximum age an item is kept in the cache until it is
                    automatically evicted. Having a lot of dead luggage slows
                    everything down.
    """
    self.max_cache_size = max_cache_size
    self.min_free_space = min_free_space
    self.max_items = max_items
    self.max_age_secs = max_age_secs

  def __str__(self):
    """Renders the policies, showing both byte limits in bytes and in GiB."""
    max_cache_gib = float(self.max_cache_size) / 1024**3
    min_free_gib = float(self.min_free_space) / 1024**3
    template = ('CachePolicies(max_cache_size=%s (%.3f GiB); max_items=%s; '
                'min_free_space=%s (%.3f GiB); max_age_secs=%s)')
    return template % (
        self.max_cache_size, max_cache_gib, self.max_items,
        self.min_free_space, min_free_gib, self.max_age_secs)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400147
148
class CacheMiss(Exception):
  """Raised when an item is not in cache.

  Attributes:
    digest: the digest that was looked up.
  """

  def __init__(self, digest):
    self.digest = digest
    # Wrap digest in a 1-tuple so a tuple-valued digest is formatted as a
    # single value instead of being spread over the format arguments.
    super(CacheMiss, self).__init__(
        'Item with digest %r is not found in cache' % (digest,))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400155
156
class Cache(object):
  """Base class holding the state and interface common to every cache.

  Subclasses implement the container protocol (__len__, __iter__,
  __contains__), total_size and the trimming entry points.
  """

  def __init__(self, cache_dir):
    """Records the cache location and sets up locking and profiling lists.

    Arguments:
      cache_dir: absolute path (text type) of the cache directory, or None.
    """
    if cache_dir is not None:
      assert isinstance(cache_dir, six.text_type), cache_dir
      assert file_path.isabs(cache_dir), cache_dir
    self.cache_dir = cache_dir
    self._lock = threading_utils.LockWithAssert()
    # Profiling values, exposed through the added and used properties.
    self._added = []
    self._used = []

  def __bool__(self):
    """A cache is always True.

    Otherwise truthiness falls back to __len__, which is surprising.
    """
    return True

  # Python 2 spelling of __bool__.
  __nonzero__ = __bool__

  def __len__(self):
    """Returns the number of entries in the cache."""
    raise NotImplementedError()

  def __iter__(self):
    """Iterates over the names of all the entries."""
    raise NotImplementedError()

  def __contains__(self, name):
    """Returns whether an entry is in the cache."""
    raise NotImplementedError()

  @property
  def total_size(self):
    """Returns the total size of the cache in bytes."""
    raise NotImplementedError()

  @property
  def added(self):
    """Returns a copy of the list of sizes, one per entry added."""
    with self._lock:
      return list(self._added)

  @property
  def used(self):
    """Returns a copy of the list of sizes, one per entry used."""
    with self._lock:
      return list(self._used)

  def get_oldest(self):
    """Returns the timestamp of the oldest cache entry, or None.

    Used for manual trimming.
    """
    raise NotImplementedError()

  def remove_oldest(self):
    """Removes the oldest item from the cache.

    Returns:
      Size of the oldest item.

    Used for manual trimming.
    """
    raise NotImplementedError()

  def save(self):
    """Saves the current cache to disk."""
    raise NotImplementedError()

  def trim(self):
    """Enforces cache policies, then calls save().

    Returns:
      List with the size of evicted items.
    """
    raise NotImplementedError()

  def cleanup(self):
    """Deletes any corrupted item from the cache, then calls trim(), then
    save().

    It is assumed to take significantly more time than trim().
    """
    raise NotImplementedError()
251
252
class ContentAddressedCache(Cache):
  """Cache that temporarily stores objects keyed by their content digest.

  It can be accessed concurrently from multiple threads, so implementations
  should protect their internal state with some lock.
  """

  def __enter__(self):
    """Context manager interface; returns the cache itself."""
    # TODO(maruel): Remove.
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    """Context manager interface; never suppresses the exception."""
    # TODO(maruel): Remove.
    return False

  def touch(self, digest, size):
    """Ensures the item is not corrupted and updates its LRU position.

    Arguments:
      digest: hash digest of item to check.
      size: expected size of this item.

    Returns:
      True if item is in cache and not corrupted.
    """
    raise NotImplementedError()

  def getfileobj(self, digest):
    """Returns a readable file-like object for the item.

    If the file exists on the file system, the object will have a .name
    attribute with an absolute path to the file.
    """
    raise NotImplementedError()

  def write(self, digest, content):
    """Reads data from the |content| generator and stores it in the cache.

    It is possible to write to an object that already exists. The data may be
    ignored (sent to /dev/null) but the timestamp is still updated.

    Returns:
      digest, to simplify chaining.
    """
    raise NotImplementedError()
299
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400300
class MemoryContentAddressedCache(ContentAddressedCache):
  """ContentAddressedCache implementation that stores everything in memory."""

  def __init__(self, file_mode_mask=0o500):
    """Creates an empty in-memory cache.

    Arguments:
      file_mode_mask: bit mask to AND file mode with. Default value will make
          all mapped files to be read only.
    """
    super(MemoryContentAddressedCache, self).__init__(None)
    self._file_mode_mask = file_mode_mask
    # Items in a LRU lookup dict(digest: content).
    self._lru = lru.LRUDict()

  # Cache interface implementation.

  def __len__(self):
    """Returns the number of items held."""
    with self._lock:
      return len(self._lru)

  def __iter__(self):
    """Iterates over the digests. This is not thread-safe."""
    return self._lru.__iter__()

  def __contains__(self, digest):
    """Returns whether digest is held in the cache."""
    # This single definition serves both the Cache and the
    # ContentAddressedCache interfaces.
    with self._lock:
      return digest in self._lru

  @property
  def total_size(self):
    """Returns the summed length in bytes of every stored content."""
    with self._lock:
      return sum(len(i) for i in self._lru.values())

  def get_oldest(self):
    """Returns the timestamp of the oldest item, or None when empty."""
    with self._lock:
      try:
        # (key, (value, ts))
        return self._lru.get_oldest()[1][1]
      except KeyError:
        return None

  def remove_oldest(self):
    """Drops the oldest item and returns the length of its content."""
    with self._lock:
      # TODO(maruel): Update self._added.
      # (key, (value, ts))
      return len(self._lru.pop_oldest()[1][0])

  def save(self):
    """Does nothing; there is no on-disk state."""
    pass

  def trim(self):
    """Trimming is not implemented for MemoryContentAddressedCache."""
    return []

  def cleanup(self):
    """Cleaning is irrelevant, as there's no stateful serialization."""
    pass

  # ContentAddressedCache interface implementation.

  def touch(self, digest, size):
    """Bumps the LRU position of digest; size is not checked.

    Returns:
      True if the item is in the cache, False otherwise.
    """
    with self._lock:
      try:
        self._lru.touch(digest)
      except KeyError:
        return False
    return True

  def getfileobj(self, digest):
    """Returns an io.BytesIO over the stored content.

    Records the use for profiling and bumps the item's LRU position.

    Raises:
      CacheMiss: if digest is not in the cache.
    """
    with self._lock:
      try:
        d = self._lru[digest]
      except KeyError:
        raise CacheMiss(digest)
      self._used.append(len(d))
      self._lru.touch(digest)
    return io.BytesIO(d)

  def write(self, digest, content):
    """Joins the chunks from |content| and stores them under digest.

    Returns:
      digest, to simplify chaining.
    """
    # Assemble whole stream before taking the lock.
    data = six.b('').join(content)
    with self._lock:
      self._lru.add(digest, data)
      self._added.append(len(data))
    return digest
388 return digest
389
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400390
391class DiskContentAddressedCache(ContentAddressedCache):
392 """Stateful LRU cache in a flat hash table in a directory.
393
394 Saves its state as json file.
395 """
396 STATE_FILE = u'state.json'
397
def __init__(self, cache_dir, policies, trim, time_fn=None):
  """Sets up the cache rooted at cache_dir and loads its saved state.

  Arguments:
    cache_dir: directory where to place the cache.
    policies: CachePolicies instance, cache retention policies.
    trim: if True, enforce |policies| right away. It can be done later by
        calling trim() explicitly.
    time_fn: when set, installed as the time function of the LRU by _load().
  """
  # All protected methods (starting with '_') except _path should be called
  # with self._lock held.
  super(DiskContentAddressedCache, self).__init__(cache_dir)
  self.policies = policies
  self.state_file = os.path.join(cache_dir, self.STATE_FILE)
  # Items in a LRU lookup dict(digest: size).
  self._lru = lru.LRUDict()
  file_path.ensure_tree(self.cache_dir)
  # Current cached free disk space. It is updated by self._trim().
  self._free_disk = file_path.get_free_space(self.cache_dir)
  # The first item in the LRU cache that must not be evicted during this run
  # since it was referenced. All items more recent than _protected in the LRU
  # cache are also inherently protected. It could be a set() of all items
  # referenced but this increases memory usage without a use case.
  self._protected = None
  # Cleanup operations done by self._load(), if any.
  self._operations = []
  with tools.Profiler('Setup'), self._lock:
    self._load(trim, time_fn)
426
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000427 # Cache interface implementation.
428
429 def __len__(self):
430 with self._lock:
431 return len(self._lru)
432
433 def __iter__(self):
434 # This is not thread-safe.
435 return self._lru.__iter__()
436
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400437 def __contains__(self, digest):
438 with self._lock:
439 return digest in self._lru
440
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400441 @property
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400442 def total_size(self):
443 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000444 return sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400445
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000446 def get_oldest(self):
447 with self._lock:
448 try:
449 # (key, (value, ts))
450 return self._lru.get_oldest()[1][1]
451 except KeyError:
452 return None
453
454 def remove_oldest(self):
455 with self._lock:
456 # TODO(maruel): Update self._added.
457 return self._remove_lru_file(True)
458
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000459 def save(self):
460 with self._lock:
461 return self._save()
462
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000463 def trim(self):
464 """Forces retention policies."""
465 with self._lock:
466 return self._trim()
467
def cleanup(self):
  """Cleans up the cache directory.

  Ensures there is no unknown files in cache_dir.
  Ensures the read-only bits are set correctly.
  Drops LRU entries whose file is gone, then verifies the hash of every item
  whose mtime is newer than its LRU timestamp and deletes the corrupted ones.

  At that point, the cache was already loaded, trimmed to respect cache
  policies.
  """
  logging.info('DiskContentAddressedCache.cleanup(): Cleaning %s',
               self.cache_dir)
  with self._lock:
    fs.chmod(self.cache_dir, 0o700)
    # Ensure that all files listed in the state still exist and delete the
    # ones that are not listed. |previous| ends up holding the tracked digests
    # that have no file on disk.
    previous = set(self._lru)
    # It'd be faster if there were a readdir() function.
    for filename in fs.listdir(self.cache_dir):
      if filename == self.STATE_FILE:
        fs.chmod(os.path.join(self.cache_dir, filename), 0o600)
        continue
      if filename in previous:
        fs.chmod(os.path.join(self.cache_dir, filename), 0o400)
        previous.remove(filename)
        continue

      # An untracked file. Delete it.
      logging.warning(
          'DiskContentAddressedCache.cleanup(): Removing unknown file %s',
          filename)
      p = self._path(filename)
      if fs.isdir(p):
        try:
          file_path.rmtree(p)
        except OSError:
          # Best effort: a directory that can't be removed is left in place.
          pass
      else:
        file_path.try_remove(p)
      continue

    if previous:
      # Filter out entries that were not found.
      logging.warning(
          'DiskContentAddressedCache.cleanup(): Removed %d lost files',
          len(previous))
      for filename in previous:
        self._lru.pop(filename)
      self._save()

  # Verify hash of every single item to detect corruption. The corrupted
  # files will be evicted.
  total = 0
  verified = 0
  deleted = 0
  logging.info(
      'DiskContentAddressedCache.cleanup(): Verifying modified files')
  with self._lock:
    # Iterate over a snapshot since entries are popped inside the loop.
    for digest, (_, timestamp) in list(self._lru._items.items()):
      total += 1
      # Verify only if the mtime is greater than the timestamp in state.json
      # to avoid taking too long.
      if self._get_mtime(digest) <= timestamp:
        continue
      logging.warning(
          'DiskContentAddressedCache.cleanup(): Item has been modified.'
          ' verifying item: %s', digest)
      is_valid = self._is_valid_hash(digest)
      verified += 1
      logging.warning(
          'DiskContentAddressedCache.cleanup(): verified. is_valid: %s, '
          'item: %s', is_valid, digest)
      if is_valid:
        # Update timestamp in state.json
        self._lru.touch(digest)
        continue
      # Remove corrupted file from LRU and file system.
      self._lru.pop(digest)
      self._delete_file(digest, UNKNOWN_FILE_SIZE)
      deleted += 1
      logging.error(
          'DiskContentAddressedCache.cleanup(): Deleted corrupted item: %s',
          digest)
    self._save()
  logging.info(
      'DiskContentAddressedCache.cleanup(): Verified modified files.'
      ' total: %d, verified: %d, deleted: %d', total, verified, deleted)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400553
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000554 # ContentAddressedCache interface implementation.
555
556 def __contains__(self, digest):
557 with self._lock:
558 return digest in self._lru
559
def touch(self, digest, size):
  """Verifies an actual file is valid and bumps its LRU position.

  Returns False if the file is missing or invalid.

  Note that it doesn't compute the hash so it could still be corrupted if the
  file size didn't change.
  """
  # Do the check outside the lock.
  looks_valid = is_valid_file(self._path(digest), size)

  with self._lock:
    tracked = digest in self._lru
    if tracked and looks_valid:
      # Update its LRU position and protect it for the rest of this run.
      self._lru.touch(digest)
      if not self._protected:
        self._protected = digest
      return True

    if tracked:
      # Tracked in the LRU but the file is missing or has the wrong size.
      self._lru.pop(digest)
    if tracked or looks_valid:
      # Either the invalid tracked file, or a file that exists on disk but is
      # not in the LRU anymore.
      self._delete_file(digest, size)
    return False
586
def getfileobj(self, digest):
  """Returns the cached file for digest, opened for binary reading.

  The recorded size of the item is appended to the usage profiling list.

  Raises:
    CacheMiss: if the file can't be opened or digest is not in the LRU.
  """
  try:
    handle = fs.open(self._path(digest), 'rb')
  except IOError:
    raise CacheMiss(digest)
  with self._lock:
    try:
      size = self._lru[digest]
    except KeyError:
      # If the digest is not actually in _lru, assume it is a cache miss.
      # Existing file will be overwritten by whoever uses the cache and added
      # to _lru.
      handle.close()
      raise CacheMiss(digest)
    self._used.append(size)
  return handle
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400602
def write(self, digest, content):
  """Writes the chunks from |content| to the cache file for digest.

  The file is made read-only and then registered in the LRU.

  Returns:
    digest, to simplify chaining.
  """
  assert content is not None
  with self._lock:
    if not self._protected:
      self._protected = digest
  dest = self._path(digest)
  # A stale broken file may remain. It is possible for the file to have write
  # access bit removed which would cause the file_write() call to fail to open
  # in write mode. Take no chance here.
  file_path.try_remove(dest)
  try:
    size = file_write(dest, content)
  except:
    # There are two possible places where an exception can occur:
    #   1) Inside |content| generator in case of network or unzipping errors.
    #   2) Inside file_write itself in case of disk IO errors.
    # In any case delete an incomplete file and propagate the exception to
    # caller, it will be logged there.
    file_path.try_remove(dest)
    raise
  # Make the file read-only in the cache.  This has a few side-effects since
  # the file node is modified, so every directory entries to this file becomes
  # read-only. It's fine here because it is a new file.
  file_path.set_read_only(dest, True)
  with self._lock:
    self._add(digest, size)
  return digest
629
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000630 # Internal functions.
631
def _load(self, trim, time_fn):
  """Loads state of the cache from json file.

  If cache_dir does not exist on disk, it is created.

  Arguments:
    trim: if true, self._trim() is called once the state is loaded.
    time_fn: if true, installed as the time function of the LRU.
  """
  self._lock.assert_locked()

  if fs.isfile(self.state_file):
    # Load state of the cache.
    try:
      self._lru = lru.LRUDict.load(self.state_file)
    except ValueError as err:
      logging.error('Failed to load cache state: %s' % (err,))
      # Don't want to keep broken cache dir.
      file_path.rmtree(self.cache_dir)
      fs.makedirs(self.cache_dir)
      self._free_disk = file_path.get_free_space(self.cache_dir)
  elif not fs.isdir(self.cache_dir):
    fs.makedirs(self.cache_dir)

  if time_fn:
    self._lru.time_fn = time_fn
  if trim:
    self._trim()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400656
def _save(self):
  """Saves the LRU ordering to the state file."""
  self._lock.assert_locked()
  state_file = self.state_file
  if sys.platform != 'win32':
    parent = os.path.dirname(state_file)
    if fs.isdir(parent):
      # Necessary otherwise the file can't be created.
      file_path.set_read_only(parent, False)
  if fs.isfile(state_file):
    # Drop the read-only flag on an existing state file before saving.
    file_path.set_read_only(state_file, False)
  self._lru.save(state_file)
667 self._lru.save(self.state_file)
668
def _trim(self):
  """Evicts the oldest items until every retention policy is satisfied.

  The policies are applied in this order: max_age_secs, max_cache_size,
  max_items and finally min_free_space. The LRU state is saved afterwards.

  Returns:
    List with the size of each evicted item.
  """
  self._lock.assert_locked()
  policies = self.policies
  evicted = []

  # Trim old items.
  if policies.max_age_secs:
    cutoff = self._lru.time_fn() - policies.max_age_secs
    while self._lru:
      oldest = self._lru.get_oldest()
      # (key, (data, ts))
      if oldest[1][1] >= cutoff:
        break
      evicted.append(self._remove_lru_file(True))

  # Ensure maximum cache size.
  if policies.max_cache_size:
    total_size = sum(self._lru.values())
    while total_size > policies.max_cache_size:
      e = self._remove_lru_file(True)
      evicted.append(e)
      total_size -= e

  # Ensure maximum number of items in the cache.
  if policies.max_items and len(self._lru) > policies.max_items:
    for _ in range(len(self._lru) - policies.max_items):
      evicted.append(self._remove_lru_file(True))

  # Ensure enough free space.
  self._free_disk = file_path.get_free_space(self.cache_dir)
  while (
      policies.min_free_space and
      self._lru and
      self._free_disk < policies.min_free_space):
    # self._free_disk is updated by this call.
    evicted.append(self._remove_lru_file(True))

  if evicted:
    total_usage = sum(self._lru.values())
    usage_percent = 0.
    # max_cache_size == 0 means "no size limit"; items can still be evicted by
    # the other policies, so don't divide by it.
    if total_usage and policies.max_cache_size:
      usage_percent = 100. * float(total_usage) / policies.max_cache_size

    logging.warning(
        'Trimmed %d file(s) (%.1fkb) due to not enough free disk space:'
        ' %.1fkb free, %.1fkb cache (%.1f%% of its maximum capacity of '
        '%.1fkb)', len(evicted),
        sum(evicted) / 1024., self._free_disk / 1024., total_usage / 1024.,
        usage_percent, policies.max_cache_size / 1024.)
  self._save()
  return evicted
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400720
721 def _path(self, digest):
722 """Returns the path to one item."""
723 return os.path.join(self.cache_dir, digest)
724
def _remove_lru_file(self, allow_protected):
  """Removes the least recently used file and returns its size.

  Updates self._free_disk.

  Arguments:
    allow_protected: when false, refuse to evict the item referenced by
        self._protected.

  Raises:
    NoMoreSpace: if the LRU is empty, or if the oldest item is the protected
        one and allow_protected is false.
  """
  self._lock.assert_locked()
  try:
    digest, _ = self._lru.get_oldest()
  except KeyError:
    # That means an internal error.
    raise NoMoreSpace('Nothing to remove, can\'t happend')

  if not allow_protected and digest == self._protected:
    total_size = sum(self._lru.values())
    msg = ('Not enough space to fetch the whole isolated tree.\n'
           ' %s\n cache=%d bytes (%.3f GiB), %d items; '
           '%s bytes (%.3f GiB) free_space') % (
               self.policies, total_size, float(total_size) / 1024**3,
               len(self._lru), self._free_disk,
               float(self._free_disk) / 1024**3)
    raise NoMoreSpace(msg)

  digest, (size, _) = self._lru.pop_oldest()
  logging.debug('Removing LRU file %s with size %s bytes', digest, size)
  self._delete_file(digest, size)
  return size
749
def _add(self, digest, size=UNKNOWN_FILE_SIZE):
  """Adds an item into LRU cache marking it as a newest one.

  Arguments:
    digest: digest of the item whose file is already in the cache directory.
    size: size of the item in bytes; when UNKNOWN_FILE_SIZE the file is
        stat'ed to find out.

  Raises:
    NoMoreSpace: propagated from self._remove_lru_file() when making room
        would require evicting the protected item.
  """
  self._lock.assert_locked()
  if size == UNKNOWN_FILE_SIZE:
    size = fs.stat(self._path(digest)).st_size
  self._added.append(size)
  self._lru.add(digest, size)
  self._free_disk -= size
  # Do a quicker version of self._trim(). It only enforces free disk space,
  # not cache size limits. It doesn't actually look at real free disk space,
  # only uses its cache values. self._trim() will be called later to enforce
  # real trimming but doing this quick version here makes it possible to map
  # an isolated that is larger than the current amount of free disk space when
  # the cache size is already large.
  while (self.policies.min_free_space and self._lru and
         self._free_disk < self.policies.min_free_space):
    # self._free_disk is updated by this call. It returns the evicted size or
    # raises NoMoreSpace, so there is no sentinel return value to check.
    self._remove_lru_file(False)
769
def _delete_file(self, digest, size=UNKNOWN_FILE_SIZE):
  """Deletes cache file from the file system.

  Updates self._free_disk: |size| is added back only when
  file_path.try_remove() reports a true value.

  Arguments:
  - digest: name of the cache item; resolved to a file with self._path().
  - size: size in bytes to credit back. When it is UNKNOWN_FILE_SIZE the size
    is stat'ed from disk, falling back to 0 if the stat fails.

  An OSError other than ENOENT is logged and swallowed; ENOENT is ignored
  silently.
  """
  self._lock.assert_locked()
  try:
    if size == UNKNOWN_FILE_SIZE:
      try:
        size = fs.stat(self._path(digest)).st_size
      except OSError:
        size = 0
    if file_path.try_remove(self._path(digest)):
      self._free_disk += size
  except OSError as e:
    if e.errno != errno.ENOENT:
      # Let logging do the formatting lazily, like the other log calls here.
      logging.error('Error attempting to delete a file %s:\n%s', digest, e)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400787
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000788 def _get_mtime(self, digest):
789 """Get mtime of cache file."""
790 return os.path.getmtime(self._path(digest))
791
def _is_valid_hash(self, digest):
  """Returns True if the cached file hashes to |digest| with a supported algo.

  Each algorithm in isolated_format.SUPPORTED_ALGOS is tried in turn; the
  first match wins.
  """
  cached_file = self._path(digest)
  return any(
      isolated_format.hash_file(cached_file, algo) == digest
      for algo in isolated_format.SUPPORTED_ALGOS.values())
798
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400799
class NamedCache(Cache):
  """Manages cache directories.

  A cache entry is a tuple (name, path), where:
  - name is a short identifier that describes the contents of the cache, e.g.
    "git_v8" could be all git repositories required by v8 builds, or
    "build_chromium" could be the build artifacts of Chromium.
  - path is a directory path relative to the task run dir. Cache installation
    puts the requested cache directory at the path.
  """
  # Characters used to make up the names of the cache directories.
  _DIR_ALPHABET = string.ascii_letters + string.digits
  # File name, inside the cache directory, of the serialized LRU state.
  STATE_FILE = u'state.json'
  # Directory name, inside the cache directory, holding the per-name symlinks.
  NAMED_DIR = u'named'
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400813
def __init__(self, cache_dir, policies, time_fn=None):
  """Initializes NamedCaches.

  Arguments:
  - cache_dir is a directory for persistent cache storage.
  - policies is a CachePolicies instance.
  - time_fn is a function that returns timestamp (float) and used to take
    timestamps when new caches are requested. Used in unit tests.

  A state file that cannot be loaded, or that holds a non-integer size, causes
  the whole cache directory to be deleted and recreated empty.
  """
  super(NamedCache, self).__init__(cache_dir)
  self._policies = policies
  self.state_file = os.path.join(cache_dir, self.STATE_FILE)
  # LRU {cache_name -> tuple(cache_location, size)}
  self._lru = lru.LRUDict()

  if not fs.isdir(self.cache_dir):
    fs.makedirs(self.cache_dir)
  elif fs.isfile(self.state_file):
    try:
      self._lru = lru.LRUDict.load(self.state_file)
      for _rel_cache, size in self._lru.values():
        if isinstance(size, six.integer_types):
          continue
        # Dump the offending state file to the log before giving up on it.
        with open(self.state_file, 'r') as f:
          logging.info('named cache state file: %s\n%s', self.state_file,
                       f.read())
        raise ValueError("size is not integer: %s" % size)
    except ValueError:
      logging.exception(
          'NamedCache: failed to load named cache state file; obliterating')
      file_path.rmtree(self.cache_dir)
      fs.makedirs(self.cache_dir)
      self._lru = lru.LRUDict()
    with self._lock:
      self._try_upgrade()

  if time_fn:
    self._lru.time_fn = time_fn
850
@property
def available(self):
  """Returns the names of the caches present in the LRU, as a new set."""
  with self._lock:
    names = set(self._lru)
  return names
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400856
Takuto Ikutaeab23172020-07-02 03:50:02 +0000857 def _sudo_chown(self, path):
858 if sys.platform == 'win32':
859 return
860 uid = os.getuid()
861 if os.stat(path).st_uid == uid:
862 return
863 # Maybe owner of |path| is different from runner of this script. This is to
864 # make fs.rename work in that case.
865 # https://crbug.com/986676
866 subprocess.check_call(['sudo', '-n', 'chown', str(uid), path])
867
def install(self, dst, name):
  """Creates the directory |dst| and moves a previous named cache |name| if it
  was in the local named caches cache.

  dst must be absolute, unicode and must not exist.

  Returns the reused named cache size in bytes, or 0 if none was present.

  Raises NamedCacheError if cannot install the cache.
  """
  logging.info('NamedCache.install(%r, %r)', dst, name)
  with self._lock:
    try:
      if fs.isdir(dst):
        raise NamedCacheError(
            'installation directory %r already exists' % dst)

      # Remove the named symlink if it exists.
      alias = self._get_named_path(name)
      if fs.exists(alias):
        # Remove the symlink itself, not its destination.
        fs.remove(alias)

      if name in self._lru:
        slot, size = self._lru.get(name)
        stored = os.path.join(self.cache_dir, slot)
        if not fs.isdir(stored):
          # Stale entry: forget it and fall through to a fresh directory.
          logging.warning('- expected directory %r, does not exist', slot)
          self._remove(name)
        else:
          logging.info('- reusing %r; size was %d', slot, size)
          file_path.ensure_tree(os.path.dirname(dst))
          self._sudo_chown(stored)
          fs.rename(stored, dst)
          self._remove(name)
          return size

      # The named cache does not exist, create an empty directory. When
      # uninstalling, we will move it back to the cache and create an entry.
      logging.info('- creating new directory')
      file_path.ensure_tree(dst)
      return 0
    except (IOError, OSError) as ex:
      # Raise using the original traceback.
      exc = NamedCacheError(
          'cannot install cache named %r at %r: %s' % (name, dst, ex))
      six.reraise(type(exc), exc, sys.exc_info()[2])
    finally:
      # The state is persisted whether or not the installation succeeded.
      self._save()
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400918
def uninstall(self, src, name):
  """Moves the cache directory back into the named cache hive for an eventual
  reuse.

  The opposite of install().

  src must be absolute and unicode. Its content is moved back into the local
  named caches cache.

  Returns the named cache size in bytes. Returns None when |src| is not a
  directory anymore or when its size could not be calculated; nothing is
  saved in these cases.

  Raises NamedCacheError if cannot uninstall the cache.
  """
  logging.info('NamedCache.uninstall(%r, %r)', src, name)
  start = time.time()
  with self._lock:
    try:
      if not fs.isdir(src):
        logging.warning(
            'NamedCache: Directory %r does not exist anymore. Cache lost.',
            src)
        return

      if name in self._lru:
        # This shouldn't happen but just remove the preexisting one and move
        # on.
        logging.error('- overwriting existing cache!')
        self._remove(name)

      # Calculate the size of the named cache to keep.
      size = file_path.get_recursive_size(src)
      logging.info('- Size is %s', size)
      if size is None:
        # Do not save a named cache that was deleted.
        return

      # Move the dir and create an entry for the named cache.
      slot = self._allocate_dir()
      stored = os.path.join(self.cache_dir, slot)
      logging.info('- Moving to %r', slot)
      file_path.ensure_tree(os.path.dirname(stored))
      self._sudo_chown(src)
      fs.rename(src, stored)

      self._lru.add(name, (slot, size))
      self._added.append(size)

      # Create symlink <cache_dir>/<named>/<name> -> <cache_dir>/<short name>
      # for user convenience.
      alias = self._get_named_path(name)
      if not fs.exists(alias):
        file_path.ensure_tree(os.path.dirname(alias))
      else:
        file_path.remove(alias)

      try:
        fs.symlink(os.path.join(u'..', slot), alias)
        logging.info(
            'NamedCache: Created symlink %r to %r', alias, stored)
      except OSError:
        # Ignore on Windows. It happens when running as a normal user or when
        # UAC is enabled and the user is a filtered administrator account.
        if sys.platform != 'win32':
          raise
      return size
    except (IOError, OSError) as ex:
      # Raise using the original traceback.
      exc = NamedCacheError(
          'cannot uninstall cache named %r at %r: %s' % (name, src, ex))
      six.reraise(type(exc), exc, sys.exc_info()[2])
    finally:
      # Call save() at every uninstall. The assumptions are:
      # - The total number of named caches is low, so the state.json file is
      #   small, so the time it takes to write it to disk is short.
      # - The number of mapped named caches per task is low, so the number of
      #   times save() is called on tear-down isn't high enough to be
      #   significant.
      # - uninstall() sometimes throws due to file locking on Windows or
      #   access rights on Linux. We want to keep as many as possible.
      self._save()
      logging.info('NamedCache.uninstall(%r, %r) took %d seconds', src, name,
                   time.time() - start)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001001
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001002 # Cache interface implementation.
1003
1004 def __len__(self):
1005 with self._lock:
1006 return len(self._lru)
1007
1008 def __iter__(self):
1009 # This is not thread-safe.
1010 return self._lru.__iter__()
1011
John Budorickc6186972020-02-26 00:58:14 +00001012 def __contains__(self, name):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001013 with self._lock:
John Budorickc6186972020-02-26 00:58:14 +00001014 return name in self._lru
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001015
@property
def total_size(self):
  """Returns the sum of the sizes recorded for every entry in the LRU."""
  with self._lock:
    total = 0
    for _rel_path, size in self._lru.values():
      total += size
    return total
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001020
def get_oldest(self):
  """Returns the timestamp of the oldest LRU entry, or None if there is none."""
  with self._lock:
    try:
      oldest = self._lru.get_oldest()
    except KeyError:
      return None
    # (key, (value, ts))
    _name, (_value, ts) = oldest
    return ts
1028
def remove_oldest(self):
  """Removes the oldest LRU entry and returns the size it had."""
  with self._lock:
    # TODO(maruel): Update self._added.
    return self._remove_lru_item()[1]
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001034
def save(self):
  """Takes the lock and returns the result of self._save()."""
  with self._lock:
    outcome = self._save()
  return outcome
1038
def touch(self, *names):
  """Touches every entry of |names| present in the LRU, then saves the state.

  Names that are not in the LRU are skipped.
  """
  with self._lock:
    for known in (name for name in names if name in self._lru):
      self._lru.touch(known)
    self._save()
1045
def trim(self):
  """Evicts the oldest named caches until every configured policy is met.

  The policies are applied in this order: max_items, max_age_secs,
  min_free_space, max_cache_size. A policy with a false value is skipped.

  Returns the list of the sizes of the evicted caches, in eviction order. The
  list is empty, and nothing is saved, if the cache directory does not exist.
  """
  evicted = []

  def evict_oldest(log_format, limit):
    """Drops the oldest entry, records its size and logs why it went."""
    name, size = self._remove_lru_item()
    evicted.append(size)
    logging.info(log_format, name, size, limit)

  with self._lock:
    if not fs.isdir(self.cache_dir):
      return evicted

    policies = self._policies

    # Trim according to maximum number of items.
    if policies.max_items:
      while len(self._lru) > policies.max_items:
        evict_oldest(
            'NamedCache.trim(): Removed %r(%d) due to max_items(%d)',
            policies.max_items)

    # Trim according to maximum age.
    if policies.max_age_secs:
      cutoff = self._lru.time_fn() - policies.max_age_secs
      # get_oldest() returns (name, (data, ts)).
      while self._lru and self._lru.get_oldest()[1][1] < cutoff:
        evict_oldest(
            'NamedCache.trim(): Removed %r(%d) due to max_age_secs(%d)',
            policies.max_age_secs)

    # Trim according to minimum free space. The free space is queried again
    # after each eviction.
    if policies.min_free_space:
      while (self._lru and
             file_path.get_free_space(self.cache_dir) <
             policies.min_free_space):
        evict_oldest(
            'NamedCache.trim(): Removed %r(%d) due to min_free_space(%d)',
            policies.min_free_space)

    # Trim according to maximum total size.
    if policies.max_cache_size:
      while (self._lru and
             sum(size for _rel_cache, size in self._lru.values()) >
             policies.max_cache_size):
        evict_oldest(
            'NamedCache.trim(): Removed %r(%d) due to max_cache_size(%d)',
            policies.max_cache_size)

    self._save()
  return evicted
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001100
def cleanup(self):
  """Removes unknown directories.

  Does not recalculate the cache size since it's surprisingly slow on some
  OSes.

  Two passes are done under the lock:
  - The cache directory itself: LRU entries whose directory is missing on disk
    are dropped, and items on disk that no LRU entry refers to are deleted.
  - The NAMED_DIR directory, if it exists: entries for known caches that are
    not the expected symlink are deleted, and so are entries for unknown names.

  The state is saved in all cases.

  Returns True, or False if deleting an unexpected item failed. Failures while
  deleting a wrong entry for a known cache are not caught and propagate.
  """
  logging.info('NamedCache.cleanup(): Cleaning %s', self.cache_dir)
  success = True
  with self._lock:
    try:
      actual = set(fs.listdir(self.cache_dir))
      actual.discard(self.NAMED_DIR)
      actual.discard(self.STATE_FILE)
      # Maps relative directory -> cache name.
      expected = {v[0]: k for k, v in self._lru.items()}
      # First, handle the actual cache content.
      # Remove missing entries.
      for missing in (set(expected) - actual):
        # NOTE(review): pop() presumably returns the stored (rel_cache, size)
        # value, in which case the first item logged below is the relative
        # directory rather than the cache name -- confirm.
        name, size = self._lru.pop(expected[missing])
        logging.warning(
            'NamedCache.cleanup(): Missing on disk %r(%d)', name, size)
      # Remove unexpected items.
      for unexpected in (actual - set(expected)):
        try:
          p = os.path.join(self.cache_dir, unexpected)
          logging.warning(
              'NamedCache.cleanup(): Unexpected %r', unexpected)
          if fs.isdir(p) and not fs.islink(p):
            file_path.rmtree(p)
          else:
            fs.remove(p)
        except (IOError, OSError) as e:
          logging.error('Failed to remove %s: %s', unexpected, e)
          success = False

      # Second, fix named cache links.
      named = os.path.join(self.cache_dir, self.NAMED_DIR)
      if fs.isdir(named):
        actual = set(fs.listdir(named))
        expected = set(self._lru)
        # Confirm entries. Do not add missing ones for now.
        for name in expected.intersection(actual):
          p = os.path.join(self.cache_dir, self.NAMED_DIR, name)
          expected_link = os.path.join(u'..', self._lru[name][0])
          if fs.islink(p):
            link = fs.readlink(p)
            if expected_link == link:
              continue
            logging.warning(
                'Unexpected symlink for cache %s: %s, expected %s',
                name, link, expected_link)
          else:
            logging.warning('Unexpected non symlink for cache %s', name)
          if fs.isdir(p) and not fs.islink(p):
            file_path.rmtree(p)
          else:
            fs.remove(p)
        # Remove unexpected items.
        for unexpected in (actual - expected):
          try:
            p = os.path.join(self.cache_dir, self.NAMED_DIR, unexpected)
            # A symlink to a directory must be removed as a link, like in the
            # two loops above, instead of being handed to rmtree().
            if fs.isdir(p) and not fs.islink(p):
              file_path.rmtree(p)
            else:
              fs.remove(p)
          except (IOError, OSError) as e:
            logging.error('Failed to remove %s: %s', unexpected, e)
            success = False
    finally:
      self._save()
  return success
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001171
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001172 # Internal functions.
1173
1174 def _try_upgrade(self):
1175 """Upgrades from the old format to the new one if necessary.
1176
1177 This code can be removed so all bots are known to have the right new format.
1178 """
1179 if not self._lru:
1180 return
1181 _name, (data, _ts) = self._lru.get_oldest()
1182 if isinstance(data, (list, tuple)):
1183 return
1184 # Update to v2.
1185 def upgrade(_name, rel_cache):
1186 abs_cache = os.path.join(self.cache_dir, rel_cache)
Takuto Ikuta995da062021-03-17 05:01:59 +00001187 return rel_cache, file_path.get_recursive_size(abs_cache)
1188
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001189 self._lru.transform(upgrade)
1190 self._save()
1191
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001192 def _remove_lru_item(self):
1193 """Removes the oldest LRU entry. LRU must not be empty."""
1194 name, ((_rel_path, size), _ts) = self._lru.get_oldest()
Takuto Ikuta74686842021-07-30 04:11:03 +00001195 logging.info('Removing named cache %r, %d', name, size)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001196 self._remove(name)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001197 return name, size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001198
def _allocate_dir(self):
  """Returns the relative path of a cache directory that does not exist yet.

  The directory itself is not created here. In practice, the path is a
  2-letter string.

  Raises NamedCacheError when no free name was found after trying 1000
  distinct ones.
  """
  # We randomly generate directory names that have two lower/upper case
  # letters or digits. Total number of possibilities is (26*2 + 10)^2 = 3844.
  alphabet_size = len(self._DIR_ALPHABET)
  rejected = set()
  while len(rejected) < 1000:
    pick = random.randint(0, alphabet_size * alphabet_size - 1)
    first, second = divmod(pick, alphabet_size)
    candidate = self._DIR_ALPHABET[first] + self._DIR_ALPHABET[second]
    if candidate in rejected:
      continue
    if fs.exists(os.path.join(self.cache_dir, candidate)):
      rejected.add(candidate)
      continue
    return candidate
  raise NamedCacheError(
      'could not allocate a new cache dir, too many cache dirs')
1220
def _remove(self, name):
  """Removes a cache directory and entry.

  The symlink for |name| is unlinked first if it exists. Then, if |name| is in
  the LRU, its directory is deleted from disk and the entry is popped.

  Returns nothing.
  """
  self._lock.assert_locked()
  # First try to remove the alias if it exists.
  alias = self._get_named_path(name)
  if fs.islink(alias):
    fs.unlink(alias)

  # Then remove the actual data.
  if name in self._lru:
    rel_path, _size = self._lru.get(name)
    stored = os.path.join(self.cache_dir, rel_path)
    if fs.isdir(stored):
      file_path.rmtree(stored)
    self._lru.pop(name)
1241
def _save(self):
  """Writes the LRU to the state file; the lock must be held.

  file_path.ensure_tree() is called on the cache directory first.
  """
  self._lock.assert_locked()
  root, target = self.cache_dir, self.state_file
  file_path.ensure_tree(root)
  self._lru.save(target)
1246
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001247 def _get_named_path(self, name):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001248 return os.path.join(self.cache_dir, self.NAMED_DIR, name)