blob: bb75d6f9b9e548127bb4310e80dbbbf0d4153a3c [file] [log] [blame]
# Copyright 2018 The LUCI Authors. All rights reserved.
# Use of this source code is governed under the Apache License, Version 2.0
# that can be found in the LICENSE file.
4
5"""Define local cache policies."""
6
Takuto Ikuta2fe58fd2021-08-18 13:47:36 +00007from __future__ import print_function
8
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -04009import errno
Takuto Ikuta922c8642021-11-18 07:42:16 +000010import hashlib
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040011import io
12import logging
13import os
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040014import random
15import string
Junji Watanabe7b720782020-07-01 01:51:07 +000016import subprocess
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040017import sys
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000018import time
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040019
20from utils import file_path
21from utils import fs
22from utils import lru
23from utils import threading_utils
24from utils import tools
Lei Leife202df2019-06-11 17:33:34 +000025tools.force_local_third_party()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040026
Lei Leife202df2019-06-11 17:33:34 +000027# third_party/
28import six
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040029
# The file size to be used when we don't know the correct file size,
# generally used for .isolated files. Functions in this module compare against
# this sentinel to decide whether a size check or a stat() is needed.
UNKNOWN_FILE_SIZE = None
33
34
def file_write(path, content_generator):
  """Streams the chunks yielded by content_generator into the file at path.

  The parent directory of path is created first when it is missing.

  Returns:
    The total number of bytes written.

  Meant to be mocked out in unit tests.
  """
  file_path.ensure_tree(os.path.dirname(path))
  written = 0
  with fs.open(path, 'wb') as out:
    for chunk in content_generator:
      written += len(chunk)
      out.write(chunk)
  return written
51
52
def is_valid_file(path, size):
  """Tells whether the file at path looks like a valid cache entry.

  Only existence and, when size is known, the on-disk size are checked; the
  file content is never read.

  Arguments:
    path: path of the file to check.
    size: expected size in bytes, or UNKNOWN_FILE_SIZE to only check existence.

  Returns:
    True if the file appears valid, False otherwise.
  """
  if size == UNKNOWN_FILE_SIZE:
    # Nothing to compare against; existence is all we can verify.
    return fs.isfile(path)
  try:
    on_disk = fs.stat(path).st_size
  except OSError as e:
    logging.warning('Can\'t read item %s, assuming it\'s invalid: %s',
                    os.path.basename(path), e)
    return False
  if on_disk == size:
    return True
  logging.warning(
      'Found invalid item %s; %d != %d',
      os.path.basename(path), on_disk, size)
  return False
72
73
def trim_caches(caches, path, min_free_space, max_age_secs):
  """Trims multiple caches.

  Items are evicted in a single LRU order across all caches, oldest first,
  independent of which cache they belong to.

  Two global policies are enforced first:
  - max_age_secs
  - min_free_space

  Once that's done, each cache's own trim() is called.

  Arguments:
    caches: cache objects exposing __len__, get_oldest(), remove_oldest() and
            trim().
    path: path whose file system free space is measured.
    min_free_space: minimum free disk space to reach; falsy to skip the check.
    max_age_secs: maximum age of an item in seconds; falsy to skip the check.

  Returns:
    List containing the size of every item evicted.
  """
  min_ts = 0
  if max_age_secs:
    min_ts = time.time() - max_age_secs
  free_disk = 0
  if min_free_space:
    free_disk = file_path.get_free_space(path)
  logging.info("Trimming caches. min_ts: %d, free_disk: %d, min_free_space: %d",
               min_ts, free_disk, min_free_space)
  evicted = []
  if min_ts or free_disk:
    while True:
      candidates = [(c, c.get_oldest()) for c in caches if len(c) > 0]
      if not candidates:
        break
      # The cache holding the globally oldest item; the first one wins on ties.
      victim, ts = min(candidates, key=lambda pair: pair[1])
      if ts >= min_ts and free_disk >= min_free_space:
        break
      evicted.append(victim.remove_oldest())
      if min_free_space:
        free_disk = file_path.get_free_space(path)
    logging.info("free_disk after removing oldest entries: %d", free_disk)
  # Evaluate each cache's own policies.
  for cache in caches:
    evicted.extend(cache.trim())
  return evicted
111
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000112
class NamedCacheError(Exception):
  """Error specific to named caches."""
115
116
class NoMoreSpace(Exception):
  """There is not enough space to map the whole directory."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400119
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -0400120
class CachePolicies:
  """Retention policies shared by the caches (isolated, named, cipd)."""

  def __init__(self, max_cache_size, min_free_space, max_items, max_age_secs):
    """Common caching policies for the multiple caches (isolated, named, cipd).

    Arguments:
    - max_cache_size: Trim if the cache gets larger than this value. If 0, the
                      cache is effectively a leak.
    - min_free_space: Trim if disk free space becomes lower than this value. If
                      0, it will unconditionally fill the disk.
    - max_items: Maximum number of items to keep in the cache. If 0, do not
                 enforce a limit.
    - max_age_secs: Maximum age an item is kept in the cache until it is
                    automatically evicted. Having a lot of dead luggage slows
                    everything down.
    """
    self.max_age_secs = max_age_secs
    self.max_items = max_items
    self.min_free_space = min_free_space
    self.max_cache_size = max_cache_size

  def __str__(self):
    """Returns a one-line summary with the byte limits also shown in GiB."""
    gib = 1024**3
    fields = (
        self.max_cache_size,
        float(self.max_cache_size) / gib,
        self.max_items,
        self.min_free_space,
        float(self.min_free_space) / gib,
        self.max_age_secs,
    )
    template = ('CachePolicies(max_cache_size=%s (%.3f GiB); max_items=%s; '
                'min_free_space=%s (%.3f GiB); max_age_secs=%s)')
    return template % fields
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400147
148
class CacheMiss(Exception):
  """Raised when an item is not in cache.

  The missing digest is kept on the exception as the `digest` attribute.
  """

  def __init__(self, digest):
    message = 'Item with digest %r is not found in cache' % digest
    super(CacheMiss, self).__init__(message)
    self.digest = digest
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400155
156
class Cache:
  """Base class for the caches; holds the lock and the profiling counters.

  Subclasses implement the container protocol and the trimming primitives.
  """

  def __init__(self, cache_dir):
    """
    Arguments:
      cache_dir: absolute path (text) of the cache directory, or None.
    """
    if cache_dir is not None:
      assert isinstance(cache_dir, six.text_type), cache_dir
      assert file_path.isabs(cache_dir), cache_dir
    self.cache_dir = cache_dir
    self._lock = threading_utils.LockWithAssert()
    # Profiling values.
    self._added = []
    self._used = []

  def __bool__(self):
    """A cache is always True.

    Otherwise it falls back to __len__, which is surprising.
    """
    return True

  # Python 2 spelling of __bool__.
  __nonzero__ = __bool__

  def __len__(self):
    """Returns the number of entries in the cache."""
    raise NotImplementedError()

  def __iter__(self):
    """Iterates over all the entries names."""
    raise NotImplementedError()

  def __contains__(self, name):
    """Returns if an entry is in the cache."""
    raise NotImplementedError()

  @property
  def total_size(self):
    """Returns the total size of the cache in bytes."""
    raise NotImplementedError()

  @property
  def added(self):
    """Returns a copy of the list of sizes of each entry added."""
    with self._lock:
      return list(self._added)

  @property
  def used(self):
    """Returns a copy of the list of sizes of each entry used."""
    with self._lock:
      return list(self._used)

  def get_oldest(self):
    """Returns timestamp of oldest cache entry or None.

    Returns:
      Timestamp of the oldest item.

    Used for manual trimming.
    """
    raise NotImplementedError()

  def remove_oldest(self):
    """Removes the oldest item from the cache.

    Returns:
      Size of the oldest item.

    Used for manual trimming.
    """
    raise NotImplementedError()

  def save(self):
    """Saves the current cache to disk."""
    raise NotImplementedError()

  def trim(self):
    """Enforces cache policies, then calls save().

    Returns:
      List with the size of evicted items.
    """
    raise NotImplementedError()

  def cleanup(self):
    """Deletes any corrupted item from the cache, then calls trim(), then
    save().

    It is assumed to take significantly more time than trim().
    """
    raise NotImplementedError()
251
252
class ContentAddressedCache(Cache):
  """Content addressed cache that stores objects temporarily.

  It can be accessed concurrently from multiple threads, so it should protect
  its internal state with some lock.
  """

  def __enter__(self):
    """Context manager interface; yields the cache itself."""
    # TODO(maruel): Remove.
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    """Context manager interface; never suppresses exceptions."""
    # TODO(maruel): Remove.
    return False

  def touch(self, digest, size):
    """Ensures item is not corrupted and updates its LRU position.

    Arguments:
      digest: hash digest of item to check.
      size: expected size of this item.

    Returns:
      True if item is in cache and not corrupted.
    """
    raise NotImplementedError()

  def getfileobj(self, digest):
    """Returns a readable file like object.

    If file exists on the file system it will have a .name attribute with an
    absolute path to the file.
    """
    raise NotImplementedError()

  def write(self, digest, content):
    """Reads data from |content| generator and stores it in cache.

    It is possible to write to an object that already exists. It may be
    ignored (sent to /dev/null) but the timestamp is still updated.

    Returns digest to simplify chaining.
    """
    raise NotImplementedError()
299
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400300
class MemoryContentAddressedCache(ContentAddressedCache):
  """ContentAddressedCache implementation that stores everything in memory."""

  def __init__(self, file_mode_mask=0o500):
    """Args:
      file_mode_mask: bit mask to AND file mode with. Default value will make
          all mapped files to be read only.
    """
    super(MemoryContentAddressedCache, self).__init__(None)
    self._file_mode_mask = file_mode_mask
    # Items in a LRU lookup dict(digest: content bytes).
    self._lru = lru.LRUDict()

  # Cache interface implementation.

  def __len__(self):
    with self._lock:
      return len(self._lru)

  def __iter__(self):
    # This is not thread-safe.
    return self._lru.__iter__()

  def __contains__(self, digest):
    # Defined once only: this serves both the Cache and the
    # ContentAddressedCache interfaces.
    with self._lock:
      return digest in self._lru

  @property
  def total_size(self):
    """Returns the sum of the lengths of all stored contents, in bytes."""
    with self._lock:
      return sum(len(i) for i in self._lru.values())

  def get_oldest(self):
    """Returns the timestamp of the oldest entry, or None when empty."""
    with self._lock:
      try:
        # (key, (value, ts))
        return self._lru.get_oldest()[1][1]
      except KeyError:
        return None

  def remove_oldest(self):
    """Drops the oldest entry and returns the length of its content."""
    with self._lock:
      # TODO(maruel): Update self._added.
      # (key, (value, ts))
      return len(self._lru.pop_oldest()[1][0])

  def save(self):
    """Nothing to persist for an in-memory cache."""
    pass

  def trim(self):
    """Trimming is not implemented for MemoryContentAddressedCache."""
    return []

  def cleanup(self):
    """Cleaning is irrelevant, as there's no stateful serialization."""

  # ContentAddressedCache interface implementation.

  def touch(self, digest, size):
    """Bumps the LRU position of digest; size is not checked.

    Returns:
      True if the item is in the cache, False otherwise.
    """
    with self._lock:
      try:
        self._lru.touch(digest)
      except KeyError:
        return False
    return True

  def getfileobj(self, digest):
    """Returns an io.BytesIO over the stored content.

    Raises:
      CacheMiss: if digest is not in the cache.
    """
    with self._lock:
      try:
        d = self._lru[digest]
      except KeyError:
        raise CacheMiss(digest)
      self._used.append(len(d))
      self._lru.touch(digest)
    return io.BytesIO(d)

  def write(self, digest, content):
    """Stores the concatenation of the |content| chunks under digest.

    Returns digest to simplify chaining.
    """
    # Assemble whole stream before taking the lock.
    data = b''.join(content)
    with self._lock:
      self._lru.add(digest, data)
      self._added.append(len(data))
    return digest
388
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400389
390class DiskContentAddressedCache(ContentAddressedCache):
391 """Stateful LRU cache in a flat hash table in a directory.
392
393 Saves its state as json file.
394 """
395 STATE_FILE = u'state.json'
396
Marc-Antoine Ruel79d42192019-02-06 19:24:16 +0000397 def __init__(self, cache_dir, policies, trim, time_fn=None):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400398 """
399 Arguments:
400 cache_dir: directory where to place the cache.
401 policies: CachePolicies instance, cache retention policies.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400402 trim: if True to enforce |policies| right away.
Marc-Antoine Ruel79d42192019-02-06 19:24:16 +0000403 It can be done later by calling trim() explicitly.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400404 """
405 # All protected methods (starting with '_') except _path should be called
406 # with self._lock held.
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400407 super(DiskContentAddressedCache, self).__init__(cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400408 self.policies = policies
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400409 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
410 # Items in a LRU lookup dict(digest: size).
411 self._lru = lru.LRUDict()
412 # Current cached free disk space. It is updated by self._trim().
413 file_path.ensure_tree(self.cache_dir)
414 self._free_disk = file_path.get_free_space(self.cache_dir)
415 # The first item in the LRU cache that must not be evicted during this run
416 # since it was referenced. All items more recent that _protected in the LRU
417 # cache are also inherently protected. It could be a set() of all items
418 # referenced but this increases memory usage without a use case.
419 self._protected = None
420 # Cleanup operations done by self._load(), if any.
421 self._operations = []
422 with tools.Profiler('Setup'):
423 with self._lock:
424 self._load(trim, time_fn)
425
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000426 # Cache interface implementation.
427
428 def __len__(self):
429 with self._lock:
430 return len(self._lru)
431
432 def __iter__(self):
433 # This is not thread-safe.
434 return self._lru.__iter__()
435
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400436 def __contains__(self, digest):
437 with self._lock:
438 return digest in self._lru
439
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400440 @property
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400441 def total_size(self):
442 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000443 return sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400444
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000445 def get_oldest(self):
446 with self._lock:
447 try:
448 # (key, (value, ts))
449 return self._lru.get_oldest()[1][1]
450 except KeyError:
451 return None
452
453 def remove_oldest(self):
454 with self._lock:
455 # TODO(maruel): Update self._added.
456 return self._remove_lru_file(True)
457
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000458 def save(self):
459 with self._lock:
460 return self._save()
461
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000462 def trim(self):
463 """Forces retention policies."""
464 with self._lock:
465 return self._trim()
466
  def cleanup(self):
    """Cleans up the cache directory.

    Ensures there is no unknown files in cache_dir.
    Ensures the read-only bits are set correctly.
    Drops LRU entries whose file is gone, then re-verifies (through
    _is_valid_hash) every entry whose mtime is newer than its LRU timestamp
    and evicts those that fail.

    At that point, the cache was already loaded, trimmed to respect cache
    policies.
    """
    logging.info('DiskContentAddressedCache.cleanup(): Cleaning %s',
                 self.cache_dir)
    with self._lock:
      fs.chmod(self.cache_dir, 0o700)
      # Ensure that all files listed in the state still exist and add new ones.
      # Names still left in |previous| after the scan had no file on disk.
      previous = set(self._lru)
      # It'd be faster if there were a readdir() function.
      for filename in fs.listdir(self.cache_dir):
        if filename == self.STATE_FILE:
          fs.chmod(os.path.join(self.cache_dir, filename), 0o600)
          continue
        if filename in previous:
          fs.chmod(os.path.join(self.cache_dir, filename), 0o400)
          previous.remove(filename)
          continue

        # An untracked file. Delete it.
        logging.warning(
            'DiskContentAddressedCache.cleanup(): Removing unknown file %s',
            filename)
        p = self._path(filename)
        if fs.isdir(p):
          try:
            file_path.rmtree(p)
          except OSError:
            # Best effort: a directory that cannot be removed is left behind.
            pass
        else:
          file_path.try_remove(p)
        continue

      if previous:
        # Filter out entries that were not found.
        logging.warning(
            'DiskContentAddressedCache.cleanup(): Removed %d lost files',
            len(previous))
        for filename in previous:
          self._lru.pop(filename)
        self._save()

    # Verify hash of every single item to detect corruption. The corrupted
    # files will be evicted.
    total = 0
    verified = 0
    deleted = 0
    logging.info(
        'DiskContentAddressedCache.cleanup(): Verifying modified files')
    with self._lock:
      # Iterate over a snapshot since entries are popped inside the loop.
      for digest, (_, timestamp) in list(self._lru._items.items()):
        total += 1
        # Verify only if the mtime is greater than the timestamp in state.json
        # to avoid taking too long.
        if self._get_mtime(digest) <= timestamp:
          continue
        logging.warning(
            'DiskContentAddressedCache.cleanup(): Item has been modified.'
            ' verifying item: %s', digest)
        is_valid = self._is_valid_hash(digest)
        verified += 1
        logging.warning(
            'DiskContentAddressedCache.cleanup(): verified. is_valid: %s, '
            'item: %s', is_valid, digest)
        if is_valid:
          # Update timestamp in state.json
          self._lru.touch(digest)
          continue
        # Remove corrupted file from LRU and file system.
        self._lru.pop(digest)
        self._delete_file(digest, UNKNOWN_FILE_SIZE)
        deleted += 1
        logging.error(
            'DiskContentAddressedCache.cleanup(): Deleted corrupted item: %s',
            digest)
      self._save()
    logging.info(
        'DiskContentAddressedCache.cleanup(): Verified modified files.'
        ' total: %d, verified: %d, deleted: %d', total, verified, deleted)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400552
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000553 # ContentAddressedCache interface implementation.
554
555 def __contains__(self, digest):
556 with self._lock:
557 return digest in self._lru
558
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400559 def touch(self, digest, size):
560 """Verifies an actual file is valid and bumps its LRU position.
561
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000562 Returns False if the file is missing or invalid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400563
564 Note that is doesn't compute the hash so it could still be corrupted if the
565 file size didn't change.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400566 """
567 # Do the check outside the lock.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000568 looks_valid = is_valid_file(self._path(digest), size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400569
570 # Update its LRU position.
571 with self._lock:
572 if digest not in self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000573 if looks_valid:
574 # Exists but not in the LRU anymore.
575 self._delete_file(digest, size)
576 return False
577 if not looks_valid:
578 self._lru.pop(digest)
579 # Exists but not in the LRU anymore.
580 self._delete_file(digest, size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400581 return False
582 self._lru.touch(digest)
583 self._protected = self._protected or digest
584 return True
585
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400586 def getfileobj(self, digest):
587 try:
588 f = fs.open(self._path(digest), 'rb')
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400589 except IOError:
590 raise CacheMiss(digest)
Vadim Shtayura33054fa2018-11-01 12:47:59 +0000591 with self._lock:
592 try:
593 self._used.append(self._lru[digest])
594 except KeyError:
595 # If the digest is not actually in _lru, assume it is a cache miss.
596 # Existing file will be overwritten by whoever uses the cache and added
597 # to _lru.
598 f.close()
599 raise CacheMiss(digest)
600 return f
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400601
602 def write(self, digest, content):
603 assert content is not None
604 with self._lock:
605 self._protected = self._protected or digest
606 path = self._path(digest)
607 # A stale broken file may remain. It is possible for the file to have write
608 # access bit removed which would cause the file_write() call to fail to open
609 # in write mode. Take no chance here.
610 file_path.try_remove(path)
611 try:
612 size = file_write(path, content)
613 except:
614 # There are two possible places were an exception can occur:
615 # 1) Inside |content| generator in case of network or unzipping errors.
616 # 2) Inside file_write itself in case of disk IO errors.
617 # In any case delete an incomplete file and propagate the exception to
618 # caller, it will be logged there.
619 file_path.try_remove(path)
620 raise
621 # Make the file read-only in the cache. This has a few side-effects since
622 # the file node is modified, so every directory entries to this file becomes
623 # read-only. It's fine here because it is a new file.
624 file_path.set_read_only(path, True)
625 with self._lock:
626 self._add(digest, size)
627 return digest
628
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000629 # Internal functions.
630
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400631 def _load(self, trim, time_fn):
632 """Loads state of the cache from json file.
633
634 If cache_dir does not exist on disk, it is created.
635 """
636 self._lock.assert_locked()
637
638 if not fs.isfile(self.state_file):
639 if not fs.isdir(self.cache_dir):
640 fs.makedirs(self.cache_dir)
641 else:
642 # Load state of the cache.
643 try:
644 self._lru = lru.LRUDict.load(self.state_file)
645 except ValueError as err:
646 logging.error('Failed to load cache state: %s' % (err,))
Takuto Ikutaeccc88c2019-12-13 14:46:32 +0000647 # Don't want to keep broken cache dir.
648 file_path.rmtree(self.cache_dir)
649 fs.makedirs(self.cache_dir)
Matt Kotsenasefe30092020-03-19 01:12:55 +0000650 self._free_disk = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400651 if time_fn:
652 self._lru.time_fn = time_fn
653 if trim:
654 self._trim()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400655
  def _save(self):
    """Saves the LRU ordering.

    Write protection on the state file (and, outside Windows, on its parent
    directory) is lifted first so the file can be rewritten. Must be called
    with self._lock held.
    """
    self._lock.assert_locked()
    if sys.platform != 'win32':
      d = os.path.dirname(self.state_file)
      if fs.isdir(d):
        # Necessary otherwise the file can't be created.
        file_path.set_read_only(d, False)
    if fs.isfile(self.state_file):
      file_path.set_read_only(self.state_file, False)
    self._lru.save(self.state_file)
667
668 def _trim(self):
669 """Trims anything we don't know, make sure enough free space exists."""
670 self._lock.assert_locked()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000671 evicted = []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400672
673 # Trim old items.
674 if self.policies.max_age_secs:
675 cutoff = self._lru.time_fn() - self.policies.max_age_secs
676 while self._lru:
677 oldest = self._lru.get_oldest()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000678 # (key, (data, ts)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400679 if oldest[1][1] >= cutoff:
680 break
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000681 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400682
683 # Ensure maximum cache size.
684 if self.policies.max_cache_size:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000685 total_size = sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400686 while total_size > self.policies.max_cache_size:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000687 e = self._remove_lru_file(True)
688 evicted.append(e)
689 total_size -= e
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400690
691 # Ensure maximum number of items in the cache.
692 if self.policies.max_items and len(self._lru) > self.policies.max_items:
Marc-Antoine Ruel0fdee222019-10-10 14:42:40 +0000693 for _ in range(len(self._lru) - self.policies.max_items):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000694 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400695
696 # Ensure enough free space.
697 self._free_disk = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400698 while (
699 self.policies.min_free_space and
700 self._lru and
701 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000702 # self._free_disk is updated by this call.
703 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400704
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000705 if evicted:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000706 total_usage = sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400707 usage_percent = 0.
708 if total_usage:
709 usage_percent = 100. * float(total_usage) / self.policies.max_cache_size
710
711 logging.warning(
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000712 'Trimmed %d file(s) (%.1fkb) due to not enough free disk space:'
713 ' %.1fkb free, %.1fkb cache (%.1f%% of its maximum capacity of '
Junji Watanabe38b28b02020-04-23 10:23:30 +0000714 '%.1fkb)', len(evicted),
715 sum(evicted) / 1024., self._free_disk / 1024., total_usage / 1024.,
716 usage_percent, self.policies.max_cache_size / 1024.)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400717 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000718 return evicted
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400719
720 def _path(self, digest):
721 """Returns the path to one item."""
722 return os.path.join(self.cache_dir, digest)
723
724 def _remove_lru_file(self, allow_protected):
Quinten Yearsley0bc84ce2020-04-09 22:38:08 +0000725 """Removes the latest recently used file and returns its size.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000726
727 Updates self._free_disk.
728 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400729 self._lock.assert_locked()
730 try:
Takuto Ikutae40f76a2020-01-20 01:22:17 +0000731 digest, _ = self._lru.get_oldest()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400732 if not allow_protected and digest == self._protected:
Takuto Ikutae40f76a2020-01-20 01:22:17 +0000733 total_size = sum(self._lru.values())
734 msg = ('Not enough space to fetch the whole isolated tree.\n'
Takuto Ikutaa953f272020-01-20 02:59:17 +0000735 ' %s\n cache=%d bytes (%.3f GiB), %d items; '
736 '%s bytes (%.3f GiB) free_space') % (
737 self.policies, total_size, float(total_size) / 1024**3,
738 len(self._lru), self._free_disk,
739 float(self._free_disk) / 1024**3)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400740 raise NoMoreSpace(msg)
741 except KeyError:
742 # That means an internal error.
743 raise NoMoreSpace('Nothing to remove, can\'t happend')
744 digest, (size, _) = self._lru.pop_oldest()
Takuto Ikuta8d8ca9b2021-02-26 02:31:43 +0000745 logging.debug('Removing LRU file %s with size %s bytes', digest, size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400746 self._delete_file(digest, size)
747 return size
748
def _add(self, digest, size=UNKNOWN_FILE_SIZE):
  """Registers |digest| in the LRU as the most recent item.

  Arguments:
  - digest: name of the item that was just written into the cache.
  - size: size in bytes; when left as UNKNOWN_FILE_SIZE, the size is read from
    the file on disk.

  The size is recorded in self._added and subtracted from self._free_disk.
  The caller must hold self._lock.
  """
  self._lock.assert_locked()
  if size == UNKNOWN_FILE_SIZE:
    size = fs.stat(self._path(digest)).st_size
  self._added.append(size)
  self._lru.add(digest, size)
  self._free_disk -= size

  # Do a quicker version of self._trim(). It only enforces free disk space,
  # not cache size limits. It doesn't actually look at real free disk space,
  # only uses its cache values. self._trim() will be called later to enforce
  # real trimming but doing this quick version here makes it possible to map
  # an isolated that is larger than the current amount of free disk space when
  # the cache size is already large.
  min_free = self.policies.min_free_space
  if not min_free:
    return
  while self._lru and self._free_disk < min_free:
    # self._free_disk is updated by this call.
    evicted_size = self._remove_lru_file(False)
    if evicted_size == -1:
      break
768
def _delete_file(self, digest, size=UNKNOWN_FILE_SIZE):
  """Removes the file backing |digest| from the cache directory.

  Arguments:
  - digest: name of the item to delete.
  - size: size in bytes to credit back; when left as UNKNOWN_FILE_SIZE it is
    read from disk, falling back to 0 if the stat fails.

  self._free_disk grows by |size| only when file_path.try_remove() reports
  success. An OSError other than ENOENT is logged and not re-raised. The
  caller must hold self._lock.
  """
  self._lock.assert_locked()
  try:
    target = self._path(digest)
    if size == UNKNOWN_FILE_SIZE:
      try:
        size = fs.stat(target).st_size
      except OSError:
        size = 0
    removed = file_path.try_remove(target)
    if removed:
      self._free_disk += size
  except OSError as e:
    if e.errno == errno.ENOENT:
      return
    logging.error('Error attempting to delete a file %s:\n%s' % (digest, e))
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400786
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000787 def _get_mtime(self, digest):
788 """Get mtime of cache file."""
789 return os.path.getmtime(self._path(digest))
790
def _is_valid_hash(self, digest):
  """Checks that the cached file content hashes to |digest|.

  The file is read in 1 MiB chunks and hashed with SHA-256; the result is
  compared with |digest| as a hexadecimal string.

  Returns True when they match, False otherwise.
  """
  hasher = hashlib.sha256()
  with fs.open(self._path(digest), 'rb') as f:
    # iter() with a sentinel stops at the first empty read, i.e. EOF.
    for chunk in iter(lambda: f.read(1024 * 1024), b''):
      hasher.update(chunk)
  return digest == hasher.hexdigest()
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000801
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400802
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400803class NamedCache(Cache):
804 """Manages cache directories.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400805
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400806 A cache entry is a tuple (name, path), where
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400807 name is a short identifier that describes the contents of the cache, e.g.
808 "git_v8" could be all git repositories required by v8 builds, or
809 "build_chromium" could be build artefacts of the Chromium.
810 path is a directory path relative to the task run dir. Cache installation
811 puts the requested cache directory at the path.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400812 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400813 _DIR_ALPHABET = string.ascii_letters + string.digits
814 STATE_FILE = u'state.json'
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +0000815 NAMED_DIR = u'named'
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400816
def __init__(self, cache_dir, policies, time_fn=None):
  """Initializes NamedCache.

  Arguments:
  - cache_dir is a directory for persistent cache storage.
  - policies is a CachePolicies instance.
  - time_fn is a function that returns timestamp (float) and used to take
    timestamps when new caches are requested. Used in unit tests.

  If the state file exists but cannot be loaded, or holds a non-integer size,
  the whole cache directory is deleted and recreated empty.
  """
  super(NamedCache, self).__init__(cache_dir)
  self._policies = policies
  self.state_file = os.path.join(cache_dir, self.STATE_FILE)
  # LRU {cache_name -> tuple(cache_location, size)}
  self._lru = lru.LRUDict()
  if not fs.isdir(self.cache_dir):
    fs.makedirs(self.cache_dir)
  elif fs.isfile(self.state_file):
    try:
      self._lru = lru.LRUDict.load(self.state_file)
      # NOTE(review): a v1 entry whose value is a 2-character directory name
      # also unpacks into two items here and then fails the integer check, so
      # such a state file looks like it is obliterated before _try_upgrade()
      # gets a chance to run -- confirm.
      for _, size in self._lru.values():
        if not isinstance(size, six.integer_types):
          # Dump the offending state file to help debugging. Go through fs
          # like every other file access in this module instead of the
          # builtin open().
          with fs.open(self.state_file, 'r') as f:
            logging.info('named cache state file: %s\n%s', self.state_file,
                         f.read())
          raise ValueError("size is not integer: %s" % size)

    except ValueError:
      logging.exception(
          'NamedCache: failed to load named cache state file; obliterating')
      file_path.rmtree(self.cache_dir)
      fs.makedirs(self.cache_dir)
      self._lru = lru.LRUDict()
  with self._lock:
    self._try_upgrade()
  if time_fn:
    self._lru.time_fn = time_fn
853
@property
def available(self):
  """Returns the names of the caches currently tracked, as a new set."""
  with self._lock:
    names = set(self._lru)
  return names
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400859
Takuto Ikutaeab23172020-07-02 03:50:02 +0000860 def _sudo_chown(self, path):
861 if sys.platform == 'win32':
862 return
863 uid = os.getuid()
864 if os.stat(path).st_uid == uid:
865 return
866 # Maybe owner of |path| is different from runner of this script. This is to
867 # make fs.rename work in that case.
868 # https://crbug.com/986676
869 subprocess.check_call(['sudo', '-n', 'chown', str(uid), path])
870
def install(self, dst, name):
  """Creates the directory |dst| and moves a previous named cache |name| if it
  was in the local named caches cache.

  dst must be absolute, unicode and must not exist.

  Returns the reused named cache size in bytes, or 0 if none was present.

  Raises NamedCacheError if cannot install the cache.

  The state file is saved on every exit path.
  """
  logging.info('NamedCache.install(%r, %r)', dst, name)
  with self._lock:
    try:
      if fs.isdir(dst):
        raise NamedCacheError(
            'installation directory %r already exists' % dst)

      # Remove the named symlink if it exists. fs.exists() follows symlinks
      # and so reports False for a dangling one; check islink() as well so a
      # stale alias does not survive.
      link_name = self._get_named_path(name)
      if fs.islink(link_name) or fs.exists(link_name):
        # Remove the symlink itself, not its destination.
        fs.remove(link_name)

      if name in self._lru:
        rel_cache, size = self._lru.get(name)
        abs_cache = os.path.join(self.cache_dir, rel_cache)
        if fs.isdir(abs_cache):
          logging.info('- reusing %r; size was %d', rel_cache, size)
          file_path.ensure_tree(os.path.dirname(dst))
          self._sudo_chown(abs_cache)
          fs.rename(abs_cache, dst)
          # The directory is gone from the hive; this only drops the entry.
          self._remove(name)
          return size

        logging.warning('- expected directory %r, does not exist', rel_cache)
        self._remove(name)

      # The named cache does not exist, create an empty directory. When
      # uninstalling, we will move it back to the cache and create an
      # entry.
      logging.info('- creating new directory')
      file_path.ensure_tree(dst)
      return 0
    except (IOError, OSError) as ex:
      # PermissionError is not listed: it is a subclass of OSError on
      # Python 3 and is an undefined name on Python 2.
      #
      # Grab the traceback before running any diagnostics, since handling
      # another exception below would replace sys.exc_info() on Python 2.
      tb = sys.exc_info()[2]
      if sys.platform == 'win32':
        print("There may be running process in cache"
              " e.g. https://crbug.com/1239809#c14",
              file=sys.stderr)
        try:
          subprocess.check_call(
              ["powershell", "get-process | select path,starttime"])
        except (OSError, subprocess.CalledProcessError):
          # The process listing is only a debugging aid; it must not mask
          # the installation error reported below.
          logging.exception('NamedCache: failed to list running processes')

      # Raise using the original traceback.
      exc = NamedCacheError(
          'cannot install cache named %r at %r: %s' % (name, dst, ex))
      six.reraise(type(exc), exc, tb)
    finally:
      self._save()
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400928
def uninstall(self, src, name):
  """Moves the cache directory back into the named cache hive for an eventual
  reuse.

  The opposite of install().

  src must be absolute and unicode. Its content is moved back into the local
  named caches cache.

  Returns the named cache size in bytes. An empty cache (size 0) is not saved.
  Returns None if |src| does not exist anymore.

  Raises NamedCacheError if cannot uninstall the cache.
  """
  logging.info('NamedCache.uninstall(%r, %r)', src, name)
  start = time.time()
  with self._lock:
    try:
      if not fs.isdir(src):
        logging.warning(
            'NamedCache: Directory %r does not exist anymore. Cache lost.',
            src)
        return

      if name in self._lru:
        # This shouldn't happen but just remove the preexisting one and move
        # on.
        logging.error('- overwriting existing cache!')
        self._remove(name)

      # Calculate the size of the named cache to keep. It's important because
      # if size is zero (it's empty), we do not want to add it back to the
      # named caches cache.
      size = file_path.get_recursive_size(src)
      logging.info('- Size is %d', size)
      if not size:
        # Do not save empty named cache.
        return size

      # Move the dir and create an entry for the named cache.
      rel_cache = self._allocate_dir()
      abs_cache = os.path.join(self.cache_dir, rel_cache)
      logging.info('- Moving to %r', rel_cache)
      file_path.ensure_tree(os.path.dirname(abs_cache))
      self._sudo_chown(src)
      fs.rename(src, abs_cache)

      self._lru.add(name, (rel_cache, size))
      self._added.append(size)

      # Create symlink <cache_dir>/<named>/<name> -> <cache_dir>/<short name>
      # for user convenience.
      named_path = self._get_named_path(name)
      if fs.exists(named_path):
        file_path.remove(named_path)
      else:
        file_path.ensure_tree(os.path.dirname(named_path))

      try:
        if fs.islink(named_path):
          # fs.exists() follows symlinks, so a dangling alias is not caught
          # above and would make fs.symlink() fail because the name is taken.
          fs.unlink(named_path)
        fs.symlink(os.path.join(u'..', rel_cache), named_path)
        logging.info(
            'NamedCache: Created symlink %r to %r', named_path, abs_cache)
      except OSError:
        # Ignore on Windows. It happens when running as a normal user or when
        # UAC is enabled and the user is a filtered administrator account.
        if sys.platform != 'win32':
          raise
      return size
    except (IOError, OSError) as ex:
      # PermissionError is not listed: it is a subclass of OSError on
      # Python 3 and is an undefined name on Python 2.
      # Raise using the original traceback.
      exc = NamedCacheError(
          'cannot uninstall cache named %r at %r: %s' % (name, src, ex))
      six.reraise(type(exc), exc, sys.exc_info()[2])
    finally:
      # Call save() at every uninstall. The assumptions are:
      # - The total the number of named caches is low, so the state.json file
      #   is small, so the time it takes to write it to disk is short.
      # - The number of mapped named caches per task is low, so the number of
      #   times save() is called on tear-down isn't high enough to be
      #   significant.
      # - uninstall() sometimes throws due to file locking on Windows or
      #   access rights on Linux. We want to keep as many as possible.
      self._save()
      logging.info('NamedCache.uninstall(%r, %r) took %d seconds', src, name,
                   time.time() - start)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001013
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001014 # Cache interface implementation.
1015
1016 def __len__(self):
1017 with self._lock:
1018 return len(self._lru)
1019
1020 def __iter__(self):
1021 # This is not thread-safe.
1022 return self._lru.__iter__()
1023
John Budorickc6186972020-02-26 00:58:14 +00001024 def __contains__(self, name):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001025 with self._lock:
John Budorickc6186972020-02-26 00:58:14 +00001026 return name in self._lru
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001027
@property
def total_size(self):
  """Returns the sum of the sizes recorded for every tracked named cache."""
  with self._lock:
    sizes = [size for _rel_path, size in self._lru.values()]
    return sum(sizes)
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001032
def get_oldest(self):
  """Returns the timestamp of the oldest LRU entry.

  Returns None when self._lru.get_oldest() raises KeyError.
  """
  with self._lock:
    try:
      # (key, (value, ts))
      oldest = self._lru.get_oldest()
      value_and_ts = oldest[1]
      return value_and_ts[1]
    except KeyError:
      return None
1040
def remove_oldest(self):
  """Removes the oldest named cache and returns its recorded size."""
  with self._lock:
    # TODO(maruel): Update self._added.
    removed = self._remove_lru_item()
    _name, size = removed
    return size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001046
def save(self):
  """Saves the state while holding the lock; returns what self._save() does."""
  with self._lock:
    result = self._save()
  return result
1050
def touch(self, *names):
  """Touches every entry of |names| present in the LRU, then saves the state.

  Names that are not tracked are skipped.
  """
  with self._lock:
    for candidate in names:
      if candidate not in self._lru:
        continue
      self._lru.touch(candidate)
    self._save()
1057
def trim(self):
  """Evicts named caches, oldest first, until every policy is satisfied.

  Policies are enforced in this order: max_items, max_age_secs,
  min_free_space, max_cache_size. A policy whose value is falsy is skipped.

  Returns the list of sizes, as recorded in the LRU, of the evicted caches.
  The list is empty when the cache directory does not exist.
  """
  evicted = []
  with self._lock:
    if not fs.isdir(self.cache_dir):
      return evicted

    # Trim according to maximum number of items.
    if self._policies.max_items:
      while len(self._lru) > self._policies.max_items:
        name, size = self._remove_lru_item()
        evicted.append(size)
        logging.info(
            'NamedCache.trim(): Removed %r(%d) due to max_items(%d)',
            name, size, self._policies.max_items)

    # Trim according to maximum age.
    if self._policies.max_age_secs:
      cutoff = self._lru.time_fn() - self._policies.max_age_secs
      while self._lru:
        _name, (_data, ts) = self._lru.get_oldest()
        if ts >= cutoff:
          break
        name, size = self._remove_lru_item()
        evicted.append(size)
        logging.info(
            'NamedCache.trim(): Removed %r(%d) due to max_age_secs(%d)',
            name, size, self._policies.max_age_secs)

    # Trim according to minimum free space.
    if self._policies.min_free_space:
      while self._lru:
        # Queried on every iteration since each removal changes it.
        free_space = file_path.get_free_space(self.cache_dir)
        if free_space >= self._policies.min_free_space:
          break
        name, size = self._remove_lru_item()
        evicted.append(size)
        logging.info(
            'NamedCache.trim(): Removed %r(%d) due to min_free_space(%d)',
            name, size, self._policies.min_free_space)

    # Trim according to maximum total size.
    if self._policies.max_cache_size:
      # Sum the recorded sizes once and keep a running total, instead of
      # walking the whole LRU again after each eviction.
      total = sum(size for _rel_cache, size in self._lru.values())
      while self._lru and total > self._policies.max_cache_size:
        name, size = self._remove_lru_item()
        total -= size
        evicted.append(size)
        logging.info(
            'NamedCache.trim(): Removed %r(%d) due to max_cache_size(%d)',
            name, size, self._policies.max_cache_size)

    self._save()
  return evicted
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001112
def cleanup(self):
  """Removes unknown directories.

  Does not recalculate the cache size since it's surprisingly slow on some
  OSes.

  First reconciles the content of the cache directory with the LRU, then the
  aliases under the named directory. The state file is saved on every exit
  path.

  Returns True if everything unexpected could be removed, False if at least
  one removal failed.
  """
  logging.info('NamedCache.cleanup(): Cleaning %s', self.cache_dir)
  success = True
  with self._lock:
    try:
      actual = set(fs.listdir(self.cache_dir))
      actual.discard(self.NAMED_DIR)
      actual.discard(self.STATE_FILE)
      # Maps the on-disk directory name back to its LRU key.
      expected = {v[0]: k for k, v in self._lru.items()}
      # First, handle the actual cache content.
      # Remove missing entries.
      for missing in (set(expected) - actual):
        name, size = self._lru.pop(expected[missing])
        logging.warning(
            'NamedCache.cleanup(): Missing on disk %r(%d)', name, size)
      # Remove unexpected items.
      for unexpected in (actual - set(expected)):
        try:
          p = os.path.join(self.cache_dir, unexpected)
          logging.warning(
              'NamedCache.cleanup(): Unexpected %r', unexpected)
          if fs.isdir(p) and not fs.islink(p):
            file_path.rmtree(p)
          else:
            fs.remove(p)
        except (IOError, OSError) as e:
          logging.error('Failed to remove %s: %s', unexpected, e)
          success = False

      # Second, fix named cache links.
      named = os.path.join(self.cache_dir, self.NAMED_DIR)
      if fs.isdir(named):
        actual = set(fs.listdir(named))
        expected = set(self._lru)
        # Confirm entries. Do not add missing ones for now.
        for name in expected.intersection(actual):
          p = os.path.join(self.cache_dir, self.NAMED_DIR, name)
          expected_link = os.path.join(u'..', self._lru[name][0])
          if fs.islink(p):
            link = fs.readlink(p)
            if expected_link == link:
              continue
            logging.warning(
                'Unexpected symlink for cache %s: %s, expected %s',
                name, link, expected_link)
          else:
            logging.warning('Unexpected non symlink for cache %s', name)
          if fs.isdir(p) and not fs.islink(p):
            file_path.rmtree(p)
          else:
            fs.remove(p)
        # Remove unexpected items.
        for unexpected in (actual - expected):
          try:
            p = os.path.join(self.cache_dir, self.NAMED_DIR, unexpected)
            # fs.isdir() follows symlinks. As in the two removals above, only
            # a real directory goes to rmtree(); a symlink, even one that
            # points to a directory, is removed as a single entry.
            if fs.isdir(p) and not fs.islink(p):
              file_path.rmtree(p)
            else:
              fs.remove(p)
          except (IOError, OSError) as e:
            logging.error('Failed to remove %s: %s', unexpected, e)
            success = False
    finally:
      self._save()
  return success
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001183
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001184 # Internal functions.
1185
1186 def _try_upgrade(self):
1187 """Upgrades from the old format to the new one if necessary.
1188
1189 This code can be removed so all bots are known to have the right new format.
1190 """
1191 if not self._lru:
1192 return
1193 _name, (data, _ts) = self._lru.get_oldest()
1194 if isinstance(data, (list, tuple)):
1195 return
1196 # Update to v2.
1197 def upgrade(_name, rel_cache):
1198 abs_cache = os.path.join(self.cache_dir, rel_cache)
Takuto Ikuta995da062021-03-17 05:01:59 +00001199 return rel_cache, file_path.get_recursive_size(abs_cache)
1200
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001201 self._lru.transform(upgrade)
1202 self._save()
1203
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001204 def _remove_lru_item(self):
1205 """Removes the oldest LRU entry. LRU must not be empty."""
1206 name, ((_rel_path, size), _ts) = self._lru.get_oldest()
Takuto Ikuta74686842021-07-30 04:11:03 +00001207 logging.info('Removing named cache %r, %d', name, size)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001208 self._remove(name)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001209 return name, size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001210
def _allocate_dir(self):
  """Picks an unused relative path for a new cache directory.

  In practice, it is a 2-letter string. The directory itself is not created
  here.

  Raises NamedCacheError after 1000 distinct candidates were all found to
  exist already.
  """
  # We randomly generate directory names that have two lower/upper case
  # letters or digits. Total number of possibilities is (26*2 + 10)^2 = 3844.
  alphabet = self._DIR_ALPHABET
  base = len(alphabet)
  rejected = set()
  while len(rejected) < 1000:
    first, second = divmod(random.randint(0, base * base - 1), base)
    candidate = alphabet[first] + alphabet[second]
    if candidate in rejected:
      continue
    if not fs.exists(os.path.join(self.cache_dir, candidate)):
      return candidate
    rejected.add(candidate)
  raise NamedCacheError(
      'could not allocate a new cache dir, too many cache dirs')
1232
def _remove(self, name):
  """Removes a cache directory and entry.

  The alias symlink for |name| is unlinked first, whether or not |name| is
  tracked. Then, if |name| is in the LRU, its directory is deleted when it
  exists and the entry is dropped.

  Returns nothing. The caller must hold self._lock.
  """
  self._lock.assert_locked()
  # First try to remove the alias if it exists.
  alias = self._get_named_path(name)
  if fs.islink(alias):
    fs.unlink(alias)

  # Then remove the actual data.
  if name in self._lru:
    rel_path, _size = self._lru.get(name)
    data_dir = os.path.join(self.cache_dir, rel_path)
    if fs.isdir(data_dir):
      file_path.rmtree(data_dir)
    self._lru.pop(name)
1253
def _save(self):
  """Writes the LRU to self.state_file, creating the cache directory first.

  The caller must hold self._lock.
  """
  self._lock.assert_locked()
  cache_root = self.cache_dir
  file_path.ensure_tree(cache_root)
  self._lru.save(self.state_file)
1258
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001259 def _get_named_path(self, name):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001260 return os.path.join(self.cache_dir, self.NAMED_DIR, name)