blob: f3809c9a751c79688c05496256b04a3ce9d7cecb [file] [log] [blame]
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -04001# Copyright 2018 The LUCI Authors. All rights reserved.
2# Use of this source code is governed under the Apache License, Version 2.0
3# that can be found in the LICENSE file.
4
5"""Define local cache policies."""
6
Takuto Ikuta2fe58fd2021-08-18 13:47:36 +00007from __future__ import print_function
8
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -04009import errno
Takuto Ikuta922c8642021-11-18 07:42:16 +000010import hashlib
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040011import io
12import logging
13import os
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040014import random
15import string
Junji Watanabe7b720782020-07-01 01:51:07 +000016import subprocess
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040017import sys
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000018import time
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040019
20from utils import file_path
21from utils import fs
22from utils import lru
23from utils import threading_utils
24from utils import tools
Jonah Hooper9b5bd8c2022-07-21 15:33:41 +000025from utils import logging_utils
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040026
27# The file size to be used when we don't know the correct file size,
28# generally used for .isolated files.
29UNKNOWN_FILE_SIZE = None
30
31
32def file_write(path, content_generator):
33 """Writes file content as generated by content_generator.
34
35 Creates the intermediary directory as needed.
36
37 Returns the number of bytes written.
38
39 Meant to be mocked out in unit tests.
40 """
41 file_path.ensure_tree(os.path.dirname(path))
42 total = 0
43 with fs.open(path, 'wb') as f:
44 for d in content_generator:
45 total += len(d)
46 f.write(d)
47 return total
48
49
50def is_valid_file(path, size):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +000051 """Returns if the given files appears valid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040052
53 Currently it just checks the file exists and its size matches the expectation.
54 """
55 if size == UNKNOWN_FILE_SIZE:
56 return fs.isfile(path)
57 try:
58 actual_size = fs.stat(path).st_size
59 except OSError as e:
Junji Watanabe38b28b02020-04-23 10:23:30 +000060 logging.warning('Can\'t read item %s, assuming it\'s invalid: %s',
61 os.path.basename(path), e)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040062 return False
63 if size != actual_size:
64 logging.warning(
65 'Found invalid item %s; %d != %d',
66 os.path.basename(path), actual_size, size)
67 return False
68 return True
69
70
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000071def trim_caches(caches, path, min_free_space, max_age_secs):
72 """Trims multiple caches.
73
74 The goal here is to coherently trim all caches in a coherent LRU fashion,
75 deleting older items independent of which container they belong to.
76
77 Two policies are enforced first:
78 - max_age_secs
79 - min_free_space
80
81 Once that's done, then we enforce each cache's own policies.
82
83 Returns:
84 Slice containing the size of all items evicted.
85 """
86 min_ts = time.time() - max_age_secs if max_age_secs else 0
87 free_disk = file_path.get_free_space(path) if min_free_space else 0
Jonah Hooper9b5bd8c2022-07-21 15:33:41 +000088 logging_utils.user_logs(
89 "Trimming caches. min_ts: %d, free_disk: %d, min_free_space: %d", min_ts,
90 free_disk, min_free_space)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000091 total = []
92 if min_ts or free_disk:
93 while True:
94 oldest = [(c, c.get_oldest()) for c in caches if len(c) > 0]
95 if not oldest:
96 break
Lei Leife202df2019-06-11 17:33:34 +000097 oldest.sort(key=lambda k: k[1])
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000098 c, ts = oldest[0]
99 if ts >= min_ts and free_disk >= min_free_space:
100 break
101 total.append(c.remove_oldest())
102 if min_free_space:
103 free_disk = file_path.get_free_space(path)
Takuto Ikuta74686842021-07-30 04:11:03 +0000104 logging.info("free_disk after removing oldest entries: %d", free_disk)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000105 # Evaluate each cache's own policies.
106 for c in caches:
Jonah Hooper9b5bd8c2022-07-21 15:33:41 +0000107 logging_utils.user_logs("trimming cache with dir %s", c.cache_dir)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000108 total.extend(c.trim())
109 return total
110
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000111
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400112class NamedCacheError(Exception):
113 """Named cache specific error."""
114
115
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400116class NoMoreSpace(Exception):
117 """Not enough space to map the whole directory."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400118
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -0400119
Junji Watanabeab2102a2022-01-12 01:44:04 +0000120class CachePolicies:
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -0400121 def __init__(self, max_cache_size, min_free_space, max_items, max_age_secs):
122 """Common caching policies for the multiple caches (isolated, named, cipd).
123
124 Arguments:
125 - max_cache_size: Trim if the cache gets larger than this value. If 0, the
126 cache is effectively a leak.
127 - min_free_space: Trim if disk free space becomes lower than this value. If
128 0, it will unconditionally fill the disk.
129 - max_items: Maximum number of items to keep in the cache. If 0, do not
130 enforce a limit.
131 - max_age_secs: Maximum age an item is kept in the cache until it is
132 automatically evicted. Having a lot of dead luggage slows
133 everything down.
134 """
135 self.max_cache_size = max_cache_size
136 self.min_free_space = min_free_space
137 self.max_items = max_items
138 self.max_age_secs = max_age_secs
139
140 def __str__(self):
Takuto Ikutaa953f272020-01-20 02:59:17 +0000141 return ('CachePolicies(max_cache_size=%s (%.3f GiB); max_items=%s; '
142 'min_free_space=%s (%.3f GiB); max_age_secs=%s)') % (
143 self.max_cache_size, float(self.max_cache_size) / 1024**3,
144 self.max_items, self.min_free_space,
145 float(self.min_free_space) / 1024**3, self.max_age_secs)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400146
147
148class CacheMiss(Exception):
149 """Raised when an item is not in cache."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400150 def __init__(self, digest):
151 self.digest = digest
Junji Watanabe38b28b02020-04-23 10:23:30 +0000152 super(CacheMiss,
153 self).__init__('Item with digest %r is not found in cache' % digest)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400154
155
Junji Watanabeab2102a2022-01-12 01:44:04 +0000156class Cache:
Junji Watanabe38b28b02020-04-23 10:23:30 +0000157
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400158 def __init__(self, cache_dir):
159 if cache_dir is not None:
Junji Watanabe7a677e92022-01-13 06:07:31 +0000160 assert isinstance(cache_dir, str), cache_dir
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400161 assert file_path.isabs(cache_dir), cache_dir
162 self.cache_dir = cache_dir
163 self._lock = threading_utils.LockWithAssert()
164 # Profiling values.
165 self._added = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400166 self._used = []
167
Marc-Antoine Ruel6c3be5a2018-09-04 17:19:59 +0000168 def __nonzero__(self):
169 """A cache is always True.
170
171 Otherwise it falls back to __len__, which is surprising.
172 """
173 return True
174
Takuto Ikuta1c717d72020-06-29 10:15:09 +0000175 def __bool__(self):
176 """A cache is always True.
177
178 Otherwise it falls back to __len__, which is surprising.
179 """
180 return True
181
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000182 def __len__(self):
183 """Returns the number of entries in the cache."""
184 raise NotImplementedError()
185
186 def __iter__(self):
187 """Iterates over all the entries names."""
188 raise NotImplementedError()
189
190 def __contains__(self, name):
191 """Returns if an entry is in the cache."""
192 raise NotImplementedError()
193
194 @property
195 def total_size(self):
196 """Returns the total size of the cache in bytes."""
197 raise NotImplementedError()
198
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400199 @property
200 def added(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000201 """Returns a list of the size for each entry added."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400202 with self._lock:
203 return self._added[:]
204
205 @property
206 def used(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000207 """Returns a list of the size for each entry used."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400208 with self._lock:
209 return self._used[:]
210
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000211 def get_oldest(self):
212 """Returns timestamp of oldest cache entry or None.
213
214 Returns:
215 Timestamp of the oldest item.
216
217 Used for manual trimming.
218 """
219 raise NotImplementedError()
220
221 def remove_oldest(self):
222 """Removes the oldest item from the cache.
223
224 Returns:
225 Size of the oldest item.
226
227 Used for manual trimming.
228 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400229 raise NotImplementedError()
230
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000231 def save(self):
232 """Saves the current cache to disk."""
233 raise NotImplementedError()
234
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400235 def trim(self):
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000236 """Enforces cache policies, then calls save().
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400237
238 Returns:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000239 Slice with the size of evicted items.
240 """
241 raise NotImplementedError()
242
243 def cleanup(self):
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000244 """Deletes any corrupted item from the cache, then calls trim(), then
245 save().
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000246
247 It is assumed to take significantly more time than trim().
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400248 """
249 raise NotImplementedError()
250
251
252class ContentAddressedCache(Cache):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400253 """Content addressed cache that stores objects temporarily.
254
255 It can be accessed concurrently from multiple threads, so it should protect
256 its internal state with some lock.
257 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400258
259 def __enter__(self):
260 """Context manager interface."""
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000261 # TODO(maruel): Remove.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400262 return self
263
264 def __exit__(self, _exc_type, _exec_value, _traceback):
265 """Context manager interface."""
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000266 # TODO(maruel): Remove.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400267 return False
268
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400269 def touch(self, digest, size):
270 """Ensures item is not corrupted and updates its LRU position.
271
272 Arguments:
273 digest: hash digest of item to check.
274 size: expected size of this item.
275
276 Returns:
277 True if item is in cache and not corrupted.
278 """
279 raise NotImplementedError()
280
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400281 def getfileobj(self, digest):
282 """Returns a readable file like object.
283
284 If file exists on the file system it will have a .name attribute with an
285 absolute path to the file.
286 """
287 raise NotImplementedError()
288
289 def write(self, digest, content):
290 """Reads data from |content| generator and stores it in cache.
291
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000292 It is possible to write to an object that already exists. It may be
293 ignored (sent to /dev/null) but the timestamp is still updated.
294
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400295 Returns digest to simplify chaining.
296 """
297 raise NotImplementedError()
298
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400299
300class MemoryContentAddressedCache(ContentAddressedCache):
301 """ContentAddressedCache implementation that stores everything in memory."""
302
Lei Leife202df2019-06-11 17:33:34 +0000303 def __init__(self, file_mode_mask=0o500):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400304 """Args:
305 file_mode_mask: bit mask to AND file mode with. Default value will make
306 all mapped files to be read only.
307 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400308 super(MemoryContentAddressedCache, self).__init__(None)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400309 self._file_mode_mask = file_mode_mask
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000310 # Items in a LRU lookup dict(digest: size).
311 self._lru = lru.LRUDict()
312
313 # Cache interface implementation.
314
315 def __len__(self):
316 with self._lock:
317 return len(self._lru)
318
319 def __iter__(self):
320 # This is not thread-safe.
321 return self._lru.__iter__()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400322
323 def __contains__(self, digest):
324 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000325 return digest in self._lru
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400326
327 @property
328 def total_size(self):
329 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000330 return sum(len(i) for i in self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400331
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000332 def get_oldest(self):
333 with self._lock:
334 try:
335 # (key, (value, ts))
336 return self._lru.get_oldest()[1][1]
337 except KeyError:
338 return None
339
340 def remove_oldest(self):
341 with self._lock:
342 # TODO(maruel): Update self._added.
343 # (key, (value, ts))
344 return len(self._lru.pop_oldest()[1][0])
345
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000346 def save(self):
347 pass
348
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000349 def trim(self):
350 """Trimming is not implemented for MemoryContentAddressedCache."""
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000351 return []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400352
353 def cleanup(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000354 """Cleaning is irrelevant, as there's no stateful serialization."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400355
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000356 # ContentAddressedCache interface implementation.
357
358 def __contains__(self, digest):
359 with self._lock:
360 return digest in self._lru
361
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400362 def touch(self, digest, size):
363 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000364 try:
365 self._lru.touch(digest)
366 except KeyError:
367 return False
368 return True
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400369
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400370 def getfileobj(self, digest):
371 with self._lock:
372 try:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000373 d = self._lru[digest]
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400374 except KeyError:
375 raise CacheMiss(digest)
376 self._used.append(len(d))
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000377 self._lru.touch(digest)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400378 return io.BytesIO(d)
379
380 def write(self, digest, content):
381 # Assemble whole stream before taking the lock.
Junji Watanabe7a677e92022-01-13 06:07:31 +0000382 data = b''.join(content)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400383 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000384 self._lru.add(digest, data)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400385 self._added.append(len(data))
386 return digest
387
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400388
389class DiskContentAddressedCache(ContentAddressedCache):
390 """Stateful LRU cache in a flat hash table in a directory.
391
392 Saves its state as json file.
393 """
Junji Watanabe53d31882022-01-13 07:58:00 +0000394 STATE_FILE = 'state.json'
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400395
Marc-Antoine Ruel79d42192019-02-06 19:24:16 +0000396 def __init__(self, cache_dir, policies, trim, time_fn=None):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400397 """
398 Arguments:
399 cache_dir: directory where to place the cache.
400 policies: CachePolicies instance, cache retention policies.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400401 trim: if True to enforce |policies| right away.
Marc-Antoine Ruel79d42192019-02-06 19:24:16 +0000402 It can be done later by calling trim() explicitly.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400403 """
404 # All protected methods (starting with '_') except _path should be called
405 # with self._lock held.
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400406 super(DiskContentAddressedCache, self).__init__(cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400407 self.policies = policies
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400408 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
409 # Items in a LRU lookup dict(digest: size).
410 self._lru = lru.LRUDict()
411 # Current cached free disk space. It is updated by self._trim().
412 file_path.ensure_tree(self.cache_dir)
413 self._free_disk = file_path.get_free_space(self.cache_dir)
414 # The first item in the LRU cache that must not be evicted during this run
415 # since it was referenced. All items more recent that _protected in the LRU
416 # cache are also inherently protected. It could be a set() of all items
417 # referenced but this increases memory usage without a use case.
418 self._protected = None
419 # Cleanup operations done by self._load(), if any.
420 self._operations = []
421 with tools.Profiler('Setup'):
422 with self._lock:
423 self._load(trim, time_fn)
424
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000425 # Cache interface implementation.
426
427 def __len__(self):
428 with self._lock:
429 return len(self._lru)
430
431 def __iter__(self):
432 # This is not thread-safe.
433 return self._lru.__iter__()
434
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400435 def __contains__(self, digest):
436 with self._lock:
437 return digest in self._lru
438
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400439 @property
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400440 def total_size(self):
441 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000442 return sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400443
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000444 def get_oldest(self):
445 with self._lock:
446 try:
447 # (key, (value, ts))
448 return self._lru.get_oldest()[1][1]
449 except KeyError:
450 return None
451
452 def remove_oldest(self):
453 with self._lock:
454 # TODO(maruel): Update self._added.
455 return self._remove_lru_file(True)
456
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000457 def save(self):
458 with self._lock:
459 return self._save()
460
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000461 def trim(self):
462 """Forces retention policies."""
463 with self._lock:
464 return self._trim()
465
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400466 def cleanup(self):
467 """Cleans up the cache directory.
468
469 Ensures there is no unknown files in cache_dir.
470 Ensures the read-only bits are set correctly.
471
472 At that point, the cache was already loaded, trimmed to respect cache
473 policies.
474 """
Junji Watanabe66041012021-08-11 06:40:08 +0000475 logging.info('DiskContentAddressedCache.cleanup(): Cleaning %s',
476 self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400477 with self._lock:
Lei Leife202df2019-06-11 17:33:34 +0000478 fs.chmod(self.cache_dir, 0o700)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400479 # Ensure that all files listed in the state still exist and add new ones.
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000480 previous = set(self._lru)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400481 # It'd be faster if there were a readdir() function.
482 for filename in fs.listdir(self.cache_dir):
483 if filename == self.STATE_FILE:
Lei Leife202df2019-06-11 17:33:34 +0000484 fs.chmod(os.path.join(self.cache_dir, filename), 0o600)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400485 continue
486 if filename in previous:
Lei Leife202df2019-06-11 17:33:34 +0000487 fs.chmod(os.path.join(self.cache_dir, filename), 0o400)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400488 previous.remove(filename)
489 continue
490
491 # An untracked file. Delete it.
Junji Watanabe66041012021-08-11 06:40:08 +0000492 logging.warning(
493 'DiskContentAddressedCache.cleanup(): Removing unknown file %s',
494 filename)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400495 p = self._path(filename)
496 if fs.isdir(p):
497 try:
498 file_path.rmtree(p)
499 except OSError:
500 pass
501 else:
502 file_path.try_remove(p)
503 continue
504
505 if previous:
506 # Filter out entries that were not found.
Junji Watanabe66041012021-08-11 06:40:08 +0000507 logging.warning(
508 'DiskContentAddressedCache.cleanup(): Removed %d lost files',
509 len(previous))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400510 for filename in previous:
511 self._lru.pop(filename)
512 self._save()
513
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000514 # Verify hash of every single item to detect corruption. the corrupted
515 # files will be evicted.
Junji Watanabe66041012021-08-11 06:40:08 +0000516 total = 0
517 verified = 0
518 deleted = 0
519 logging.info(
520 'DiskContentAddressedCache.cleanup(): Verifying modified files')
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000521 with self._lock:
Takuto Ikuta1c717d72020-06-29 10:15:09 +0000522 for digest, (_, timestamp) in list(self._lru._items.items()):
Junji Watanabe66041012021-08-11 06:40:08 +0000523 total += 1
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000524 # verify only if the mtime is grather than the timestamp in state.json
525 # to avoid take too long time.
526 if self._get_mtime(digest) <= timestamp:
Quinten Yearsley0bc84ce2020-04-09 22:38:08 +0000527 continue
Junji Watanabe66041012021-08-11 06:40:08 +0000528 logging.warning(
529 'DiskContentAddressedCache.cleanup(): Item has been modified.'
530 ' verifying item: %s', digest)
531 is_valid = self._is_valid_hash(digest)
532 verified += 1
533 logging.warning(
534 'DiskContentAddressedCache.cleanup(): verified. is_valid: %s, '
535 'item: %s', is_valid, digest)
536 if is_valid:
Quinten Yearsley0bc84ce2020-04-09 22:38:08 +0000537 # Update timestamp in state.json
538 self._lru.touch(digest)
539 continue
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000540 # remove corrupted file from LRU and file system
541 self._lru.pop(digest)
542 self._delete_file(digest, UNKNOWN_FILE_SIZE)
Junji Watanabe66041012021-08-11 06:40:08 +0000543 deleted += 1
544 logging.error(
545 'DiskContentAddressedCache.cleanup(): Deleted corrupted item: %s',
546 digest)
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000547 self._save()
Junji Watanabe66041012021-08-11 06:40:08 +0000548 logging.info(
549 'DiskContentAddressedCache.cleanup(): Verified modified files.'
550 ' total: %d, verified: %d, deleted: %d', total, verified, deleted)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400551
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000552 # ContentAddressedCache interface implementation.
553
554 def __contains__(self, digest):
555 with self._lock:
556 return digest in self._lru
557
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400558 def touch(self, digest, size):
559 """Verifies an actual file is valid and bumps its LRU position.
560
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000561 Returns False if the file is missing or invalid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400562
563 Note that is doesn't compute the hash so it could still be corrupted if the
564 file size didn't change.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400565 """
566 # Do the check outside the lock.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000567 looks_valid = is_valid_file(self._path(digest), size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400568
569 # Update its LRU position.
570 with self._lock:
571 if digest not in self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000572 if looks_valid:
573 # Exists but not in the LRU anymore.
574 self._delete_file(digest, size)
575 return False
576 if not looks_valid:
577 self._lru.pop(digest)
578 # Exists but not in the LRU anymore.
579 self._delete_file(digest, size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400580 return False
581 self._lru.touch(digest)
582 self._protected = self._protected or digest
583 return True
584
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400585 def getfileobj(self, digest):
586 try:
587 f = fs.open(self._path(digest), 'rb')
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400588 except IOError:
589 raise CacheMiss(digest)
Vadim Shtayura33054fa2018-11-01 12:47:59 +0000590 with self._lock:
591 try:
592 self._used.append(self._lru[digest])
593 except KeyError:
594 # If the digest is not actually in _lru, assume it is a cache miss.
595 # Existing file will be overwritten by whoever uses the cache and added
596 # to _lru.
597 f.close()
598 raise CacheMiss(digest)
599 return f
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400600
601 def write(self, digest, content):
602 assert content is not None
603 with self._lock:
604 self._protected = self._protected or digest
605 path = self._path(digest)
606 # A stale broken file may remain. It is possible for the file to have write
607 # access bit removed which would cause the file_write() call to fail to open
608 # in write mode. Take no chance here.
609 file_path.try_remove(path)
610 try:
611 size = file_write(path, content)
612 except:
613 # There are two possible places were an exception can occur:
614 # 1) Inside |content| generator in case of network or unzipping errors.
615 # 2) Inside file_write itself in case of disk IO errors.
616 # In any case delete an incomplete file and propagate the exception to
617 # caller, it will be logged there.
618 file_path.try_remove(path)
619 raise
620 # Make the file read-only in the cache. This has a few side-effects since
621 # the file node is modified, so every directory entries to this file becomes
622 # read-only. It's fine here because it is a new file.
623 file_path.set_read_only(path, True)
624 with self._lock:
625 self._add(digest, size)
626 return digest
627
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000628 # Internal functions.
629
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400630 def _load(self, trim, time_fn):
631 """Loads state of the cache from json file.
632
633 If cache_dir does not exist on disk, it is created.
634 """
635 self._lock.assert_locked()
636
637 if not fs.isfile(self.state_file):
638 if not fs.isdir(self.cache_dir):
639 fs.makedirs(self.cache_dir)
640 else:
641 # Load state of the cache.
642 try:
643 self._lru = lru.LRUDict.load(self.state_file)
644 except ValueError as err:
645 logging.error('Failed to load cache state: %s' % (err,))
Takuto Ikutaeccc88c2019-12-13 14:46:32 +0000646 # Don't want to keep broken cache dir.
647 file_path.rmtree(self.cache_dir)
648 fs.makedirs(self.cache_dir)
Matt Kotsenasefe30092020-03-19 01:12:55 +0000649 self._free_disk = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400650 if time_fn:
651 self._lru.time_fn = time_fn
652 if trim:
653 self._trim()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400654
655 def _save(self):
656 """Saves the LRU ordering."""
657 self._lock.assert_locked()
658 if sys.platform != 'win32':
659 d = os.path.dirname(self.state_file)
660 if fs.isdir(d):
661 # Necessary otherwise the file can't be created.
662 file_path.set_read_only(d, False)
663 if fs.isfile(self.state_file):
664 file_path.set_read_only(self.state_file, False)
665 self._lru.save(self.state_file)
666
667 def _trim(self):
668 """Trims anything we don't know, make sure enough free space exists."""
669 self._lock.assert_locked()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000670 evicted = []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400671
672 # Trim old items.
673 if self.policies.max_age_secs:
674 cutoff = self._lru.time_fn() - self.policies.max_age_secs
675 while self._lru:
676 oldest = self._lru.get_oldest()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000677 # (key, (data, ts)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400678 if oldest[1][1] >= cutoff:
679 break
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000680 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400681
682 # Ensure maximum cache size.
683 if self.policies.max_cache_size:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000684 total_size = sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400685 while total_size > self.policies.max_cache_size:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000686 e = self._remove_lru_file(True)
687 evicted.append(e)
688 total_size -= e
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400689
690 # Ensure maximum number of items in the cache.
691 if self.policies.max_items and len(self._lru) > self.policies.max_items:
Marc-Antoine Ruel0fdee222019-10-10 14:42:40 +0000692 for _ in range(len(self._lru) - self.policies.max_items):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000693 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400694
695 # Ensure enough free space.
696 self._free_disk = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400697 while (
698 self.policies.min_free_space and
699 self._lru and
700 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000701 # self._free_disk is updated by this call.
702 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400703
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000704 if evicted:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000705 total_usage = sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400706 usage_percent = 0.
707 if total_usage:
708 usage_percent = 100. * float(total_usage) / self.policies.max_cache_size
709
710 logging.warning(
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000711 'Trimmed %d file(s) (%.1fkb) due to not enough free disk space:'
712 ' %.1fkb free, %.1fkb cache (%.1f%% of its maximum capacity of '
Junji Watanabe38b28b02020-04-23 10:23:30 +0000713 '%.1fkb)', len(evicted),
714 sum(evicted) / 1024., self._free_disk / 1024., total_usage / 1024.,
715 usage_percent, self.policies.max_cache_size / 1024.)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400716 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000717 return evicted
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400718
719 def _path(self, digest):
720 """Returns the path to one item."""
721 return os.path.join(self.cache_dir, digest)
722
723 def _remove_lru_file(self, allow_protected):
Quinten Yearsley0bc84ce2020-04-09 22:38:08 +0000724 """Removes the latest recently used file and returns its size.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000725
726 Updates self._free_disk.
727 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400728 self._lock.assert_locked()
729 try:
Takuto Ikutae40f76a2020-01-20 01:22:17 +0000730 digest, _ = self._lru.get_oldest()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400731 if not allow_protected and digest == self._protected:
Takuto Ikutae40f76a2020-01-20 01:22:17 +0000732 total_size = sum(self._lru.values())
733 msg = ('Not enough space to fetch the whole isolated tree.\n'
Takuto Ikutaa953f272020-01-20 02:59:17 +0000734 ' %s\n cache=%d bytes (%.3f GiB), %d items; '
735 '%s bytes (%.3f GiB) free_space') % (
736 self.policies, total_size, float(total_size) / 1024**3,
737 len(self._lru), self._free_disk,
738 float(self._free_disk) / 1024**3)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400739 raise NoMoreSpace(msg)
740 except KeyError:
741 # That means an internal error.
742 raise NoMoreSpace('Nothing to remove, can\'t happend')
743 digest, (size, _) = self._lru.pop_oldest()
Takuto Ikuta8d8ca9b2021-02-26 02:31:43 +0000744 logging.debug('Removing LRU file %s with size %s bytes', digest, size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400745 self._delete_file(digest, size)
746 return size
747
748 def _add(self, digest, size=UNKNOWN_FILE_SIZE):
749 """Adds an item into LRU cache marking it as a newest one."""
750 self._lock.assert_locked()
751 if size == UNKNOWN_FILE_SIZE:
752 size = fs.stat(self._path(digest)).st_size
753 self._added.append(size)
754 self._lru.add(digest, size)
755 self._free_disk -= size
756 # Do a quicker version of self._trim(). It only enforces free disk space,
757 # not cache size limits. It doesn't actually look at real free disk space,
758 # only uses its cache values. self._trim() will be called later to enforce
759 # real trimming but doing this quick version here makes it possible to map
760 # an isolated that is larger than the current amount of free disk space when
761 # the cache size is already large.
Junji Watanabe38b28b02020-04-23 10:23:30 +0000762 while (self.policies.min_free_space and self._lru and
763 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000764 # self._free_disk is updated by this call.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400765 if self._remove_lru_file(False) == -1:
766 break
767
768 def _delete_file(self, digest, size=UNKNOWN_FILE_SIZE):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000769 """Deletes cache file from the file system.
770
771 Updates self._free_disk.
772 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400773 self._lock.assert_locked()
774 try:
775 if size == UNKNOWN_FILE_SIZE:
776 try:
777 size = fs.stat(self._path(digest)).st_size
778 except OSError:
779 size = 0
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000780 if file_path.try_remove(self._path(digest)):
781 self._free_disk += size
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400782 except OSError as e:
783 if e.errno != errno.ENOENT:
784 logging.error('Error attempting to delete a file %s:\n%s' % (digest, e))
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400785
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000786 def _get_mtime(self, digest):
787 """Get mtime of cache file."""
788 return os.path.getmtime(self._path(digest))
789
790 def _is_valid_hash(self, digest):
791 """Verify digest with supported hash algos."""
Takuto Ikuta922c8642021-11-18 07:42:16 +0000792 d = hashlib.sha256()
793 with fs.open(self._path(digest), 'rb') as f:
794 while True:
795 chunk = f.read(1024 * 1024)
796 if not chunk:
797 break
798 d.update(chunk)
799 return digest == d.hexdigest()
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000800
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400801
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400802class NamedCache(Cache):
803 """Manages cache directories.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400804
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400805 A cache entry is a tuple (name, path), where
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400806 name is a short identifier that describes the contents of the cache, e.g.
807 "git_v8" could be all git repositories required by v8 builds, or
808 "build_chromium" could be build artefacts of the Chromium.
809 path is a directory path relative to the task run dir. Cache installation
810 puts the requested cache directory at the path.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400811 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400812 _DIR_ALPHABET = string.ascii_letters + string.digits
Junji Watanabe53d31882022-01-13 07:58:00 +0000813 STATE_FILE = 'state.json'
814 NAMED_DIR = 'named'
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400815
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400816 def __init__(self, cache_dir, policies, time_fn=None):
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400817 """Initializes NamedCaches.
818
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400819 Arguments:
820 - cache_dir is a directory for persistent cache storage.
821 - policies is a CachePolicies instance.
822 - time_fn is a function that returns timestamp (float) and used to take
823 timestamps when new caches are requested. Used in unit tests.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400824 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400825 super(NamedCache, self).__init__(cache_dir)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400826 self._policies = policies
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000827 # LRU {cache_name -> tuple(cache_location, size)}
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400828 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
829 self._lru = lru.LRUDict()
830 if not fs.isdir(self.cache_dir):
831 fs.makedirs(self.cache_dir)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000832 elif fs.isfile(self.state_file):
Marc-Antoine Ruel3543e212018-05-23 01:04:34 +0000833 try:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400834 self._lru = lru.LRUDict.load(self.state_file)
Takuto Ikutac4b85ec2020-06-09 03:42:39 +0000835 for _, size in self._lru.values():
Junji Watanabe7a677e92022-01-13 06:07:31 +0000836 if not isinstance(size, int):
Takuto Ikuta6acf8f92020-07-02 02:06:42 +0000837 with open(self.state_file, 'r') as f:
838 logging.info('named cache state file: %s\n%s', self.state_file,
839 f.read())
Junji Watanabeedcf47d2020-06-11 08:41:01 +0000840 raise ValueError("size is not integer: %s" % size)
Takuto Ikutac4b85ec2020-06-09 03:42:39 +0000841
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400842 except ValueError:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000843 logging.exception(
844 'NamedCache: failed to load named cache state file; obliterating')
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400845 file_path.rmtree(self.cache_dir)
Takuto Ikuta568ddb22020-01-20 23:24:16 +0000846 fs.makedirs(self.cache_dir)
Takuto Ikutadadfbb02020-07-10 03:31:26 +0000847 self._lru = lru.LRUDict()
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000848 with self._lock:
849 self._try_upgrade()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400850 if time_fn:
851 self._lru.time_fn = time_fn
852
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400853 @property
854 def available(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000855 """Returns a set of names of available caches."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400856 with self._lock:
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000857 return set(self._lru)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400858
Takuto Ikutaeab23172020-07-02 03:50:02 +0000859 def _sudo_chown(self, path):
860 if sys.platform == 'win32':
861 return
862 uid = os.getuid()
863 if os.stat(path).st_uid == uid:
864 return
865 # Maybe owner of |path| is different from runner of this script. This is to
866 # make fs.rename work in that case.
867 # https://crbug.com/986676
868 subprocess.check_call(['sudo', '-n', 'chown', str(uid), path])
869
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000870 def install(self, dst, name):
871 """Creates the directory |dst| and moves a previous named cache |name| if it
872 was in the local named caches cache.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400873
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000874 dst must be absolute, unicode and must not exist.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400875
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000876 Returns the reused named cache size in bytes, or 0 if none was present.
877
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400878 Raises NamedCacheError if cannot install the cache.
879 """
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000880 logging.info('NamedCache.install(%r, %r)', dst, name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400881 with self._lock:
882 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000883 if fs.isdir(dst):
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400884 raise NamedCacheError(
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000885 'installation directory %r already exists' % dst)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400886
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000887 # Remove the named symlink if it exists.
888 link_name = self._get_named_path(name)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000889 if fs.exists(link_name):
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000890 # Remove the symlink itself, not its destination.
891 fs.remove(link_name)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000892
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000893 if name in self._lru:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000894 rel_cache, size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400895 abs_cache = os.path.join(self.cache_dir, rel_cache)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000896 if fs.isdir(abs_cache):
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000897 logging.info('- reusing %r; size was %d', rel_cache, size)
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000898 file_path.ensure_tree(os.path.dirname(dst))
Takuto Ikutaeab23172020-07-02 03:50:02 +0000899 self._sudo_chown(abs_cache)
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000900 fs.rename(abs_cache, dst)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400901 self._remove(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000902 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400903
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000904 logging.warning('- expected directory %r, does not exist', rel_cache)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400905 self._remove(name)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400906
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000907 # The named cache does not exist, create an empty directory. When
908 # uninstalling, we will move it back to the cache and create an an
909 # entry.
910 logging.info('- creating new directory')
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000911 file_path.ensure_tree(dst)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000912 return 0
Junji Watanabed2ab86b2021-08-13 07:20:23 +0000913 except (IOError, OSError, PermissionError) as ex:
Takuto Ikuta2fe58fd2021-08-18 13:47:36 +0000914 if sys.platform == 'win32':
915 print("There may be running process in cache"
916 " e.g. https://crbug.com/1239809#c14",
917 file=sys.stderr)
918 subprocess.check_call(
919 ["powershell", "get-process | select path,starttime"])
920
Marc-Antoine Ruel799bc4f2019-01-30 22:54:47 +0000921 # Raise using the original traceback.
922 exc = NamedCacheError(
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000923 'cannot install cache named %r at %r: %s' % (name, dst, ex))
Junji Watanabe7a677e92022-01-13 06:07:31 +0000924 raise exc.with_traceback(sys.exc_info()[2])
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000925 finally:
926 self._save()
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400927
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000928 def uninstall(self, src, name):
929 """Moves the cache directory back into the named cache hive for an eventual
930 reuse.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400931
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000932 The opposite of install().
933
934 src must be absolute and unicode. Its content is moved back into the local
935 named caches cache.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400936
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000937 Returns the named cache size in bytes.
938
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400939 Raises NamedCacheError if cannot uninstall the cache.
940 """
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000941 logging.info('NamedCache.uninstall(%r, %r)', src, name)
Junji Watanabe9cdfff52021-01-08 07:20:35 +0000942 start = time.time()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400943 with self._lock:
944 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000945 if not fs.isdir(src):
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400946 logging.warning(
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000947 'NamedCache: Directory %r does not exist anymore. Cache lost.',
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000948 src)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400949 return
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400950
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000951 if name in self._lru:
952 # This shouldn't happen but just remove the preexisting one and move
953 # on.
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000954 logging.error('- overwriting existing cache!')
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000955 self._remove(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000956
Takuto Ikutac1bdcf22021-10-27 05:07:26 +0000957 # Calculate the size of the named cache to keep. It's important because
958 # if size is zero (it's empty), we do not want to add it back to the
959 # named caches cache.
Takuto Ikuta995da062021-03-17 05:01:59 +0000960 size = file_path.get_recursive_size(src)
Takuto Ikutac1bdcf22021-10-27 05:07:26 +0000961 logging.info('- Size is %d', size)
962 if not size:
963 # Do not save empty named cache.
964 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400965
966 # Move the dir and create an entry for the named cache.
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000967 rel_cache = self._allocate_dir()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400968 abs_cache = os.path.join(self.cache_dir, rel_cache)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000969 logging.info('- Moving to %r', rel_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400970 file_path.ensure_tree(os.path.dirname(abs_cache))
Takuto Ikutaeab23172020-07-02 03:50:02 +0000971 self._sudo_chown(src)
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000972 fs.rename(src, abs_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400973
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000974 self._lru.add(name, (rel_cache, size))
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000975 self._added.append(size)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000976
977 # Create symlink <cache_dir>/<named>/<name> -> <cache_dir>/<short name>
978 # for user convenience.
979 named_path = self._get_named_path(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000980 if fs.exists(named_path):
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000981 file_path.remove(named_path)
982 else:
983 file_path.ensure_tree(os.path.dirname(named_path))
984
985 try:
Junji Watanabe53d31882022-01-13 07:58:00 +0000986 fs.symlink(os.path.join('..', rel_cache), named_path)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000987 logging.info(
988 'NamedCache: Created symlink %r to %r', named_path, abs_cache)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000989 except OSError:
990 # Ignore on Windows. It happens when running as a normal user or when
991 # UAC is enabled and the user is a filtered administrator account.
992 if sys.platform != 'win32':
993 raise
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000994 return size
Junji Watanabed2ab86b2021-08-13 07:20:23 +0000995 except (IOError, OSError, PermissionError) as ex:
Marc-Antoine Ruel799bc4f2019-01-30 22:54:47 +0000996 # Raise using the original traceback.
997 exc = NamedCacheError(
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000998 'cannot uninstall cache named %r at %r: %s' % (name, src, ex))
Junji Watanabe7a677e92022-01-13 06:07:31 +0000999 raise exc.with_traceback(sys.exc_info()[2])
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001000 finally:
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +00001001 # Call save() at every uninstall. The assumptions are:
1002 # - The total the number of named caches is low, so the state.json file
1003 # is small, so the time it takes to write it to disk is short.
1004 # - The number of mapped named caches per task is low, so the number of
1005 # times save() is called on tear-down isn't high enough to be
1006 # significant.
1007 # - uninstall() sometimes throws due to file locking on Windows or
1008 # access rights on Linux. We want to keep as many as possible.
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001009 self._save()
Junji Watanabe9cdfff52021-01-08 07:20:35 +00001010 logging.info('NamedCache.uninstall(%r, %r) took %d seconds', src, name,
1011 time.time() - start)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001012
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001013 # Cache interface implementation.
1014
1015 def __len__(self):
1016 with self._lock:
1017 return len(self._lru)
1018
1019 def __iter__(self):
1020 # This is not thread-safe.
1021 return self._lru.__iter__()
1022
John Budorickc6186972020-02-26 00:58:14 +00001023 def __contains__(self, name):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001024 with self._lock:
John Budorickc6186972020-02-26 00:58:14 +00001025 return name in self._lru
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001026
1027 @property
1028 def total_size(self):
1029 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001030 return sum(size for _rel_path, size in self._lru.values())
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001031
1032 def get_oldest(self):
1033 with self._lock:
1034 try:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001035 # (key, (value, ts))
1036 return self._lru.get_oldest()[1][1]
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001037 except KeyError:
1038 return None
1039
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001040 def remove_oldest(self):
1041 with self._lock:
1042 # TODO(maruel): Update self._added.
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001043 _name, size = self._remove_lru_item()
1044 return size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001045
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +00001046 def save(self):
1047 with self._lock:
1048 return self._save()
1049
John Budorickc6186972020-02-26 00:58:14 +00001050 def touch(self, *names):
1051 with self._lock:
1052 for name in names:
1053 if name in self._lru:
1054 self._lru.touch(name)
1055 self._save()
1056
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001057 def trim(self):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001058 evicted = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001059 with self._lock:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001060 if not fs.isdir(self.cache_dir):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001061 return evicted
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001062
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001063 # Trim according to maximum number of items.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001064 if self._policies.max_items:
1065 while len(self._lru) > self._policies.max_items:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001066 name, size = self._remove_lru_item()
1067 evicted.append(size)
1068 logging.info(
1069 'NamedCache.trim(): Removed %r(%d) due to max_items(%d)',
1070 name, size, self._policies.max_items)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001071
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001072 # Trim according to maximum age.
1073 if self._policies.max_age_secs:
1074 cutoff = self._lru.time_fn() - self._policies.max_age_secs
1075 while self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001076 _name, (_data, ts) = self._lru.get_oldest()
1077 if ts >= cutoff:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001078 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001079 name, size = self._remove_lru_item()
1080 evicted.append(size)
1081 logging.info(
1082 'NamedCache.trim(): Removed %r(%d) due to max_age_secs(%d)',
1083 name, size, self._policies.max_age_secs)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001084
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001085 # Trim according to minimum free space.
1086 if self._policies.min_free_space:
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001087 while self._lru:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001088 free_space = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001089 if free_space >= self._policies.min_free_space:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001090 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001091 name, size = self._remove_lru_item()
1092 evicted.append(size)
1093 logging.info(
1094 'NamedCache.trim(): Removed %r(%d) due to min_free_space(%d)',
1095 name, size, self._policies.min_free_space)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001096
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001097 # Trim according to maximum total size.
1098 if self._policies.max_cache_size:
1099 while self._lru:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001100 total = sum(size for _rel_cache, size in self._lru.values())
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001101 if total <= self._policies.max_cache_size:
1102 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001103 name, size = self._remove_lru_item()
1104 evicted.append(size)
1105 logging.info(
1106 'NamedCache.trim(): Removed %r(%d) due to max_cache_size(%d)',
1107 name, size, self._policies.max_cache_size)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001108
Marc-Antoine Ruele79ddbf2018-06-13 18:33:07 +00001109 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001110 return evicted
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001111
1112 def cleanup(self):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001113 """Removes unknown directories.
1114
1115 Does not recalculate the cache size since it's surprisingly slow on some
1116 OSes.
1117 """
Junji Watanabe66041012021-08-11 06:40:08 +00001118 logging.info('NamedCache.cleanup(): Cleaning %s', self.cache_dir)
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001119 success = True
1120 with self._lock:
1121 try:
1122 actual = set(fs.listdir(self.cache_dir))
1123 actual.discard(self.NAMED_DIR)
1124 actual.discard(self.STATE_FILE)
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001125 expected = {v[0]: k for k, v in self._lru.items()}
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001126 # First, handle the actual cache content.
1127 # Remove missing entries.
1128 for missing in (set(expected) - actual):
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001129 name, size = self._lru.pop(expected[missing])
1130 logging.warning(
1131 'NamedCache.cleanup(): Missing on disk %r(%d)', name, size)
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001132 # Remove unexpected items.
1133 for unexpected in (actual - set(expected)):
1134 try:
1135 p = os.path.join(self.cache_dir, unexpected)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001136 logging.warning(
1137 'NamedCache.cleanup(): Unexpected %r', unexpected)
Marc-Antoine Ruel41362222018-06-28 14:52:34 +00001138 if fs.isdir(p) and not fs.islink(p):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001139 file_path.rmtree(p)
1140 else:
1141 fs.remove(p)
1142 except (IOError, OSError) as e:
1143 logging.error('Failed to remove %s: %s', unexpected, e)
1144 success = False
1145
1146 # Second, fix named cache links.
1147 named = os.path.join(self.cache_dir, self.NAMED_DIR)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001148 if fs.isdir(named):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001149 actual = set(fs.listdir(named))
1150 expected = set(self._lru)
1151 # Confirm entries. Do not add missing ones for now.
1152 for name in expected.intersection(actual):
1153 p = os.path.join(self.cache_dir, self.NAMED_DIR, name)
Junji Watanabe53d31882022-01-13 07:58:00 +00001154 expected_link = os.path.join('..', self._lru[name][0])
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001155 if fs.islink(p):
1156 link = fs.readlink(p)
1157 if expected_link == link:
1158 continue
1159 logging.warning(
1160 'Unexpected symlink for cache %s: %s, expected %s',
1161 name, link, expected_link)
1162 else:
1163 logging.warning('Unexpected non symlink for cache %s', name)
Marc-Antoine Ruel41362222018-06-28 14:52:34 +00001164 if fs.isdir(p) and not fs.islink(p):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001165 file_path.rmtree(p)
1166 else:
1167 fs.remove(p)
1168 # Remove unexpected items.
1169 for unexpected in (actual - expected):
1170 try:
1171 p = os.path.join(self.cache_dir, self.NAMED_DIR, unexpected)
1172 if fs.isdir(p):
1173 file_path.rmtree(p)
1174 else:
1175 fs.remove(p)
1176 except (IOError, OSError) as e:
1177 logging.error('Failed to remove %s: %s', unexpected, e)
1178 success = False
1179 finally:
1180 self._save()
1181 return success
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001182
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001183 # Internal functions.
1184
1185 def _try_upgrade(self):
1186 """Upgrades from the old format to the new one if necessary.
1187
1188 This code can be removed so all bots are known to have the right new format.
1189 """
1190 if not self._lru:
1191 return
1192 _name, (data, _ts) = self._lru.get_oldest()
1193 if isinstance(data, (list, tuple)):
1194 return
1195 # Update to v2.
1196 def upgrade(_name, rel_cache):
1197 abs_cache = os.path.join(self.cache_dir, rel_cache)
Takuto Ikuta995da062021-03-17 05:01:59 +00001198 return rel_cache, file_path.get_recursive_size(abs_cache)
1199
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001200 self._lru.transform(upgrade)
1201 self._save()
1202
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001203 def _remove_lru_item(self):
1204 """Removes the oldest LRU entry. LRU must not be empty."""
1205 name, ((_rel_path, size), _ts) = self._lru.get_oldest()
Takuto Ikuta74686842021-07-30 04:11:03 +00001206 logging.info('Removing named cache %r, %d', name, size)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001207 self._remove(name)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001208 return name, size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001209
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001210 def _allocate_dir(self):
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001211 """Creates and returns relative path of a new cache directory.
1212
1213 In practice, it is a 2-letter string.
1214 """
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001215 # We randomly generate directory names that have two lower/upper case
1216 # letters or digits. Total number of possibilities is (26*2 + 10)^2 = 3844.
1217 abc_len = len(self._DIR_ALPHABET)
1218 tried = set()
1219 while len(tried) < 1000:
1220 i = random.randint(0, abc_len * abc_len - 1)
1221 rel_path = (
Takuto Ikuta1c717d72020-06-29 10:15:09 +00001222 self._DIR_ALPHABET[i // abc_len] + self._DIR_ALPHABET[i % abc_len])
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001223 if rel_path in tried:
1224 continue
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001225 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001226 if not fs.exists(abs_path):
1227 return rel_path
1228 tried.add(rel_path)
1229 raise NamedCacheError(
1230 'could not allocate a new cache dir, too many cache dirs')
1231
1232 def _remove(self, name):
1233 """Removes a cache directory and entry.
1234
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001235 Returns:
1236 Number of caches deleted.
1237 """
1238 self._lock.assert_locked()
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001239 # First try to remove the alias if it exists.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001240 named_dir = self._get_named_path(name)
1241 if fs.islink(named_dir):
1242 fs.unlink(named_dir)
1243
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001244 # Then remove the actual data.
1245 if name not in self._lru:
1246 return
1247 rel_path, _size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001248 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001249 if fs.isdir(abs_path):
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001250 file_path.rmtree(abs_path)
1251 self._lru.pop(name)
1252
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001253 def _save(self):
1254 self._lock.assert_locked()
1255 file_path.ensure_tree(self.cache_dir)
1256 self._lru.save(self.state_file)
1257
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001258 def _get_named_path(self, name):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001259 return os.path.join(self.cache_dir, self.NAMED_DIR, name)