blob: 7e6d1b3404fa444ec8ad7ec73ca06024a4bfc631 [file] [log] [blame]
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -04001# Copyright 2018 The LUCI Authors. All rights reserved.
2# Use of this source code is governed under the Apache License, Version 2.0
3# that can be found in the LICENSE file.
4
5"""Define local cache policies."""
6
Takuto Ikuta2fe58fd2021-08-18 13:47:36 +00007from __future__ import print_function
8
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -04009import errno
Takuto Ikuta922c8642021-11-18 07:42:16 +000010import hashlib
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040011import io
12import logging
13import os
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040014import random
15import string
Junji Watanabe7b720782020-07-01 01:51:07 +000016import subprocess
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040017import sys
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000018import time
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040019
20from utils import file_path
21from utils import fs
22from utils import lru
23from utils import threading_utils
24from utils import tools
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040025
26# The file size to be used when we don't know the correct file size,
27# generally used for .isolated files.
28UNKNOWN_FILE_SIZE = None
29
30
31def file_write(path, content_generator):
32 """Writes file content as generated by content_generator.
33
34 Creates the intermediary directory as needed.
35
36 Returns the number of bytes written.
37
38 Meant to be mocked out in unit tests.
39 """
40 file_path.ensure_tree(os.path.dirname(path))
41 total = 0
42 with fs.open(path, 'wb') as f:
43 for d in content_generator:
44 total += len(d)
45 f.write(d)
46 return total
47
48
49def is_valid_file(path, size):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +000050 """Returns if the given files appears valid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040051
52 Currently it just checks the file exists and its size matches the expectation.
53 """
54 if size == UNKNOWN_FILE_SIZE:
55 return fs.isfile(path)
56 try:
57 actual_size = fs.stat(path).st_size
58 except OSError as e:
Junji Watanabe38b28b02020-04-23 10:23:30 +000059 logging.warning('Can\'t read item %s, assuming it\'s invalid: %s',
60 os.path.basename(path), e)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040061 return False
62 if size != actual_size:
63 logging.warning(
64 'Found invalid item %s; %d != %d',
65 os.path.basename(path), actual_size, size)
66 return False
67 return True
68
69
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000070def trim_caches(caches, path, min_free_space, max_age_secs):
71 """Trims multiple caches.
72
73 The goal here is to coherently trim all caches in a coherent LRU fashion,
74 deleting older items independent of which container they belong to.
75
76 Two policies are enforced first:
77 - max_age_secs
78 - min_free_space
79
80 Once that's done, then we enforce each cache's own policies.
81
82 Returns:
83 Slice containing the size of all items evicted.
84 """
85 min_ts = time.time() - max_age_secs if max_age_secs else 0
86 free_disk = file_path.get_free_space(path) if min_free_space else 0
Junji Watanabe66041012021-08-11 06:40:08 +000087 logging.info("Trimming caches. min_ts: %d, free_disk: %d, min_free_space: %d",
88 min_ts, free_disk, min_free_space)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000089 total = []
90 if min_ts or free_disk:
91 while True:
92 oldest = [(c, c.get_oldest()) for c in caches if len(c) > 0]
93 if not oldest:
94 break
Lei Leife202df2019-06-11 17:33:34 +000095 oldest.sort(key=lambda k: k[1])
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000096 c, ts = oldest[0]
97 if ts >= min_ts and free_disk >= min_free_space:
98 break
99 total.append(c.remove_oldest())
100 if min_free_space:
101 free_disk = file_path.get_free_space(path)
Takuto Ikuta74686842021-07-30 04:11:03 +0000102 logging.info("free_disk after removing oldest entries: %d", free_disk)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000103 # Evaluate each cache's own policies.
104 for c in caches:
105 total.extend(c.trim())
106 return total
107
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000108
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400109class NamedCacheError(Exception):
110 """Named cache specific error."""
111
112
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400113class NoMoreSpace(Exception):
114 """Not enough space to map the whole directory."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400115
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -0400116
Junji Watanabeab2102a2022-01-12 01:44:04 +0000117class CachePolicies:
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -0400118 def __init__(self, max_cache_size, min_free_space, max_items, max_age_secs):
119 """Common caching policies for the multiple caches (isolated, named, cipd).
120
121 Arguments:
122 - max_cache_size: Trim if the cache gets larger than this value. If 0, the
123 cache is effectively a leak.
124 - min_free_space: Trim if disk free space becomes lower than this value. If
125 0, it will unconditionally fill the disk.
126 - max_items: Maximum number of items to keep in the cache. If 0, do not
127 enforce a limit.
128 - max_age_secs: Maximum age an item is kept in the cache until it is
129 automatically evicted. Having a lot of dead luggage slows
130 everything down.
131 """
132 self.max_cache_size = max_cache_size
133 self.min_free_space = min_free_space
134 self.max_items = max_items
135 self.max_age_secs = max_age_secs
136
137 def __str__(self):
Takuto Ikutaa953f272020-01-20 02:59:17 +0000138 return ('CachePolicies(max_cache_size=%s (%.3f GiB); max_items=%s; '
139 'min_free_space=%s (%.3f GiB); max_age_secs=%s)') % (
140 self.max_cache_size, float(self.max_cache_size) / 1024**3,
141 self.max_items, self.min_free_space,
142 float(self.min_free_space) / 1024**3, self.max_age_secs)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400143
144
145class CacheMiss(Exception):
146 """Raised when an item is not in cache."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400147 def __init__(self, digest):
148 self.digest = digest
Junji Watanabe38b28b02020-04-23 10:23:30 +0000149 super(CacheMiss,
150 self).__init__('Item with digest %r is not found in cache' % digest)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400151
152
Junji Watanabeab2102a2022-01-12 01:44:04 +0000153class Cache:
Junji Watanabe38b28b02020-04-23 10:23:30 +0000154
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400155 def __init__(self, cache_dir):
156 if cache_dir is not None:
Junji Watanabe7a677e92022-01-13 06:07:31 +0000157 assert isinstance(cache_dir, str), cache_dir
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400158 assert file_path.isabs(cache_dir), cache_dir
159 self.cache_dir = cache_dir
160 self._lock = threading_utils.LockWithAssert()
161 # Profiling values.
162 self._added = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400163 self._used = []
164
Marc-Antoine Ruel6c3be5a2018-09-04 17:19:59 +0000165 def __nonzero__(self):
166 """A cache is always True.
167
168 Otherwise it falls back to __len__, which is surprising.
169 """
170 return True
171
Takuto Ikuta1c717d72020-06-29 10:15:09 +0000172 def __bool__(self):
173 """A cache is always True.
174
175 Otherwise it falls back to __len__, which is surprising.
176 """
177 return True
178
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000179 def __len__(self):
180 """Returns the number of entries in the cache."""
181 raise NotImplementedError()
182
183 def __iter__(self):
184 """Iterates over all the entries names."""
185 raise NotImplementedError()
186
187 def __contains__(self, name):
188 """Returns if an entry is in the cache."""
189 raise NotImplementedError()
190
191 @property
192 def total_size(self):
193 """Returns the total size of the cache in bytes."""
194 raise NotImplementedError()
195
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400196 @property
197 def added(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000198 """Returns a list of the size for each entry added."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400199 with self._lock:
200 return self._added[:]
201
202 @property
203 def used(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000204 """Returns a list of the size for each entry used."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400205 with self._lock:
206 return self._used[:]
207
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000208 def get_oldest(self):
209 """Returns timestamp of oldest cache entry or None.
210
211 Returns:
212 Timestamp of the oldest item.
213
214 Used for manual trimming.
215 """
216 raise NotImplementedError()
217
218 def remove_oldest(self):
219 """Removes the oldest item from the cache.
220
221 Returns:
222 Size of the oldest item.
223
224 Used for manual trimming.
225 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400226 raise NotImplementedError()
227
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000228 def save(self):
229 """Saves the current cache to disk."""
230 raise NotImplementedError()
231
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400232 def trim(self):
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000233 """Enforces cache policies, then calls save().
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400234
235 Returns:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000236 Slice with the size of evicted items.
237 """
238 raise NotImplementedError()
239
240 def cleanup(self):
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000241 """Deletes any corrupted item from the cache, then calls trim(), then
242 save().
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000243
244 It is assumed to take significantly more time than trim().
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400245 """
246 raise NotImplementedError()
247
248
249class ContentAddressedCache(Cache):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400250 """Content addressed cache that stores objects temporarily.
251
252 It can be accessed concurrently from multiple threads, so it should protect
253 its internal state with some lock.
254 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400255
256 def __enter__(self):
257 """Context manager interface."""
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000258 # TODO(maruel): Remove.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400259 return self
260
261 def __exit__(self, _exc_type, _exec_value, _traceback):
262 """Context manager interface."""
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000263 # TODO(maruel): Remove.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400264 return False
265
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400266 def touch(self, digest, size):
267 """Ensures item is not corrupted and updates its LRU position.
268
269 Arguments:
270 digest: hash digest of item to check.
271 size: expected size of this item.
272
273 Returns:
274 True if item is in cache and not corrupted.
275 """
276 raise NotImplementedError()
277
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400278 def getfileobj(self, digest):
279 """Returns a readable file like object.
280
281 If file exists on the file system it will have a .name attribute with an
282 absolute path to the file.
283 """
284 raise NotImplementedError()
285
286 def write(self, digest, content):
287 """Reads data from |content| generator and stores it in cache.
288
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000289 It is possible to write to an object that already exists. It may be
290 ignored (sent to /dev/null) but the timestamp is still updated.
291
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400292 Returns digest to simplify chaining.
293 """
294 raise NotImplementedError()
295
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400296
297class MemoryContentAddressedCache(ContentAddressedCache):
298 """ContentAddressedCache implementation that stores everything in memory."""
299
Lei Leife202df2019-06-11 17:33:34 +0000300 def __init__(self, file_mode_mask=0o500):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400301 """Args:
302 file_mode_mask: bit mask to AND file mode with. Default value will make
303 all mapped files to be read only.
304 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400305 super(MemoryContentAddressedCache, self).__init__(None)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400306 self._file_mode_mask = file_mode_mask
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000307 # Items in a LRU lookup dict(digest: size).
308 self._lru = lru.LRUDict()
309
310 # Cache interface implementation.
311
312 def __len__(self):
313 with self._lock:
314 return len(self._lru)
315
316 def __iter__(self):
317 # This is not thread-safe.
318 return self._lru.__iter__()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400319
320 def __contains__(self, digest):
321 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000322 return digest in self._lru
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400323
324 @property
325 def total_size(self):
326 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000327 return sum(len(i) for i in self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400328
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000329 def get_oldest(self):
330 with self._lock:
331 try:
332 # (key, (value, ts))
333 return self._lru.get_oldest()[1][1]
334 except KeyError:
335 return None
336
337 def remove_oldest(self):
338 with self._lock:
339 # TODO(maruel): Update self._added.
340 # (key, (value, ts))
341 return len(self._lru.pop_oldest()[1][0])
342
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000343 def save(self):
344 pass
345
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000346 def trim(self):
347 """Trimming is not implemented for MemoryContentAddressedCache."""
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000348 return []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400349
350 def cleanup(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000351 """Cleaning is irrelevant, as there's no stateful serialization."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400352
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000353 # ContentAddressedCache interface implementation.
354
355 def __contains__(self, digest):
356 with self._lock:
357 return digest in self._lru
358
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400359 def touch(self, digest, size):
360 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000361 try:
362 self._lru.touch(digest)
363 except KeyError:
364 return False
365 return True
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400366
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400367 def getfileobj(self, digest):
368 with self._lock:
369 try:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000370 d = self._lru[digest]
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400371 except KeyError:
372 raise CacheMiss(digest)
373 self._used.append(len(d))
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000374 self._lru.touch(digest)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400375 return io.BytesIO(d)
376
377 def write(self, digest, content):
378 # Assemble whole stream before taking the lock.
Junji Watanabe7a677e92022-01-13 06:07:31 +0000379 data = b''.join(content)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400380 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000381 self._lru.add(digest, data)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400382 self._added.append(len(data))
383 return digest
384
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400385
386class DiskContentAddressedCache(ContentAddressedCache):
387 """Stateful LRU cache in a flat hash table in a directory.
388
389 Saves its state as json file.
390 """
391 STATE_FILE = u'state.json'
392
Marc-Antoine Ruel79d42192019-02-06 19:24:16 +0000393 def __init__(self, cache_dir, policies, trim, time_fn=None):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400394 """
395 Arguments:
396 cache_dir: directory where to place the cache.
397 policies: CachePolicies instance, cache retention policies.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400398 trim: if True to enforce |policies| right away.
Marc-Antoine Ruel79d42192019-02-06 19:24:16 +0000399 It can be done later by calling trim() explicitly.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400400 """
401 # All protected methods (starting with '_') except _path should be called
402 # with self._lock held.
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400403 super(DiskContentAddressedCache, self).__init__(cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400404 self.policies = policies
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400405 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
406 # Items in a LRU lookup dict(digest: size).
407 self._lru = lru.LRUDict()
408 # Current cached free disk space. It is updated by self._trim().
409 file_path.ensure_tree(self.cache_dir)
410 self._free_disk = file_path.get_free_space(self.cache_dir)
411 # The first item in the LRU cache that must not be evicted during this run
412 # since it was referenced. All items more recent that _protected in the LRU
413 # cache are also inherently protected. It could be a set() of all items
414 # referenced but this increases memory usage without a use case.
415 self._protected = None
416 # Cleanup operations done by self._load(), if any.
417 self._operations = []
418 with tools.Profiler('Setup'):
419 with self._lock:
420 self._load(trim, time_fn)
421
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000422 # Cache interface implementation.
423
424 def __len__(self):
425 with self._lock:
426 return len(self._lru)
427
428 def __iter__(self):
429 # This is not thread-safe.
430 return self._lru.__iter__()
431
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400432 def __contains__(self, digest):
433 with self._lock:
434 return digest in self._lru
435
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400436 @property
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400437 def total_size(self):
438 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000439 return sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400440
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000441 def get_oldest(self):
442 with self._lock:
443 try:
444 # (key, (value, ts))
445 return self._lru.get_oldest()[1][1]
446 except KeyError:
447 return None
448
449 def remove_oldest(self):
450 with self._lock:
451 # TODO(maruel): Update self._added.
452 return self._remove_lru_file(True)
453
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000454 def save(self):
455 with self._lock:
456 return self._save()
457
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000458 def trim(self):
459 """Forces retention policies."""
460 with self._lock:
461 return self._trim()
462
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400463 def cleanup(self):
464 """Cleans up the cache directory.
465
466 Ensures there is no unknown files in cache_dir.
467 Ensures the read-only bits are set correctly.
468
469 At that point, the cache was already loaded, trimmed to respect cache
470 policies.
471 """
Junji Watanabe66041012021-08-11 06:40:08 +0000472 logging.info('DiskContentAddressedCache.cleanup(): Cleaning %s',
473 self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400474 with self._lock:
Lei Leife202df2019-06-11 17:33:34 +0000475 fs.chmod(self.cache_dir, 0o700)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400476 # Ensure that all files listed in the state still exist and add new ones.
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000477 previous = set(self._lru)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400478 # It'd be faster if there were a readdir() function.
479 for filename in fs.listdir(self.cache_dir):
480 if filename == self.STATE_FILE:
Lei Leife202df2019-06-11 17:33:34 +0000481 fs.chmod(os.path.join(self.cache_dir, filename), 0o600)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400482 continue
483 if filename in previous:
Lei Leife202df2019-06-11 17:33:34 +0000484 fs.chmod(os.path.join(self.cache_dir, filename), 0o400)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400485 previous.remove(filename)
486 continue
487
488 # An untracked file. Delete it.
Junji Watanabe66041012021-08-11 06:40:08 +0000489 logging.warning(
490 'DiskContentAddressedCache.cleanup(): Removing unknown file %s',
491 filename)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400492 p = self._path(filename)
493 if fs.isdir(p):
494 try:
495 file_path.rmtree(p)
496 except OSError:
497 pass
498 else:
499 file_path.try_remove(p)
500 continue
501
502 if previous:
503 # Filter out entries that were not found.
Junji Watanabe66041012021-08-11 06:40:08 +0000504 logging.warning(
505 'DiskContentAddressedCache.cleanup(): Removed %d lost files',
506 len(previous))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400507 for filename in previous:
508 self._lru.pop(filename)
509 self._save()
510
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000511 # Verify hash of every single item to detect corruption. the corrupted
512 # files will be evicted.
Junji Watanabe66041012021-08-11 06:40:08 +0000513 total = 0
514 verified = 0
515 deleted = 0
516 logging.info(
517 'DiskContentAddressedCache.cleanup(): Verifying modified files')
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000518 with self._lock:
Takuto Ikuta1c717d72020-06-29 10:15:09 +0000519 for digest, (_, timestamp) in list(self._lru._items.items()):
Junji Watanabe66041012021-08-11 06:40:08 +0000520 total += 1
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000521 # verify only if the mtime is grather than the timestamp in state.json
522 # to avoid take too long time.
523 if self._get_mtime(digest) <= timestamp:
Quinten Yearsley0bc84ce2020-04-09 22:38:08 +0000524 continue
Junji Watanabe66041012021-08-11 06:40:08 +0000525 logging.warning(
526 'DiskContentAddressedCache.cleanup(): Item has been modified.'
527 ' verifying item: %s', digest)
528 is_valid = self._is_valid_hash(digest)
529 verified += 1
530 logging.warning(
531 'DiskContentAddressedCache.cleanup(): verified. is_valid: %s, '
532 'item: %s', is_valid, digest)
533 if is_valid:
Quinten Yearsley0bc84ce2020-04-09 22:38:08 +0000534 # Update timestamp in state.json
535 self._lru.touch(digest)
536 continue
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000537 # remove corrupted file from LRU and file system
538 self._lru.pop(digest)
539 self._delete_file(digest, UNKNOWN_FILE_SIZE)
Junji Watanabe66041012021-08-11 06:40:08 +0000540 deleted += 1
541 logging.error(
542 'DiskContentAddressedCache.cleanup(): Deleted corrupted item: %s',
543 digest)
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000544 self._save()
Junji Watanabe66041012021-08-11 06:40:08 +0000545 logging.info(
546 'DiskContentAddressedCache.cleanup(): Verified modified files.'
547 ' total: %d, verified: %d, deleted: %d', total, verified, deleted)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400548
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000549 # ContentAddressedCache interface implementation.
550
551 def __contains__(self, digest):
552 with self._lock:
553 return digest in self._lru
554
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400555 def touch(self, digest, size):
556 """Verifies an actual file is valid and bumps its LRU position.
557
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000558 Returns False if the file is missing or invalid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400559
560 Note that is doesn't compute the hash so it could still be corrupted if the
561 file size didn't change.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400562 """
563 # Do the check outside the lock.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000564 looks_valid = is_valid_file(self._path(digest), size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400565
566 # Update its LRU position.
567 with self._lock:
568 if digest not in self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000569 if looks_valid:
570 # Exists but not in the LRU anymore.
571 self._delete_file(digest, size)
572 return False
573 if not looks_valid:
574 self._lru.pop(digest)
575 # Exists but not in the LRU anymore.
576 self._delete_file(digest, size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400577 return False
578 self._lru.touch(digest)
579 self._protected = self._protected or digest
580 return True
581
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400582 def getfileobj(self, digest):
583 try:
584 f = fs.open(self._path(digest), 'rb')
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400585 except IOError:
586 raise CacheMiss(digest)
Vadim Shtayura33054fa2018-11-01 12:47:59 +0000587 with self._lock:
588 try:
589 self._used.append(self._lru[digest])
590 except KeyError:
591 # If the digest is not actually in _lru, assume it is a cache miss.
592 # Existing file will be overwritten by whoever uses the cache and added
593 # to _lru.
594 f.close()
595 raise CacheMiss(digest)
596 return f
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400597
598 def write(self, digest, content):
599 assert content is not None
600 with self._lock:
601 self._protected = self._protected or digest
602 path = self._path(digest)
603 # A stale broken file may remain. It is possible for the file to have write
604 # access bit removed which would cause the file_write() call to fail to open
605 # in write mode. Take no chance here.
606 file_path.try_remove(path)
607 try:
608 size = file_write(path, content)
609 except:
610 # There are two possible places were an exception can occur:
611 # 1) Inside |content| generator in case of network or unzipping errors.
612 # 2) Inside file_write itself in case of disk IO errors.
613 # In any case delete an incomplete file and propagate the exception to
614 # caller, it will be logged there.
615 file_path.try_remove(path)
616 raise
617 # Make the file read-only in the cache. This has a few side-effects since
618 # the file node is modified, so every directory entries to this file becomes
619 # read-only. It's fine here because it is a new file.
620 file_path.set_read_only(path, True)
621 with self._lock:
622 self._add(digest, size)
623 return digest
624
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000625 # Internal functions.
626
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400627 def _load(self, trim, time_fn):
628 """Loads state of the cache from json file.
629
630 If cache_dir does not exist on disk, it is created.
631 """
632 self._lock.assert_locked()
633
634 if not fs.isfile(self.state_file):
635 if not fs.isdir(self.cache_dir):
636 fs.makedirs(self.cache_dir)
637 else:
638 # Load state of the cache.
639 try:
640 self._lru = lru.LRUDict.load(self.state_file)
641 except ValueError as err:
642 logging.error('Failed to load cache state: %s' % (err,))
Takuto Ikutaeccc88c2019-12-13 14:46:32 +0000643 # Don't want to keep broken cache dir.
644 file_path.rmtree(self.cache_dir)
645 fs.makedirs(self.cache_dir)
Matt Kotsenasefe30092020-03-19 01:12:55 +0000646 self._free_disk = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400647 if time_fn:
648 self._lru.time_fn = time_fn
649 if trim:
650 self._trim()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400651
652 def _save(self):
653 """Saves the LRU ordering."""
654 self._lock.assert_locked()
655 if sys.platform != 'win32':
656 d = os.path.dirname(self.state_file)
657 if fs.isdir(d):
658 # Necessary otherwise the file can't be created.
659 file_path.set_read_only(d, False)
660 if fs.isfile(self.state_file):
661 file_path.set_read_only(self.state_file, False)
662 self._lru.save(self.state_file)
663
664 def _trim(self):
665 """Trims anything we don't know, make sure enough free space exists."""
666 self._lock.assert_locked()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000667 evicted = []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400668
669 # Trim old items.
670 if self.policies.max_age_secs:
671 cutoff = self._lru.time_fn() - self.policies.max_age_secs
672 while self._lru:
673 oldest = self._lru.get_oldest()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000674 # (key, (data, ts)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400675 if oldest[1][1] >= cutoff:
676 break
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000677 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400678
679 # Ensure maximum cache size.
680 if self.policies.max_cache_size:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000681 total_size = sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400682 while total_size > self.policies.max_cache_size:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000683 e = self._remove_lru_file(True)
684 evicted.append(e)
685 total_size -= e
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400686
687 # Ensure maximum number of items in the cache.
688 if self.policies.max_items and len(self._lru) > self.policies.max_items:
Marc-Antoine Ruel0fdee222019-10-10 14:42:40 +0000689 for _ in range(len(self._lru) - self.policies.max_items):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000690 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400691
692 # Ensure enough free space.
693 self._free_disk = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400694 while (
695 self.policies.min_free_space and
696 self._lru and
697 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000698 # self._free_disk is updated by this call.
699 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400700
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000701 if evicted:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000702 total_usage = sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400703 usage_percent = 0.
704 if total_usage:
705 usage_percent = 100. * float(total_usage) / self.policies.max_cache_size
706
707 logging.warning(
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000708 'Trimmed %d file(s) (%.1fkb) due to not enough free disk space:'
709 ' %.1fkb free, %.1fkb cache (%.1f%% of its maximum capacity of '
Junji Watanabe38b28b02020-04-23 10:23:30 +0000710 '%.1fkb)', len(evicted),
711 sum(evicted) / 1024., self._free_disk / 1024., total_usage / 1024.,
712 usage_percent, self.policies.max_cache_size / 1024.)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400713 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000714 return evicted
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400715
716 def _path(self, digest):
717 """Returns the path to one item."""
718 return os.path.join(self.cache_dir, digest)
719
720 def _remove_lru_file(self, allow_protected):
Quinten Yearsley0bc84ce2020-04-09 22:38:08 +0000721 """Removes the latest recently used file and returns its size.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000722
723 Updates self._free_disk.
724 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400725 self._lock.assert_locked()
726 try:
Takuto Ikutae40f76a2020-01-20 01:22:17 +0000727 digest, _ = self._lru.get_oldest()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400728 if not allow_protected and digest == self._protected:
Takuto Ikutae40f76a2020-01-20 01:22:17 +0000729 total_size = sum(self._lru.values())
730 msg = ('Not enough space to fetch the whole isolated tree.\n'
Takuto Ikutaa953f272020-01-20 02:59:17 +0000731 ' %s\n cache=%d bytes (%.3f GiB), %d items; '
732 '%s bytes (%.3f GiB) free_space') % (
733 self.policies, total_size, float(total_size) / 1024**3,
734 len(self._lru), self._free_disk,
735 float(self._free_disk) / 1024**3)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400736 raise NoMoreSpace(msg)
737 except KeyError:
738 # That means an internal error.
739 raise NoMoreSpace('Nothing to remove, can\'t happend')
740 digest, (size, _) = self._lru.pop_oldest()
Takuto Ikuta8d8ca9b2021-02-26 02:31:43 +0000741 logging.debug('Removing LRU file %s with size %s bytes', digest, size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400742 self._delete_file(digest, size)
743 return size
744
745 def _add(self, digest, size=UNKNOWN_FILE_SIZE):
746 """Adds an item into LRU cache marking it as a newest one."""
747 self._lock.assert_locked()
748 if size == UNKNOWN_FILE_SIZE:
749 size = fs.stat(self._path(digest)).st_size
750 self._added.append(size)
751 self._lru.add(digest, size)
752 self._free_disk -= size
753 # Do a quicker version of self._trim(). It only enforces free disk space,
754 # not cache size limits. It doesn't actually look at real free disk space,
755 # only uses its cache values. self._trim() will be called later to enforce
756 # real trimming but doing this quick version here makes it possible to map
757 # an isolated that is larger than the current amount of free disk space when
758 # the cache size is already large.
Junji Watanabe38b28b02020-04-23 10:23:30 +0000759 while (self.policies.min_free_space and self._lru and
760 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000761 # self._free_disk is updated by this call.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400762 if self._remove_lru_file(False) == -1:
763 break
764
765 def _delete_file(self, digest, size=UNKNOWN_FILE_SIZE):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000766 """Deletes cache file from the file system.
767
768 Updates self._free_disk.
769 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400770 self._lock.assert_locked()
771 try:
772 if size == UNKNOWN_FILE_SIZE:
773 try:
774 size = fs.stat(self._path(digest)).st_size
775 except OSError:
776 size = 0
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000777 if file_path.try_remove(self._path(digest)):
778 self._free_disk += size
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400779 except OSError as e:
780 if e.errno != errno.ENOENT:
781 logging.error('Error attempting to delete a file %s:\n%s' % (digest, e))
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400782
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000783 def _get_mtime(self, digest):
784 """Get mtime of cache file."""
785 return os.path.getmtime(self._path(digest))
786
787 def _is_valid_hash(self, digest):
788 """Verify digest with supported hash algos."""
Takuto Ikuta922c8642021-11-18 07:42:16 +0000789 d = hashlib.sha256()
790 with fs.open(self._path(digest), 'rb') as f:
791 while True:
792 chunk = f.read(1024 * 1024)
793 if not chunk:
794 break
795 d.update(chunk)
796 return digest == d.hexdigest()
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000797
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400798
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400799class NamedCache(Cache):
800 """Manages cache directories.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400801
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400802 A cache entry is a tuple (name, path), where
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400803 name is a short identifier that describes the contents of the cache, e.g.
804 "git_v8" could be all git repositories required by v8 builds, or
805 "build_chromium" could be build artefacts of the Chromium.
806 path is a directory path relative to the task run dir. Cache installation
807 puts the requested cache directory at the path.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400808 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400809 _DIR_ALPHABET = string.ascii_letters + string.digits
810 STATE_FILE = u'state.json'
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +0000811 NAMED_DIR = u'named'
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400812
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400813 def __init__(self, cache_dir, policies, time_fn=None):
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400814 """Initializes NamedCaches.
815
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400816 Arguments:
817 - cache_dir is a directory for persistent cache storage.
818 - policies is a CachePolicies instance.
819 - time_fn is a function that returns timestamp (float) and used to take
820 timestamps when new caches are requested. Used in unit tests.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400821 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400822 super(NamedCache, self).__init__(cache_dir)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400823 self._policies = policies
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000824 # LRU {cache_name -> tuple(cache_location, size)}
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400825 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
826 self._lru = lru.LRUDict()
827 if not fs.isdir(self.cache_dir):
828 fs.makedirs(self.cache_dir)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000829 elif fs.isfile(self.state_file):
Marc-Antoine Ruel3543e212018-05-23 01:04:34 +0000830 try:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400831 self._lru = lru.LRUDict.load(self.state_file)
Takuto Ikutac4b85ec2020-06-09 03:42:39 +0000832 for _, size in self._lru.values():
Junji Watanabe7a677e92022-01-13 06:07:31 +0000833 if not isinstance(size, int):
Takuto Ikuta6acf8f92020-07-02 02:06:42 +0000834 with open(self.state_file, 'r') as f:
835 logging.info('named cache state file: %s\n%s', self.state_file,
836 f.read())
Junji Watanabeedcf47d2020-06-11 08:41:01 +0000837 raise ValueError("size is not integer: %s" % size)
Takuto Ikutac4b85ec2020-06-09 03:42:39 +0000838
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400839 except ValueError:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000840 logging.exception(
841 'NamedCache: failed to load named cache state file; obliterating')
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400842 file_path.rmtree(self.cache_dir)
Takuto Ikuta568ddb22020-01-20 23:24:16 +0000843 fs.makedirs(self.cache_dir)
Takuto Ikutadadfbb02020-07-10 03:31:26 +0000844 self._lru = lru.LRUDict()
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000845 with self._lock:
846 self._try_upgrade()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400847 if time_fn:
848 self._lru.time_fn = time_fn
849
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400850 @property
851 def available(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000852 """Returns a set of names of available caches."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400853 with self._lock:
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000854 return set(self._lru)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400855
Takuto Ikutaeab23172020-07-02 03:50:02 +0000856 def _sudo_chown(self, path):
857 if sys.platform == 'win32':
858 return
859 uid = os.getuid()
860 if os.stat(path).st_uid == uid:
861 return
862 # Maybe owner of |path| is different from runner of this script. This is to
863 # make fs.rename work in that case.
864 # https://crbug.com/986676
865 subprocess.check_call(['sudo', '-n', 'chown', str(uid), path])
866
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000867 def install(self, dst, name):
868 """Creates the directory |dst| and moves a previous named cache |name| if it
869 was in the local named caches cache.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400870
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000871 dst must be absolute, unicode and must not exist.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400872
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000873 Returns the reused named cache size in bytes, or 0 if none was present.
874
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400875 Raises NamedCacheError if cannot install the cache.
876 """
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000877 logging.info('NamedCache.install(%r, %r)', dst, name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400878 with self._lock:
879 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000880 if fs.isdir(dst):
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400881 raise NamedCacheError(
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000882 'installation directory %r already exists' % dst)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400883
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000884 # Remove the named symlink if it exists.
885 link_name = self._get_named_path(name)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000886 if fs.exists(link_name):
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000887 # Remove the symlink itself, not its destination.
888 fs.remove(link_name)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000889
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000890 if name in self._lru:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000891 rel_cache, size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400892 abs_cache = os.path.join(self.cache_dir, rel_cache)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000893 if fs.isdir(abs_cache):
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000894 logging.info('- reusing %r; size was %d', rel_cache, size)
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000895 file_path.ensure_tree(os.path.dirname(dst))
Takuto Ikutaeab23172020-07-02 03:50:02 +0000896 self._sudo_chown(abs_cache)
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000897 fs.rename(abs_cache, dst)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400898 self._remove(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000899 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400900
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000901 logging.warning('- expected directory %r, does not exist', rel_cache)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400902 self._remove(name)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400903
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000904 # The named cache does not exist, create an empty directory. When
905 # uninstalling, we will move it back to the cache and create an an
906 # entry.
907 logging.info('- creating new directory')
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000908 file_path.ensure_tree(dst)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000909 return 0
Junji Watanabed2ab86b2021-08-13 07:20:23 +0000910 except (IOError, OSError, PermissionError) as ex:
Takuto Ikuta2fe58fd2021-08-18 13:47:36 +0000911 if sys.platform == 'win32':
912 print("There may be running process in cache"
913 " e.g. https://crbug.com/1239809#c14",
914 file=sys.stderr)
915 subprocess.check_call(
916 ["powershell", "get-process | select path,starttime"])
917
Marc-Antoine Ruel799bc4f2019-01-30 22:54:47 +0000918 # Raise using the original traceback.
919 exc = NamedCacheError(
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000920 'cannot install cache named %r at %r: %s' % (name, dst, ex))
Junji Watanabe7a677e92022-01-13 06:07:31 +0000921 raise exc.with_traceback(sys.exc_info()[2])
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000922 finally:
923 self._save()
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400924
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000925 def uninstall(self, src, name):
926 """Moves the cache directory back into the named cache hive for an eventual
927 reuse.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400928
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000929 The opposite of install().
930
931 src must be absolute and unicode. Its content is moved back into the local
932 named caches cache.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400933
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000934 Returns the named cache size in bytes.
935
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400936 Raises NamedCacheError if cannot uninstall the cache.
937 """
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000938 logging.info('NamedCache.uninstall(%r, %r)', src, name)
Junji Watanabe9cdfff52021-01-08 07:20:35 +0000939 start = time.time()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400940 with self._lock:
941 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000942 if not fs.isdir(src):
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400943 logging.warning(
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000944 'NamedCache: Directory %r does not exist anymore. Cache lost.',
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000945 src)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400946 return
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400947
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000948 if name in self._lru:
949 # This shouldn't happen but just remove the preexisting one and move
950 # on.
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000951 logging.error('- overwriting existing cache!')
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000952 self._remove(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000953
Takuto Ikutac1bdcf22021-10-27 05:07:26 +0000954 # Calculate the size of the named cache to keep. It's important because
955 # if size is zero (it's empty), we do not want to add it back to the
956 # named caches cache.
Takuto Ikuta995da062021-03-17 05:01:59 +0000957 size = file_path.get_recursive_size(src)
Takuto Ikutac1bdcf22021-10-27 05:07:26 +0000958 logging.info('- Size is %d', size)
959 if not size:
960 # Do not save empty named cache.
961 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400962
963 # Move the dir and create an entry for the named cache.
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000964 rel_cache = self._allocate_dir()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400965 abs_cache = os.path.join(self.cache_dir, rel_cache)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000966 logging.info('- Moving to %r', rel_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400967 file_path.ensure_tree(os.path.dirname(abs_cache))
Takuto Ikutaeab23172020-07-02 03:50:02 +0000968 self._sudo_chown(src)
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000969 fs.rename(src, abs_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400970
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000971 self._lru.add(name, (rel_cache, size))
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000972 self._added.append(size)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000973
974 # Create symlink <cache_dir>/<named>/<name> -> <cache_dir>/<short name>
975 # for user convenience.
976 named_path = self._get_named_path(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000977 if fs.exists(named_path):
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000978 file_path.remove(named_path)
979 else:
980 file_path.ensure_tree(os.path.dirname(named_path))
981
982 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000983 fs.symlink(os.path.join(u'..', rel_cache), named_path)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000984 logging.info(
985 'NamedCache: Created symlink %r to %r', named_path, abs_cache)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000986 except OSError:
987 # Ignore on Windows. It happens when running as a normal user or when
988 # UAC is enabled and the user is a filtered administrator account.
989 if sys.platform != 'win32':
990 raise
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000991 return size
Junji Watanabed2ab86b2021-08-13 07:20:23 +0000992 except (IOError, OSError, PermissionError) as ex:
Marc-Antoine Ruel799bc4f2019-01-30 22:54:47 +0000993 # Raise using the original traceback.
994 exc = NamedCacheError(
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000995 'cannot uninstall cache named %r at %r: %s' % (name, src, ex))
Junji Watanabe7a677e92022-01-13 06:07:31 +0000996 raise exc.with_traceback(sys.exc_info()[2])
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000997 finally:
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000998 # Call save() at every uninstall. The assumptions are:
999 # - The total the number of named caches is low, so the state.json file
1000 # is small, so the time it takes to write it to disk is short.
1001 # - The number of mapped named caches per task is low, so the number of
1002 # times save() is called on tear-down isn't high enough to be
1003 # significant.
1004 # - uninstall() sometimes throws due to file locking on Windows or
1005 # access rights on Linux. We want to keep as many as possible.
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001006 self._save()
Junji Watanabe9cdfff52021-01-08 07:20:35 +00001007 logging.info('NamedCache.uninstall(%r, %r) took %d seconds', src, name,
1008 time.time() - start)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001009
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001010 # Cache interface implementation.
1011
1012 def __len__(self):
1013 with self._lock:
1014 return len(self._lru)
1015
1016 def __iter__(self):
1017 # This is not thread-safe.
1018 return self._lru.__iter__()
1019
John Budorickc6186972020-02-26 00:58:14 +00001020 def __contains__(self, name):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001021 with self._lock:
John Budorickc6186972020-02-26 00:58:14 +00001022 return name in self._lru
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001023
1024 @property
1025 def total_size(self):
1026 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001027 return sum(size for _rel_path, size in self._lru.values())
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001028
1029 def get_oldest(self):
1030 with self._lock:
1031 try:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001032 # (key, (value, ts))
1033 return self._lru.get_oldest()[1][1]
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001034 except KeyError:
1035 return None
1036
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001037 def remove_oldest(self):
1038 with self._lock:
1039 # TODO(maruel): Update self._added.
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001040 _name, size = self._remove_lru_item()
1041 return size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001042
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +00001043 def save(self):
1044 with self._lock:
1045 return self._save()
1046
John Budorickc6186972020-02-26 00:58:14 +00001047 def touch(self, *names):
1048 with self._lock:
1049 for name in names:
1050 if name in self._lru:
1051 self._lru.touch(name)
1052 self._save()
1053
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001054 def trim(self):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001055 evicted = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001056 with self._lock:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001057 if not fs.isdir(self.cache_dir):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001058 return evicted
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001059
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001060 # Trim according to maximum number of items.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001061 if self._policies.max_items:
1062 while len(self._lru) > self._policies.max_items:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001063 name, size = self._remove_lru_item()
1064 evicted.append(size)
1065 logging.info(
1066 'NamedCache.trim(): Removed %r(%d) due to max_items(%d)',
1067 name, size, self._policies.max_items)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001068
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001069 # Trim according to maximum age.
1070 if self._policies.max_age_secs:
1071 cutoff = self._lru.time_fn() - self._policies.max_age_secs
1072 while self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001073 _name, (_data, ts) = self._lru.get_oldest()
1074 if ts >= cutoff:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001075 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001076 name, size = self._remove_lru_item()
1077 evicted.append(size)
1078 logging.info(
1079 'NamedCache.trim(): Removed %r(%d) due to max_age_secs(%d)',
1080 name, size, self._policies.max_age_secs)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001081
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001082 # Trim according to minimum free space.
1083 if self._policies.min_free_space:
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001084 while self._lru:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001085 free_space = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001086 if free_space >= self._policies.min_free_space:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001087 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001088 name, size = self._remove_lru_item()
1089 evicted.append(size)
1090 logging.info(
1091 'NamedCache.trim(): Removed %r(%d) due to min_free_space(%d)',
1092 name, size, self._policies.min_free_space)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001093
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001094 # Trim according to maximum total size.
1095 if self._policies.max_cache_size:
1096 while self._lru:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001097 total = sum(size for _rel_cache, size in self._lru.values())
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001098 if total <= self._policies.max_cache_size:
1099 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001100 name, size = self._remove_lru_item()
1101 evicted.append(size)
1102 logging.info(
1103 'NamedCache.trim(): Removed %r(%d) due to max_cache_size(%d)',
1104 name, size, self._policies.max_cache_size)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001105
Marc-Antoine Ruele79ddbf2018-06-13 18:33:07 +00001106 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001107 return evicted
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001108
1109 def cleanup(self):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001110 """Removes unknown directories.
1111
1112 Does not recalculate the cache size since it's surprisingly slow on some
1113 OSes.
1114 """
Junji Watanabe66041012021-08-11 06:40:08 +00001115 logging.info('NamedCache.cleanup(): Cleaning %s', self.cache_dir)
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001116 success = True
1117 with self._lock:
1118 try:
1119 actual = set(fs.listdir(self.cache_dir))
1120 actual.discard(self.NAMED_DIR)
1121 actual.discard(self.STATE_FILE)
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001122 expected = {v[0]: k for k, v in self._lru.items()}
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001123 # First, handle the actual cache content.
1124 # Remove missing entries.
1125 for missing in (set(expected) - actual):
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001126 name, size = self._lru.pop(expected[missing])
1127 logging.warning(
1128 'NamedCache.cleanup(): Missing on disk %r(%d)', name, size)
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001129 # Remove unexpected items.
1130 for unexpected in (actual - set(expected)):
1131 try:
1132 p = os.path.join(self.cache_dir, unexpected)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001133 logging.warning(
1134 'NamedCache.cleanup(): Unexpected %r', unexpected)
Marc-Antoine Ruel41362222018-06-28 14:52:34 +00001135 if fs.isdir(p) and not fs.islink(p):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001136 file_path.rmtree(p)
1137 else:
1138 fs.remove(p)
1139 except (IOError, OSError) as e:
1140 logging.error('Failed to remove %s: %s', unexpected, e)
1141 success = False
1142
1143 # Second, fix named cache links.
1144 named = os.path.join(self.cache_dir, self.NAMED_DIR)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001145 if fs.isdir(named):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001146 actual = set(fs.listdir(named))
1147 expected = set(self._lru)
1148 # Confirm entries. Do not add missing ones for now.
1149 for name in expected.intersection(actual):
1150 p = os.path.join(self.cache_dir, self.NAMED_DIR, name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001151 expected_link = os.path.join(u'..', self._lru[name][0])
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001152 if fs.islink(p):
1153 link = fs.readlink(p)
1154 if expected_link == link:
1155 continue
1156 logging.warning(
1157 'Unexpected symlink for cache %s: %s, expected %s',
1158 name, link, expected_link)
1159 else:
1160 logging.warning('Unexpected non symlink for cache %s', name)
Marc-Antoine Ruel41362222018-06-28 14:52:34 +00001161 if fs.isdir(p) and not fs.islink(p):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001162 file_path.rmtree(p)
1163 else:
1164 fs.remove(p)
1165 # Remove unexpected items.
1166 for unexpected in (actual - expected):
1167 try:
1168 p = os.path.join(self.cache_dir, self.NAMED_DIR, unexpected)
1169 if fs.isdir(p):
1170 file_path.rmtree(p)
1171 else:
1172 fs.remove(p)
1173 except (IOError, OSError) as e:
1174 logging.error('Failed to remove %s: %s', unexpected, e)
1175 success = False
1176 finally:
1177 self._save()
1178 return success
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001179
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001180 # Internal functions.
1181
1182 def _try_upgrade(self):
1183 """Upgrades from the old format to the new one if necessary.
1184
1185 This code can be removed so all bots are known to have the right new format.
1186 """
1187 if not self._lru:
1188 return
1189 _name, (data, _ts) = self._lru.get_oldest()
1190 if isinstance(data, (list, tuple)):
1191 return
1192 # Update to v2.
1193 def upgrade(_name, rel_cache):
1194 abs_cache = os.path.join(self.cache_dir, rel_cache)
Takuto Ikuta995da062021-03-17 05:01:59 +00001195 return rel_cache, file_path.get_recursive_size(abs_cache)
1196
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001197 self._lru.transform(upgrade)
1198 self._save()
1199
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001200 def _remove_lru_item(self):
1201 """Removes the oldest LRU entry. LRU must not be empty."""
1202 name, ((_rel_path, size), _ts) = self._lru.get_oldest()
Takuto Ikuta74686842021-07-30 04:11:03 +00001203 logging.info('Removing named cache %r, %d', name, size)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001204 self._remove(name)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001205 return name, size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001206
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001207 def _allocate_dir(self):
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001208 """Creates and returns relative path of a new cache directory.
1209
1210 In practice, it is a 2-letter string.
1211 """
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001212 # We randomly generate directory names that have two lower/upper case
1213 # letters or digits. Total number of possibilities is (26*2 + 10)^2 = 3844.
1214 abc_len = len(self._DIR_ALPHABET)
1215 tried = set()
1216 while len(tried) < 1000:
1217 i = random.randint(0, abc_len * abc_len - 1)
1218 rel_path = (
Takuto Ikuta1c717d72020-06-29 10:15:09 +00001219 self._DIR_ALPHABET[i // abc_len] + self._DIR_ALPHABET[i % abc_len])
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001220 if rel_path in tried:
1221 continue
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001222 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001223 if not fs.exists(abs_path):
1224 return rel_path
1225 tried.add(rel_path)
1226 raise NamedCacheError(
1227 'could not allocate a new cache dir, too many cache dirs')
1228
1229 def _remove(self, name):
1230 """Removes a cache directory and entry.
1231
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001232 Returns:
1233 Number of caches deleted.
1234 """
1235 self._lock.assert_locked()
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001236 # First try to remove the alias if it exists.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001237 named_dir = self._get_named_path(name)
1238 if fs.islink(named_dir):
1239 fs.unlink(named_dir)
1240
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001241 # Then remove the actual data.
1242 if name not in self._lru:
1243 return
1244 rel_path, _size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001245 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001246 if fs.isdir(abs_path):
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001247 file_path.rmtree(abs_path)
1248 self._lru.pop(name)
1249
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001250 def _save(self):
1251 self._lock.assert_locked()
1252 file_path.ensure_tree(self.cache_dir)
1253 self._lru.save(self.state_file)
1254
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001255 def _get_named_path(self, name):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001256 return os.path.join(self.cache_dir, self.NAMED_DIR, name)