blob: e0bd70e82f2210d1ba5a4f65c727d06a59e1549c [file] [log] [blame]
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -04001# Copyright 2018 The LUCI Authors. All rights reserved.
2# Use of this source code is governed under the Apache License, Version 2.0
3# that can be found in the LICENSE file.
4
5"""Define local cache policies."""
6
Takuto Ikuta2fe58fd2021-08-18 13:47:36 +00007from __future__ import print_function
8
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -04009import errno
Takuto Ikuta922c8642021-11-18 07:42:16 +000010import hashlib
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040011import io
12import logging
13import os
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040014import random
15import string
Junji Watanabe7b720782020-07-01 01:51:07 +000016import subprocess
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040017import sys
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000018import time
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040019
20from utils import file_path
21from utils import fs
22from utils import lru
23from utils import threading_utils
24from utils import tools
Lei Leife202df2019-06-11 17:33:34 +000025tools.force_local_third_party()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040026
Lei Leife202df2019-06-11 17:33:34 +000027# third_party/
28import six
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040029
30# The file size to be used when we don't know the correct file size,
31# generally used for .isolated files.
32UNKNOWN_FILE_SIZE = None
33
Junji Watanabed2ab86b2021-08-13 07:20:23 +000034# PermissionError isn't defined in Python2.
35if six.PY2:
36 PermissionError = None
37
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040038
39def file_write(path, content_generator):
40 """Writes file content as generated by content_generator.
41
42 Creates the intermediary directory as needed.
43
44 Returns the number of bytes written.
45
46 Meant to be mocked out in unit tests.
47 """
48 file_path.ensure_tree(os.path.dirname(path))
49 total = 0
50 with fs.open(path, 'wb') as f:
51 for d in content_generator:
52 total += len(d)
53 f.write(d)
54 return total
55
56
57def is_valid_file(path, size):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +000058 """Returns if the given files appears valid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040059
60 Currently it just checks the file exists and its size matches the expectation.
61 """
62 if size == UNKNOWN_FILE_SIZE:
63 return fs.isfile(path)
64 try:
65 actual_size = fs.stat(path).st_size
66 except OSError as e:
Junji Watanabe38b28b02020-04-23 10:23:30 +000067 logging.warning('Can\'t read item %s, assuming it\'s invalid: %s',
68 os.path.basename(path), e)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040069 return False
70 if size != actual_size:
71 logging.warning(
72 'Found invalid item %s; %d != %d',
73 os.path.basename(path), actual_size, size)
74 return False
75 return True
76
77
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000078def trim_caches(caches, path, min_free_space, max_age_secs):
79 """Trims multiple caches.
80
81 The goal here is to coherently trim all caches in a coherent LRU fashion,
82 deleting older items independent of which container they belong to.
83
84 Two policies are enforced first:
85 - max_age_secs
86 - min_free_space
87
88 Once that's done, then we enforce each cache's own policies.
89
90 Returns:
91 Slice containing the size of all items evicted.
92 """
93 min_ts = time.time() - max_age_secs if max_age_secs else 0
94 free_disk = file_path.get_free_space(path) if min_free_space else 0
Junji Watanabe66041012021-08-11 06:40:08 +000095 logging.info("Trimming caches. min_ts: %d, free_disk: %d, min_free_space: %d",
96 min_ts, free_disk, min_free_space)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000097 total = []
98 if min_ts or free_disk:
99 while True:
100 oldest = [(c, c.get_oldest()) for c in caches if len(c) > 0]
101 if not oldest:
102 break
Lei Leife202df2019-06-11 17:33:34 +0000103 oldest.sort(key=lambda k: k[1])
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000104 c, ts = oldest[0]
105 if ts >= min_ts and free_disk >= min_free_space:
106 break
107 total.append(c.remove_oldest())
108 if min_free_space:
109 free_disk = file_path.get_free_space(path)
Takuto Ikuta74686842021-07-30 04:11:03 +0000110 logging.info("free_disk after removing oldest entries: %d", free_disk)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000111 # Evaluate each cache's own policies.
112 for c in caches:
113 total.extend(c.trim())
114 return total
115
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000116
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400117class NamedCacheError(Exception):
118 """Named cache specific error."""
119
120
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400121class NoMoreSpace(Exception):
122 """Not enough space to map the whole directory."""
123 pass
124
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -0400125
126class CachePolicies(object):
127 def __init__(self, max_cache_size, min_free_space, max_items, max_age_secs):
128 """Common caching policies for the multiple caches (isolated, named, cipd).
129
130 Arguments:
131 - max_cache_size: Trim if the cache gets larger than this value. If 0, the
132 cache is effectively a leak.
133 - min_free_space: Trim if disk free space becomes lower than this value. If
134 0, it will unconditionally fill the disk.
135 - max_items: Maximum number of items to keep in the cache. If 0, do not
136 enforce a limit.
137 - max_age_secs: Maximum age an item is kept in the cache until it is
138 automatically evicted. Having a lot of dead luggage slows
139 everything down.
140 """
141 self.max_cache_size = max_cache_size
142 self.min_free_space = min_free_space
143 self.max_items = max_items
144 self.max_age_secs = max_age_secs
145
146 def __str__(self):
Takuto Ikutaa953f272020-01-20 02:59:17 +0000147 return ('CachePolicies(max_cache_size=%s (%.3f GiB); max_items=%s; '
148 'min_free_space=%s (%.3f GiB); max_age_secs=%s)') % (
149 self.max_cache_size, float(self.max_cache_size) / 1024**3,
150 self.max_items, self.min_free_space,
151 float(self.min_free_space) / 1024**3, self.max_age_secs)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400152
153
154class CacheMiss(Exception):
155 """Raised when an item is not in cache."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400156 def __init__(self, digest):
157 self.digest = digest
Junji Watanabe38b28b02020-04-23 10:23:30 +0000158 super(CacheMiss,
159 self).__init__('Item with digest %r is not found in cache' % digest)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400160
161
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400162class Cache(object):
Junji Watanabe38b28b02020-04-23 10:23:30 +0000163
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400164 def __init__(self, cache_dir):
165 if cache_dir is not None:
Takuto Ikuta95459dd2019-10-29 12:39:47 +0000166 assert isinstance(cache_dir, six.text_type), cache_dir
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400167 assert file_path.isabs(cache_dir), cache_dir
168 self.cache_dir = cache_dir
169 self._lock = threading_utils.LockWithAssert()
170 # Profiling values.
171 self._added = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400172 self._used = []
173
Marc-Antoine Ruel6c3be5a2018-09-04 17:19:59 +0000174 def __nonzero__(self):
175 """A cache is always True.
176
177 Otherwise it falls back to __len__, which is surprising.
178 """
179 return True
180
Takuto Ikuta1c717d72020-06-29 10:15:09 +0000181 def __bool__(self):
182 """A cache is always True.
183
184 Otherwise it falls back to __len__, which is surprising.
185 """
186 return True
187
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000188 def __len__(self):
189 """Returns the number of entries in the cache."""
190 raise NotImplementedError()
191
192 def __iter__(self):
193 """Iterates over all the entries names."""
194 raise NotImplementedError()
195
196 def __contains__(self, name):
197 """Returns if an entry is in the cache."""
198 raise NotImplementedError()
199
200 @property
201 def total_size(self):
202 """Returns the total size of the cache in bytes."""
203 raise NotImplementedError()
204
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400205 @property
206 def added(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000207 """Returns a list of the size for each entry added."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400208 with self._lock:
209 return self._added[:]
210
211 @property
212 def used(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000213 """Returns a list of the size for each entry used."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400214 with self._lock:
215 return self._used[:]
216
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000217 def get_oldest(self):
218 """Returns timestamp of oldest cache entry or None.
219
220 Returns:
221 Timestamp of the oldest item.
222
223 Used for manual trimming.
224 """
225 raise NotImplementedError()
226
227 def remove_oldest(self):
228 """Removes the oldest item from the cache.
229
230 Returns:
231 Size of the oldest item.
232
233 Used for manual trimming.
234 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400235 raise NotImplementedError()
236
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000237 def save(self):
238 """Saves the current cache to disk."""
239 raise NotImplementedError()
240
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400241 def trim(self):
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000242 """Enforces cache policies, then calls save().
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400243
244 Returns:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000245 Slice with the size of evicted items.
246 """
247 raise NotImplementedError()
248
249 def cleanup(self):
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000250 """Deletes any corrupted item from the cache, then calls trim(), then
251 save().
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000252
253 It is assumed to take significantly more time than trim().
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400254 """
255 raise NotImplementedError()
256
257
258class ContentAddressedCache(Cache):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400259 """Content addressed cache that stores objects temporarily.
260
261 It can be accessed concurrently from multiple threads, so it should protect
262 its internal state with some lock.
263 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400264
265 def __enter__(self):
266 """Context manager interface."""
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000267 # TODO(maruel): Remove.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400268 return self
269
270 def __exit__(self, _exc_type, _exec_value, _traceback):
271 """Context manager interface."""
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000272 # TODO(maruel): Remove.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400273 return False
274
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400275 def touch(self, digest, size):
276 """Ensures item is not corrupted and updates its LRU position.
277
278 Arguments:
279 digest: hash digest of item to check.
280 size: expected size of this item.
281
282 Returns:
283 True if item is in cache and not corrupted.
284 """
285 raise NotImplementedError()
286
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400287 def getfileobj(self, digest):
288 """Returns a readable file like object.
289
290 If file exists on the file system it will have a .name attribute with an
291 absolute path to the file.
292 """
293 raise NotImplementedError()
294
295 def write(self, digest, content):
296 """Reads data from |content| generator and stores it in cache.
297
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000298 It is possible to write to an object that already exists. It may be
299 ignored (sent to /dev/null) but the timestamp is still updated.
300
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400301 Returns digest to simplify chaining.
302 """
303 raise NotImplementedError()
304
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400305
306class MemoryContentAddressedCache(ContentAddressedCache):
307 """ContentAddressedCache implementation that stores everything in memory."""
308
Lei Leife202df2019-06-11 17:33:34 +0000309 def __init__(self, file_mode_mask=0o500):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400310 """Args:
311 file_mode_mask: bit mask to AND file mode with. Default value will make
312 all mapped files to be read only.
313 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400314 super(MemoryContentAddressedCache, self).__init__(None)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400315 self._file_mode_mask = file_mode_mask
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000316 # Items in a LRU lookup dict(digest: size).
317 self._lru = lru.LRUDict()
318
319 # Cache interface implementation.
320
321 def __len__(self):
322 with self._lock:
323 return len(self._lru)
324
325 def __iter__(self):
326 # This is not thread-safe.
327 return self._lru.__iter__()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400328
329 def __contains__(self, digest):
330 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000331 return digest in self._lru
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400332
333 @property
334 def total_size(self):
335 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000336 return sum(len(i) for i in self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400337
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000338 def get_oldest(self):
339 with self._lock:
340 try:
341 # (key, (value, ts))
342 return self._lru.get_oldest()[1][1]
343 except KeyError:
344 return None
345
346 def remove_oldest(self):
347 with self._lock:
348 # TODO(maruel): Update self._added.
349 # (key, (value, ts))
350 return len(self._lru.pop_oldest()[1][0])
351
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000352 def save(self):
353 pass
354
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000355 def trim(self):
356 """Trimming is not implemented for MemoryContentAddressedCache."""
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000357 return []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400358
359 def cleanup(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000360 """Cleaning is irrelevant, as there's no stateful serialization."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400361 pass
362
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000363 # ContentAddressedCache interface implementation.
364
365 def __contains__(self, digest):
366 with self._lock:
367 return digest in self._lru
368
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400369 def touch(self, digest, size):
370 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000371 try:
372 self._lru.touch(digest)
373 except KeyError:
374 return False
375 return True
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400376
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400377 def getfileobj(self, digest):
378 with self._lock:
379 try:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000380 d = self._lru[digest]
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400381 except KeyError:
382 raise CacheMiss(digest)
383 self._used.append(len(d))
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000384 self._lru.touch(digest)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400385 return io.BytesIO(d)
386
387 def write(self, digest, content):
388 # Assemble whole stream before taking the lock.
Lei Lei73a5f732020-03-23 20:36:14 +0000389 data = six.b('').join(content)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400390 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000391 self._lru.add(digest, data)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400392 self._added.append(len(data))
393 return digest
394
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400395
396class DiskContentAddressedCache(ContentAddressedCache):
397 """Stateful LRU cache in a flat hash table in a directory.
398
399 Saves its state as json file.
400 """
401 STATE_FILE = u'state.json'
402
Marc-Antoine Ruel79d42192019-02-06 19:24:16 +0000403 def __init__(self, cache_dir, policies, trim, time_fn=None):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400404 """
405 Arguments:
406 cache_dir: directory where to place the cache.
407 policies: CachePolicies instance, cache retention policies.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400408 trim: if True to enforce |policies| right away.
Marc-Antoine Ruel79d42192019-02-06 19:24:16 +0000409 It can be done later by calling trim() explicitly.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400410 """
411 # All protected methods (starting with '_') except _path should be called
412 # with self._lock held.
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400413 super(DiskContentAddressedCache, self).__init__(cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400414 self.policies = policies
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400415 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
416 # Items in a LRU lookup dict(digest: size).
417 self._lru = lru.LRUDict()
418 # Current cached free disk space. It is updated by self._trim().
419 file_path.ensure_tree(self.cache_dir)
420 self._free_disk = file_path.get_free_space(self.cache_dir)
421 # The first item in the LRU cache that must not be evicted during this run
422 # since it was referenced. All items more recent that _protected in the LRU
423 # cache are also inherently protected. It could be a set() of all items
424 # referenced but this increases memory usage without a use case.
425 self._protected = None
426 # Cleanup operations done by self._load(), if any.
427 self._operations = []
428 with tools.Profiler('Setup'):
429 with self._lock:
430 self._load(trim, time_fn)
431
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000432 # Cache interface implementation.
433
434 def __len__(self):
435 with self._lock:
436 return len(self._lru)
437
438 def __iter__(self):
439 # This is not thread-safe.
440 return self._lru.__iter__()
441
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400442 def __contains__(self, digest):
443 with self._lock:
444 return digest in self._lru
445
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400446 @property
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400447 def total_size(self):
448 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000449 return sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400450
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000451 def get_oldest(self):
452 with self._lock:
453 try:
454 # (key, (value, ts))
455 return self._lru.get_oldest()[1][1]
456 except KeyError:
457 return None
458
459 def remove_oldest(self):
460 with self._lock:
461 # TODO(maruel): Update self._added.
462 return self._remove_lru_file(True)
463
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000464 def save(self):
465 with self._lock:
466 return self._save()
467
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000468 def trim(self):
469 """Forces retention policies."""
470 with self._lock:
471 return self._trim()
472
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400473 def cleanup(self):
474 """Cleans up the cache directory.
475
476 Ensures there is no unknown files in cache_dir.
477 Ensures the read-only bits are set correctly.
478
479 At that point, the cache was already loaded, trimmed to respect cache
480 policies.
481 """
Junji Watanabe66041012021-08-11 06:40:08 +0000482 logging.info('DiskContentAddressedCache.cleanup(): Cleaning %s',
483 self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400484 with self._lock:
Lei Leife202df2019-06-11 17:33:34 +0000485 fs.chmod(self.cache_dir, 0o700)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400486 # Ensure that all files listed in the state still exist and add new ones.
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000487 previous = set(self._lru)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400488 # It'd be faster if there were a readdir() function.
489 for filename in fs.listdir(self.cache_dir):
490 if filename == self.STATE_FILE:
Lei Leife202df2019-06-11 17:33:34 +0000491 fs.chmod(os.path.join(self.cache_dir, filename), 0o600)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400492 continue
493 if filename in previous:
Lei Leife202df2019-06-11 17:33:34 +0000494 fs.chmod(os.path.join(self.cache_dir, filename), 0o400)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400495 previous.remove(filename)
496 continue
497
498 # An untracked file. Delete it.
Junji Watanabe66041012021-08-11 06:40:08 +0000499 logging.warning(
500 'DiskContentAddressedCache.cleanup(): Removing unknown file %s',
501 filename)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400502 p = self._path(filename)
503 if fs.isdir(p):
504 try:
505 file_path.rmtree(p)
506 except OSError:
507 pass
508 else:
509 file_path.try_remove(p)
510 continue
511
512 if previous:
513 # Filter out entries that were not found.
Junji Watanabe66041012021-08-11 06:40:08 +0000514 logging.warning(
515 'DiskContentAddressedCache.cleanup(): Removed %d lost files',
516 len(previous))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400517 for filename in previous:
518 self._lru.pop(filename)
519 self._save()
520
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000521 # Verify hash of every single item to detect corruption. the corrupted
522 # files will be evicted.
Junji Watanabe66041012021-08-11 06:40:08 +0000523 total = 0
524 verified = 0
525 deleted = 0
526 logging.info(
527 'DiskContentAddressedCache.cleanup(): Verifying modified files')
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000528 with self._lock:
Takuto Ikuta1c717d72020-06-29 10:15:09 +0000529 for digest, (_, timestamp) in list(self._lru._items.items()):
Junji Watanabe66041012021-08-11 06:40:08 +0000530 total += 1
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000531 # verify only if the mtime is grather than the timestamp in state.json
532 # to avoid take too long time.
533 if self._get_mtime(digest) <= timestamp:
Quinten Yearsley0bc84ce2020-04-09 22:38:08 +0000534 continue
Junji Watanabe66041012021-08-11 06:40:08 +0000535 logging.warning(
536 'DiskContentAddressedCache.cleanup(): Item has been modified.'
537 ' verifying item: %s', digest)
538 is_valid = self._is_valid_hash(digest)
539 verified += 1
540 logging.warning(
541 'DiskContentAddressedCache.cleanup(): verified. is_valid: %s, '
542 'item: %s', is_valid, digest)
543 if is_valid:
Quinten Yearsley0bc84ce2020-04-09 22:38:08 +0000544 # Update timestamp in state.json
545 self._lru.touch(digest)
546 continue
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000547 # remove corrupted file from LRU and file system
548 self._lru.pop(digest)
549 self._delete_file(digest, UNKNOWN_FILE_SIZE)
Junji Watanabe66041012021-08-11 06:40:08 +0000550 deleted += 1
551 logging.error(
552 'DiskContentAddressedCache.cleanup(): Deleted corrupted item: %s',
553 digest)
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000554 self._save()
Junji Watanabe66041012021-08-11 06:40:08 +0000555 logging.info(
556 'DiskContentAddressedCache.cleanup(): Verified modified files.'
557 ' total: %d, verified: %d, deleted: %d', total, verified, deleted)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400558
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000559 # ContentAddressedCache interface implementation.
560
561 def __contains__(self, digest):
562 with self._lock:
563 return digest in self._lru
564
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400565 def touch(self, digest, size):
566 """Verifies an actual file is valid and bumps its LRU position.
567
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000568 Returns False if the file is missing or invalid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400569
570 Note that is doesn't compute the hash so it could still be corrupted if the
571 file size didn't change.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400572 """
573 # Do the check outside the lock.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000574 looks_valid = is_valid_file(self._path(digest), size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400575
576 # Update its LRU position.
577 with self._lock:
578 if digest not in self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000579 if looks_valid:
580 # Exists but not in the LRU anymore.
581 self._delete_file(digest, size)
582 return False
583 if not looks_valid:
584 self._lru.pop(digest)
585 # Exists but not in the LRU anymore.
586 self._delete_file(digest, size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400587 return False
588 self._lru.touch(digest)
589 self._protected = self._protected or digest
590 return True
591
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400592 def getfileobj(self, digest):
593 try:
594 f = fs.open(self._path(digest), 'rb')
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400595 except IOError:
596 raise CacheMiss(digest)
Vadim Shtayura33054fa2018-11-01 12:47:59 +0000597 with self._lock:
598 try:
599 self._used.append(self._lru[digest])
600 except KeyError:
601 # If the digest is not actually in _lru, assume it is a cache miss.
602 # Existing file will be overwritten by whoever uses the cache and added
603 # to _lru.
604 f.close()
605 raise CacheMiss(digest)
606 return f
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400607
608 def write(self, digest, content):
609 assert content is not None
610 with self._lock:
611 self._protected = self._protected or digest
612 path = self._path(digest)
613 # A stale broken file may remain. It is possible for the file to have write
614 # access bit removed which would cause the file_write() call to fail to open
615 # in write mode. Take no chance here.
616 file_path.try_remove(path)
617 try:
618 size = file_write(path, content)
619 except:
620 # There are two possible places were an exception can occur:
621 # 1) Inside |content| generator in case of network or unzipping errors.
622 # 2) Inside file_write itself in case of disk IO errors.
623 # In any case delete an incomplete file and propagate the exception to
624 # caller, it will be logged there.
625 file_path.try_remove(path)
626 raise
627 # Make the file read-only in the cache. This has a few side-effects since
628 # the file node is modified, so every directory entries to this file becomes
629 # read-only. It's fine here because it is a new file.
630 file_path.set_read_only(path, True)
631 with self._lock:
632 self._add(digest, size)
633 return digest
634
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000635 # Internal functions.
636
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400637 def _load(self, trim, time_fn):
638 """Loads state of the cache from json file.
639
640 If cache_dir does not exist on disk, it is created.
641 """
642 self._lock.assert_locked()
643
644 if not fs.isfile(self.state_file):
645 if not fs.isdir(self.cache_dir):
646 fs.makedirs(self.cache_dir)
647 else:
648 # Load state of the cache.
649 try:
650 self._lru = lru.LRUDict.load(self.state_file)
651 except ValueError as err:
652 logging.error('Failed to load cache state: %s' % (err,))
Takuto Ikutaeccc88c2019-12-13 14:46:32 +0000653 # Don't want to keep broken cache dir.
654 file_path.rmtree(self.cache_dir)
655 fs.makedirs(self.cache_dir)
Matt Kotsenasefe30092020-03-19 01:12:55 +0000656 self._free_disk = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400657 if time_fn:
658 self._lru.time_fn = time_fn
659 if trim:
660 self._trim()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400661
662 def _save(self):
663 """Saves the LRU ordering."""
664 self._lock.assert_locked()
665 if sys.platform != 'win32':
666 d = os.path.dirname(self.state_file)
667 if fs.isdir(d):
668 # Necessary otherwise the file can't be created.
669 file_path.set_read_only(d, False)
670 if fs.isfile(self.state_file):
671 file_path.set_read_only(self.state_file, False)
672 self._lru.save(self.state_file)
673
674 def _trim(self):
675 """Trims anything we don't know, make sure enough free space exists."""
676 self._lock.assert_locked()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000677 evicted = []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400678
679 # Trim old items.
680 if self.policies.max_age_secs:
681 cutoff = self._lru.time_fn() - self.policies.max_age_secs
682 while self._lru:
683 oldest = self._lru.get_oldest()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000684 # (key, (data, ts)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400685 if oldest[1][1] >= cutoff:
686 break
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000687 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400688
689 # Ensure maximum cache size.
690 if self.policies.max_cache_size:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000691 total_size = sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400692 while total_size > self.policies.max_cache_size:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000693 e = self._remove_lru_file(True)
694 evicted.append(e)
695 total_size -= e
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400696
697 # Ensure maximum number of items in the cache.
698 if self.policies.max_items and len(self._lru) > self.policies.max_items:
Marc-Antoine Ruel0fdee222019-10-10 14:42:40 +0000699 for _ in range(len(self._lru) - self.policies.max_items):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000700 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400701
702 # Ensure enough free space.
703 self._free_disk = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400704 while (
705 self.policies.min_free_space and
706 self._lru and
707 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000708 # self._free_disk is updated by this call.
709 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400710
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000711 if evicted:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000712 total_usage = sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400713 usage_percent = 0.
714 if total_usage:
715 usage_percent = 100. * float(total_usage) / self.policies.max_cache_size
716
717 logging.warning(
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000718 'Trimmed %d file(s) (%.1fkb) due to not enough free disk space:'
719 ' %.1fkb free, %.1fkb cache (%.1f%% of its maximum capacity of '
Junji Watanabe38b28b02020-04-23 10:23:30 +0000720 '%.1fkb)', len(evicted),
721 sum(evicted) / 1024., self._free_disk / 1024., total_usage / 1024.,
722 usage_percent, self.policies.max_cache_size / 1024.)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400723 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000724 return evicted
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400725
726 def _path(self, digest):
727 """Returns the path to one item."""
728 return os.path.join(self.cache_dir, digest)
729
730 def _remove_lru_file(self, allow_protected):
Quinten Yearsley0bc84ce2020-04-09 22:38:08 +0000731 """Removes the latest recently used file and returns its size.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000732
733 Updates self._free_disk.
734 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400735 self._lock.assert_locked()
736 try:
Takuto Ikutae40f76a2020-01-20 01:22:17 +0000737 digest, _ = self._lru.get_oldest()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400738 if not allow_protected and digest == self._protected:
Takuto Ikutae40f76a2020-01-20 01:22:17 +0000739 total_size = sum(self._lru.values())
740 msg = ('Not enough space to fetch the whole isolated tree.\n'
Takuto Ikutaa953f272020-01-20 02:59:17 +0000741 ' %s\n cache=%d bytes (%.3f GiB), %d items; '
742 '%s bytes (%.3f GiB) free_space') % (
743 self.policies, total_size, float(total_size) / 1024**3,
744 len(self._lru), self._free_disk,
745 float(self._free_disk) / 1024**3)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400746 raise NoMoreSpace(msg)
747 except KeyError:
748 # That means an internal error.
749 raise NoMoreSpace('Nothing to remove, can\'t happend')
750 digest, (size, _) = self._lru.pop_oldest()
Takuto Ikuta8d8ca9b2021-02-26 02:31:43 +0000751 logging.debug('Removing LRU file %s with size %s bytes', digest, size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400752 self._delete_file(digest, size)
753 return size
754
755 def _add(self, digest, size=UNKNOWN_FILE_SIZE):
756 """Adds an item into LRU cache marking it as a newest one."""
757 self._lock.assert_locked()
758 if size == UNKNOWN_FILE_SIZE:
759 size = fs.stat(self._path(digest)).st_size
760 self._added.append(size)
761 self._lru.add(digest, size)
762 self._free_disk -= size
763 # Do a quicker version of self._trim(). It only enforces free disk space,
764 # not cache size limits. It doesn't actually look at real free disk space,
765 # only uses its cache values. self._trim() will be called later to enforce
766 # real trimming but doing this quick version here makes it possible to map
767 # an isolated that is larger than the current amount of free disk space when
768 # the cache size is already large.
Junji Watanabe38b28b02020-04-23 10:23:30 +0000769 while (self.policies.min_free_space and self._lru and
770 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000771 # self._free_disk is updated by this call.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400772 if self._remove_lru_file(False) == -1:
773 break
774
775 def _delete_file(self, digest, size=UNKNOWN_FILE_SIZE):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000776 """Deletes cache file from the file system.
777
778 Updates self._free_disk.
779 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400780 self._lock.assert_locked()
781 try:
782 if size == UNKNOWN_FILE_SIZE:
783 try:
784 size = fs.stat(self._path(digest)).st_size
785 except OSError:
786 size = 0
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000787 if file_path.try_remove(self._path(digest)):
788 self._free_disk += size
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400789 except OSError as e:
790 if e.errno != errno.ENOENT:
791 logging.error('Error attempting to delete a file %s:\n%s' % (digest, e))
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400792
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000793 def _get_mtime(self, digest):
794 """Get mtime of cache file."""
795 return os.path.getmtime(self._path(digest))
796
797 def _is_valid_hash(self, digest):
798 """Verify digest with supported hash algos."""
Takuto Ikuta922c8642021-11-18 07:42:16 +0000799 d = hashlib.sha256()
800 with fs.open(self._path(digest), 'rb') as f:
801 while True:
802 chunk = f.read(1024 * 1024)
803 if not chunk:
804 break
805 d.update(chunk)
806 return digest == d.hexdigest()
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000807
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400808
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400809class NamedCache(Cache):
810 """Manages cache directories.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400811
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400812 A cache entry is a tuple (name, path), where
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400813 name is a short identifier that describes the contents of the cache, e.g.
814 "git_v8" could be all git repositories required by v8 builds, or
815 "build_chromium" could be build artefacts of the Chromium.
816 path is a directory path relative to the task run dir. Cache installation
817 puts the requested cache directory at the path.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400818 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400819 _DIR_ALPHABET = string.ascii_letters + string.digits
820 STATE_FILE = u'state.json'
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +0000821 NAMED_DIR = u'named'
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400822
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400823 def __init__(self, cache_dir, policies, time_fn=None):
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400824 """Initializes NamedCaches.
825
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400826 Arguments:
827 - cache_dir is a directory for persistent cache storage.
828 - policies is a CachePolicies instance.
829 - time_fn is a function that returns timestamp (float) and used to take
830 timestamps when new caches are requested. Used in unit tests.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400831 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400832 super(NamedCache, self).__init__(cache_dir)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400833 self._policies = policies
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000834 # LRU {cache_name -> tuple(cache_location, size)}
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400835 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
836 self._lru = lru.LRUDict()
837 if not fs.isdir(self.cache_dir):
838 fs.makedirs(self.cache_dir)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000839 elif fs.isfile(self.state_file):
Marc-Antoine Ruel3543e212018-05-23 01:04:34 +0000840 try:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400841 self._lru = lru.LRUDict.load(self.state_file)
Takuto Ikutac4b85ec2020-06-09 03:42:39 +0000842 for _, size in self._lru.values():
Takuto Ikuta1c717d72020-06-29 10:15:09 +0000843 if not isinstance(size, six.integer_types):
Takuto Ikuta6acf8f92020-07-02 02:06:42 +0000844 with open(self.state_file, 'r') as f:
845 logging.info('named cache state file: %s\n%s', self.state_file,
846 f.read())
Junji Watanabeedcf47d2020-06-11 08:41:01 +0000847 raise ValueError("size is not integer: %s" % size)
Takuto Ikutac4b85ec2020-06-09 03:42:39 +0000848
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400849 except ValueError:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000850 logging.exception(
851 'NamedCache: failed to load named cache state file; obliterating')
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400852 file_path.rmtree(self.cache_dir)
Takuto Ikuta568ddb22020-01-20 23:24:16 +0000853 fs.makedirs(self.cache_dir)
Takuto Ikutadadfbb02020-07-10 03:31:26 +0000854 self._lru = lru.LRUDict()
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000855 with self._lock:
856 self._try_upgrade()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400857 if time_fn:
858 self._lru.time_fn = time_fn
859
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400860 @property
861 def available(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000862 """Returns a set of names of available caches."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400863 with self._lock:
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000864 return set(self._lru)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400865
Takuto Ikutaeab23172020-07-02 03:50:02 +0000866 def _sudo_chown(self, path):
867 if sys.platform == 'win32':
868 return
869 uid = os.getuid()
870 if os.stat(path).st_uid == uid:
871 return
872 # Maybe owner of |path| is different from runner of this script. This is to
873 # make fs.rename work in that case.
874 # https://crbug.com/986676
875 subprocess.check_call(['sudo', '-n', 'chown', str(uid), path])
876
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000877 def install(self, dst, name):
878 """Creates the directory |dst| and moves a previous named cache |name| if it
879 was in the local named caches cache.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400880
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000881 dst must be absolute, unicode and must not exist.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400882
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000883 Returns the reused named cache size in bytes, or 0 if none was present.
884
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400885 Raises NamedCacheError if cannot install the cache.
886 """
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000887 logging.info('NamedCache.install(%r, %r)', dst, name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400888 with self._lock:
889 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000890 if fs.isdir(dst):
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400891 raise NamedCacheError(
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000892 'installation directory %r already exists' % dst)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400893
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000894 # Remove the named symlink if it exists.
895 link_name = self._get_named_path(name)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000896 if fs.exists(link_name):
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000897 # Remove the symlink itself, not its destination.
898 fs.remove(link_name)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000899
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000900 if name in self._lru:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000901 rel_cache, size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400902 abs_cache = os.path.join(self.cache_dir, rel_cache)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000903 if fs.isdir(abs_cache):
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000904 logging.info('- reusing %r; size was %d', rel_cache, size)
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000905 file_path.ensure_tree(os.path.dirname(dst))
Takuto Ikutaeab23172020-07-02 03:50:02 +0000906 self._sudo_chown(abs_cache)
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000907 fs.rename(abs_cache, dst)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400908 self._remove(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000909 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400910
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000911 logging.warning('- expected directory %r, does not exist', rel_cache)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400912 self._remove(name)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400913
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000914 # The named cache does not exist, create an empty directory. When
915 # uninstalling, we will move it back to the cache and create an an
916 # entry.
917 logging.info('- creating new directory')
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000918 file_path.ensure_tree(dst)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000919 return 0
Junji Watanabed2ab86b2021-08-13 07:20:23 +0000920 except (IOError, OSError, PermissionError) as ex:
Takuto Ikuta2fe58fd2021-08-18 13:47:36 +0000921 if sys.platform == 'win32':
922 print("There may be running process in cache"
923 " e.g. https://crbug.com/1239809#c14",
924 file=sys.stderr)
925 subprocess.check_call(
926 ["powershell", "get-process | select path,starttime"])
927
Marc-Antoine Ruel799bc4f2019-01-30 22:54:47 +0000928 # Raise using the original traceback.
929 exc = NamedCacheError(
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000930 'cannot install cache named %r at %r: %s' % (name, dst, ex))
Takuto Ikuta1c717d72020-06-29 10:15:09 +0000931 six.reraise(type(exc), exc, sys.exc_info()[2])
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000932 finally:
933 self._save()
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400934
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000935 def uninstall(self, src, name):
936 """Moves the cache directory back into the named cache hive for an eventual
937 reuse.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400938
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000939 The opposite of install().
940
941 src must be absolute and unicode. Its content is moved back into the local
942 named caches cache.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400943
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000944 Returns the named cache size in bytes.
945
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400946 Raises NamedCacheError if cannot uninstall the cache.
947 """
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000948 logging.info('NamedCache.uninstall(%r, %r)', src, name)
Junji Watanabe9cdfff52021-01-08 07:20:35 +0000949 start = time.time()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400950 with self._lock:
951 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000952 if not fs.isdir(src):
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400953 logging.warning(
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000954 'NamedCache: Directory %r does not exist anymore. Cache lost.',
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000955 src)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400956 return
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400957
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000958 if name in self._lru:
959 # This shouldn't happen but just remove the preexisting one and move
960 # on.
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000961 logging.error('- overwriting existing cache!')
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000962 self._remove(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000963
Takuto Ikutac1bdcf22021-10-27 05:07:26 +0000964 # Calculate the size of the named cache to keep. It's important because
965 # if size is zero (it's empty), we do not want to add it back to the
966 # named caches cache.
Takuto Ikuta995da062021-03-17 05:01:59 +0000967 size = file_path.get_recursive_size(src)
Takuto Ikutac1bdcf22021-10-27 05:07:26 +0000968 logging.info('- Size is %d', size)
969 if not size:
970 # Do not save empty named cache.
971 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400972
973 # Move the dir and create an entry for the named cache.
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000974 rel_cache = self._allocate_dir()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400975 abs_cache = os.path.join(self.cache_dir, rel_cache)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000976 logging.info('- Moving to %r', rel_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400977 file_path.ensure_tree(os.path.dirname(abs_cache))
Takuto Ikutaeab23172020-07-02 03:50:02 +0000978 self._sudo_chown(src)
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000979 fs.rename(src, abs_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400980
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000981 self._lru.add(name, (rel_cache, size))
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000982 self._added.append(size)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000983
984 # Create symlink <cache_dir>/<named>/<name> -> <cache_dir>/<short name>
985 # for user convenience.
986 named_path = self._get_named_path(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000987 if fs.exists(named_path):
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000988 file_path.remove(named_path)
989 else:
990 file_path.ensure_tree(os.path.dirname(named_path))
991
992 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000993 fs.symlink(os.path.join(u'..', rel_cache), named_path)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000994 logging.info(
995 'NamedCache: Created symlink %r to %r', named_path, abs_cache)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000996 except OSError:
997 # Ignore on Windows. It happens when running as a normal user or when
998 # UAC is enabled and the user is a filtered administrator account.
999 if sys.platform != 'win32':
1000 raise
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001001 return size
Junji Watanabed2ab86b2021-08-13 07:20:23 +00001002 except (IOError, OSError, PermissionError) as ex:
Marc-Antoine Ruel799bc4f2019-01-30 22:54:47 +00001003 # Raise using the original traceback.
1004 exc = NamedCacheError(
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +00001005 'cannot uninstall cache named %r at %r: %s' % (name, src, ex))
Takuto Ikuta1c717d72020-06-29 10:15:09 +00001006 six.reraise(type(exc), exc, sys.exc_info()[2])
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001007 finally:
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +00001008 # Call save() at every uninstall. The assumptions are:
1009 # - The total the number of named caches is low, so the state.json file
1010 # is small, so the time it takes to write it to disk is short.
1011 # - The number of mapped named caches per task is low, so the number of
1012 # times save() is called on tear-down isn't high enough to be
1013 # significant.
1014 # - uninstall() sometimes throws due to file locking on Windows or
1015 # access rights on Linux. We want to keep as many as possible.
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001016 self._save()
Junji Watanabe9cdfff52021-01-08 07:20:35 +00001017 logging.info('NamedCache.uninstall(%r, %r) took %d seconds', src, name,
1018 time.time() - start)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001019
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001020 # Cache interface implementation.
1021
1022 def __len__(self):
1023 with self._lock:
1024 return len(self._lru)
1025
1026 def __iter__(self):
1027 # This is not thread-safe.
1028 return self._lru.__iter__()
1029
John Budorickc6186972020-02-26 00:58:14 +00001030 def __contains__(self, name):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001031 with self._lock:
John Budorickc6186972020-02-26 00:58:14 +00001032 return name in self._lru
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001033
1034 @property
1035 def total_size(self):
1036 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001037 return sum(size for _rel_path, size in self._lru.values())
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001038
1039 def get_oldest(self):
1040 with self._lock:
1041 try:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001042 # (key, (value, ts))
1043 return self._lru.get_oldest()[1][1]
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001044 except KeyError:
1045 return None
1046
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001047 def remove_oldest(self):
1048 with self._lock:
1049 # TODO(maruel): Update self._added.
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001050 _name, size = self._remove_lru_item()
1051 return size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001052
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +00001053 def save(self):
1054 with self._lock:
1055 return self._save()
1056
John Budorickc6186972020-02-26 00:58:14 +00001057 def touch(self, *names):
1058 with self._lock:
1059 for name in names:
1060 if name in self._lru:
1061 self._lru.touch(name)
1062 self._save()
1063
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001064 def trim(self):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001065 evicted = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001066 with self._lock:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001067 if not fs.isdir(self.cache_dir):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001068 return evicted
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001069
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001070 # Trim according to maximum number of items.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001071 if self._policies.max_items:
1072 while len(self._lru) > self._policies.max_items:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001073 name, size = self._remove_lru_item()
1074 evicted.append(size)
1075 logging.info(
1076 'NamedCache.trim(): Removed %r(%d) due to max_items(%d)',
1077 name, size, self._policies.max_items)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001078
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001079 # Trim according to maximum age.
1080 if self._policies.max_age_secs:
1081 cutoff = self._lru.time_fn() - self._policies.max_age_secs
1082 while self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001083 _name, (_data, ts) = self._lru.get_oldest()
1084 if ts >= cutoff:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001085 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001086 name, size = self._remove_lru_item()
1087 evicted.append(size)
1088 logging.info(
1089 'NamedCache.trim(): Removed %r(%d) due to max_age_secs(%d)',
1090 name, size, self._policies.max_age_secs)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001091
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001092 # Trim according to minimum free space.
1093 if self._policies.min_free_space:
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001094 while self._lru:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001095 free_space = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001096 if free_space >= self._policies.min_free_space:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001097 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001098 name, size = self._remove_lru_item()
1099 evicted.append(size)
1100 logging.info(
1101 'NamedCache.trim(): Removed %r(%d) due to min_free_space(%d)',
1102 name, size, self._policies.min_free_space)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001103
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001104 # Trim according to maximum total size.
1105 if self._policies.max_cache_size:
1106 while self._lru:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001107 total = sum(size for _rel_cache, size in self._lru.values())
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001108 if total <= self._policies.max_cache_size:
1109 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001110 name, size = self._remove_lru_item()
1111 evicted.append(size)
1112 logging.info(
1113 'NamedCache.trim(): Removed %r(%d) due to max_cache_size(%d)',
1114 name, size, self._policies.max_cache_size)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001115
Marc-Antoine Ruele79ddbf2018-06-13 18:33:07 +00001116 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001117 return evicted
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001118
1119 def cleanup(self):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001120 """Removes unknown directories.
1121
1122 Does not recalculate the cache size since it's surprisingly slow on some
1123 OSes.
1124 """
Junji Watanabe66041012021-08-11 06:40:08 +00001125 logging.info('NamedCache.cleanup(): Cleaning %s', self.cache_dir)
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001126 success = True
1127 with self._lock:
1128 try:
1129 actual = set(fs.listdir(self.cache_dir))
1130 actual.discard(self.NAMED_DIR)
1131 actual.discard(self.STATE_FILE)
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001132 expected = {v[0]: k for k, v in self._lru.items()}
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001133 # First, handle the actual cache content.
1134 # Remove missing entries.
1135 for missing in (set(expected) - actual):
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001136 name, size = self._lru.pop(expected[missing])
1137 logging.warning(
1138 'NamedCache.cleanup(): Missing on disk %r(%d)', name, size)
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001139 # Remove unexpected items.
1140 for unexpected in (actual - set(expected)):
1141 try:
1142 p = os.path.join(self.cache_dir, unexpected)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001143 logging.warning(
1144 'NamedCache.cleanup(): Unexpected %r', unexpected)
Marc-Antoine Ruel41362222018-06-28 14:52:34 +00001145 if fs.isdir(p) and not fs.islink(p):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001146 file_path.rmtree(p)
1147 else:
1148 fs.remove(p)
1149 except (IOError, OSError) as e:
1150 logging.error('Failed to remove %s: %s', unexpected, e)
1151 success = False
1152
1153 # Second, fix named cache links.
1154 named = os.path.join(self.cache_dir, self.NAMED_DIR)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001155 if fs.isdir(named):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001156 actual = set(fs.listdir(named))
1157 expected = set(self._lru)
1158 # Confirm entries. Do not add missing ones for now.
1159 for name in expected.intersection(actual):
1160 p = os.path.join(self.cache_dir, self.NAMED_DIR, name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001161 expected_link = os.path.join(u'..', self._lru[name][0])
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001162 if fs.islink(p):
1163 link = fs.readlink(p)
1164 if expected_link == link:
1165 continue
1166 logging.warning(
1167 'Unexpected symlink for cache %s: %s, expected %s',
1168 name, link, expected_link)
1169 else:
1170 logging.warning('Unexpected non symlink for cache %s', name)
Marc-Antoine Ruel41362222018-06-28 14:52:34 +00001171 if fs.isdir(p) and not fs.islink(p):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001172 file_path.rmtree(p)
1173 else:
1174 fs.remove(p)
1175 # Remove unexpected items.
1176 for unexpected in (actual - expected):
1177 try:
1178 p = os.path.join(self.cache_dir, self.NAMED_DIR, unexpected)
1179 if fs.isdir(p):
1180 file_path.rmtree(p)
1181 else:
1182 fs.remove(p)
1183 except (IOError, OSError) as e:
1184 logging.error('Failed to remove %s: %s', unexpected, e)
1185 success = False
1186 finally:
1187 self._save()
1188 return success
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001189
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001190 # Internal functions.
1191
1192 def _try_upgrade(self):
1193 """Upgrades from the old format to the new one if necessary.
1194
1195 This code can be removed so all bots are known to have the right new format.
1196 """
1197 if not self._lru:
1198 return
1199 _name, (data, _ts) = self._lru.get_oldest()
1200 if isinstance(data, (list, tuple)):
1201 return
1202 # Update to v2.
1203 def upgrade(_name, rel_cache):
1204 abs_cache = os.path.join(self.cache_dir, rel_cache)
Takuto Ikuta995da062021-03-17 05:01:59 +00001205 return rel_cache, file_path.get_recursive_size(abs_cache)
1206
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001207 self._lru.transform(upgrade)
1208 self._save()
1209
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001210 def _remove_lru_item(self):
1211 """Removes the oldest LRU entry. LRU must not be empty."""
1212 name, ((_rel_path, size), _ts) = self._lru.get_oldest()
Takuto Ikuta74686842021-07-30 04:11:03 +00001213 logging.info('Removing named cache %r, %d', name, size)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001214 self._remove(name)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001215 return name, size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001216
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001217 def _allocate_dir(self):
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001218 """Creates and returns relative path of a new cache directory.
1219
1220 In practice, it is a 2-letter string.
1221 """
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001222 # We randomly generate directory names that have two lower/upper case
1223 # letters or digits. Total number of possibilities is (26*2 + 10)^2 = 3844.
1224 abc_len = len(self._DIR_ALPHABET)
1225 tried = set()
1226 while len(tried) < 1000:
1227 i = random.randint(0, abc_len * abc_len - 1)
1228 rel_path = (
Takuto Ikuta1c717d72020-06-29 10:15:09 +00001229 self._DIR_ALPHABET[i // abc_len] + self._DIR_ALPHABET[i % abc_len])
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001230 if rel_path in tried:
1231 continue
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001232 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001233 if not fs.exists(abs_path):
1234 return rel_path
1235 tried.add(rel_path)
1236 raise NamedCacheError(
1237 'could not allocate a new cache dir, too many cache dirs')
1238
1239 def _remove(self, name):
1240 """Removes a cache directory and entry.
1241
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001242 Returns:
1243 Number of caches deleted.
1244 """
1245 self._lock.assert_locked()
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001246 # First try to remove the alias if it exists.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001247 named_dir = self._get_named_path(name)
1248 if fs.islink(named_dir):
1249 fs.unlink(named_dir)
1250
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001251 # Then remove the actual data.
1252 if name not in self._lru:
1253 return
1254 rel_path, _size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001255 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001256 if fs.isdir(abs_path):
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001257 file_path.rmtree(abs_path)
1258 self._lru.pop(name)
1259
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001260 def _save(self):
1261 self._lock.assert_locked()
1262 file_path.ensure_tree(self.cache_dir)
1263 self._lru.save(self.state_file)
1264
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001265 def _get_named_path(self, name):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001266 return os.path.join(self.cache_dir, self.NAMED_DIR, name)