blob: 27d15b5facbe054a53859dd1cfffa83ceccd304f [file] [log] [blame]
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -04001# Copyright 2018 The LUCI Authors. All rights reserved.
2# Use of this source code is governed under the Apache License, Version 2.0
3# that can be found in the LICENSE file.
4
5"""Define local cache policies."""
6
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -04007import errno
8import io
9import logging
10import os
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040011import random
12import string
Junji Watanabe7b720782020-07-01 01:51:07 +000013import subprocess
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040014import sys
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000015import time
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040016
17from utils import file_path
18from utils import fs
19from utils import lru
20from utils import threading_utils
21from utils import tools
Lei Leife202df2019-06-11 17:33:34 +000022tools.force_local_third_party()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040023
Lei Leife202df2019-06-11 17:33:34 +000024# third_party/
25import six
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040026
Junji Watanabe5e73aab2020-04-09 04:20:27 +000027import isolated_format
28
# Sentinel used in place of a file size when the correct size is not known,
# generally for .isolated files. is_valid_file() only checks existence for it
# and _add()/_delete_file() stat the file themselves when they receive it.
UNKNOWN_FILE_SIZE = None

# PermissionError isn't defined in Python2. Bind the name at module level so
# it exists on both major versions.
# NOTE(review): the code that references PermissionError is outside this
# view -- confirm it copes with the None placeholder.
if six.PY2:
  PermissionError = None
36
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040037
def file_write(path, content_generator):
  """Streams every chunk yielded by content_generator into the file at path.

  Creates the intermediary directory as needed.

  Arguments:
    path: destination file; it is opened in binary write mode.
    content_generator: iterable of byte chunks.

  Returns the number of bytes written.

  Meant to be mocked out in unit tests.
  """
  parent_dir = os.path.dirname(path)
  file_path.ensure_tree(parent_dir)
  written = 0
  with fs.open(path, 'wb') as out:
    for chunk in content_generator:
      # The chunk is accounted for before it is handed to the file object.
      written += len(chunk)
      out.write(chunk)
  return written
54
55
def is_valid_file(path, size):
  """Tells whether the file at path looks like a usable cache item.

  Only the existence of the file and its size are looked at; the content is
  never read.

  Arguments:
    path: path of the file to inspect.
    size: expected size in bytes, or UNKNOWN_FILE_SIZE to only check that the
        file exists.

  Returns:
    True if the file passes the check, False otherwise. Failures are logged.
  """
  if size == UNKNOWN_FILE_SIZE:
    # Nothing to compare against, existence is all that can be verified.
    return fs.isfile(path)

  item_name = os.path.basename(path)
  try:
    found_size = fs.stat(path).st_size
  except OSError as e:
    logging.warning(
        'Can\'t read item %s, assuming it\'s invalid: %s', item_name, e)
    return False

  if found_size == size:
    return True
  logging.warning(
      'Found invalid item %s; %d != %d', item_name, found_size, size)
  return False
75
76
def trim_caches(caches, path, min_free_space, max_age_secs):
  """Trims multiple caches.

  The goal here is to trim all caches in a coherent LRU fashion, deleting older
  items independent of which container they belong to.

  Two policies are enforced first:
  - max_age_secs
  - min_free_space

  Once that's done, then we enforce each cache's own policies.

  Arguments:
    caches: re-iterable collection of cache objects. Each one must support
        len(), get_oldest(), remove_oldest() and trim().
    path: path on the file system whose free space is measured.
    min_free_space: evict until at least this many bytes are free. A falsy
        value (0 or None) disables this policy.
    max_age_secs: evict items older than this many seconds. A falsy value
        disables this policy.

  Returns:
    List containing the size of all items evicted.
  """
  # None means "policy disabled", exactly like 0. Normalizing it keeps the
  # comparison and the %d log formatting below from ever seeing None.
  min_free_space = min_free_space or 0
  min_ts = time.time() - max_age_secs if max_age_secs else 0
  free_disk = file_path.get_free_space(path) if min_free_space else 0
  logging.info("Trimming caches. min_ts: %d, free_disk: %d, min_free_space: %d",
               min_ts, free_disk, min_free_space)
  total = []
  # Gate on the policies, not on the measured values: a completely full disk
  # reports 0 free bytes and that must not turn eviction off.
  if max_age_secs or min_free_space:
    while True:
      oldest = [(c, c.get_oldest()) for c in caches if len(c) > 0]
      if not oldest:
        break
      # Globally oldest entry across all the caches; ties keep list order.
      c, ts = min(oldest, key=lambda k: k[1])
      if ts >= min_ts and free_disk >= min_free_space:
        break
      total.append(c.remove_oldest())
      if min_free_space:
        # Re-measure, the eviction is expected to have released some space.
        free_disk = file_path.get_free_space(path)
    logging.info("free_disk after removing oldest entries: %d", free_disk)
  # Evaluate each cache's own policies.
  for c in caches:
    total.extend(c.trim())
  return total
114
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000115
class NamedCacheError(Exception):
  """Error specific to named caches."""
  pass
118
119
class NoMoreSpace(Exception):
  """Raised when there is not enough space to map the whole directory."""
123
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -0400124
class CachePolicies(object):
  def __init__(self, max_cache_size, min_free_space, max_items, max_age_secs):
    """Common caching policies for the multiple caches (isolated, named, cipd).

    Arguments:
    - max_cache_size: Trim if the cache gets larger than this value. If 0, the
                      cache is effectively a leak.
    - min_free_space: Trim if disk free space becomes lower than this value. If
                      0, it will unconditionally fill the disk.
    - max_items: Maximum number of items to keep in the cache. If 0, do not
                 enforce a limit.
    - max_age_secs: Maximum age an item is kept in the cache until it is
                    automatically evicted. Having a lot of dead luggage slows
                    everything down.
    """
    self.max_age_secs = max_age_secs
    self.max_items = max_items
    self.min_free_space = min_free_space
    self.max_cache_size = max_cache_size

  def __str__(self):
    """Returns a one-line summary; byte counts are also shown in GiB."""
    gib = 1024**3
    template = ('CachePolicies(max_cache_size=%s (%.3f GiB); max_items=%s; '
                'min_free_space=%s (%.3f GiB); max_age_secs=%s)')
    cache_size_gib = float(self.max_cache_size) / gib
    free_space_gib = float(self.min_free_space) / gib
    return template % (
        self.max_cache_size, cache_size_gib,
        self.max_items,
        self.min_free_space, free_space_gib,
        self.max_age_secs)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400151
152
class CacheMiss(Exception):
  """Raised when an item is not in cache."""

  def __init__(self, digest):
    """Records the missing digest and builds the exception message from it."""
    message = 'Item with digest %r is not found in cache' % digest
    super(CacheMiss, self).__init__(message)
    self.digest = digest
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400159
160
class Cache(object):
  """Base class for the caches; subclasses implement the storage."""

  def __init__(self, cache_dir):
    """Arguments:
      cache_dir: None, or an absolute path given as a text string.
    """
    if cache_dir is not None:
      assert isinstance(cache_dir, six.text_type), cache_dir
      assert file_path.isabs(cache_dir), cache_dir
    self.cache_dir = cache_dir
    self._lock = threading_utils.LockWithAssert()
    # Profiling values: sizes of the entries added and of the entries used.
    self._added = []
    self._used = []

  def __bool__(self):
    """A cache is always True.

    Otherwise it falls back to __len__, which is surprising.
    """
    return True

  # Python 2 looks up __nonzero__ instead of __bool__.
  __nonzero__ = __bool__

  def __len__(self):
    """Returns the number of entries in the cache."""
    raise NotImplementedError()

  def __iter__(self):
    """Iterates over all the entries names."""
    raise NotImplementedError()

  def __contains__(self, name):
    """Returns if an entry is in the cache."""
    raise NotImplementedError()

  @property
  def total_size(self):
    """Returns the total size of the cache in bytes."""
    raise NotImplementedError()

  @property
  def added(self):
    """Returns a copy of the list of the size for each entry added."""
    with self._lock:
      return list(self._added)

  @property
  def used(self):
    """Returns a copy of the list of the size for each entry used."""
    with self._lock:
      return list(self._used)

  def get_oldest(self):
    """Returns timestamp of oldest cache entry or None.

    Returns:
      Timestamp of the oldest item.

    Used for manual trimming.
    """
    raise NotImplementedError()

  def remove_oldest(self):
    """Removes the oldest item from the cache.

    Returns:
      Size of the oldest item.

    Used for manual trimming.
    """
    raise NotImplementedError()

  def save(self):
    """Saves the current cache to disk."""
    raise NotImplementedError()

  def trim(self):
    """Enforces cache policies, then calls save().

    Returns:
      List with the size of evicted items.
    """
    raise NotImplementedError()

  def cleanup(self):
    """Deletes any corrupted item from the cache, then calls trim(), then
    save().

    It is assumed to take significantly more time than trim().
    """
    raise NotImplementedError()
255
256
class ContentAddressedCache(Cache):
  """Content addressed cache that stores objects temporarily.

  It can be accessed concurrently from multiple threads, so it should protect
  its internal state with some lock.
  """

  # TODO(maruel): Remove the context manager interface.

  def __enter__(self):
    """Context manager interface; hands back the cache itself."""
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    """Context manager interface; never suppresses the exception."""
    return False

  def touch(self, digest, size):
    """Ensures item is not corrupted and updates its LRU position.

    Arguments:
      digest: hash digest of item to check.
      size: expected size of this item.

    Returns:
      True if item is in cache and not corrupted.
    """
    raise NotImplementedError()

  def getfileobj(self, digest):
    """Returns a readable file like object.

    If file exists on the file system it will have a .name attribute with an
    absolute path to the file.
    """
    raise NotImplementedError()

  def write(self, digest, content):
    """Reads data from |content| generator and stores it in cache.

    It is possible to write to an object that already exists. It may be
    ignored (sent to /dev/null) but the timestamp is still updated.

    Returns digest to simplify chaining.
    """
    raise NotImplementedError()
303
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400304
class MemoryContentAddressedCache(ContentAddressedCache):
  """ContentAddressedCache implementation that stores everything in memory."""

  def __init__(self, file_mode_mask=0o500):
    """Args:
      file_mode_mask: bit mask to AND file mode with. Default value will make
                      all mapped files to be read only.
    """
    super(MemoryContentAddressedCache, self).__init__(None)
    self._file_mode_mask = file_mode_mask
    # Items in a LRU lookup dict(digest: content). The whole content is kept,
    # not just its size; sizes are derived with len().
    self._lru = lru.LRUDict()

  # Cache interface implementation.

  def __len__(self):
    """Returns the number of items held."""
    with self._lock:
      return len(self._lru)

  def __iter__(self):
    """Iterates over the digests held."""
    # This is not thread-safe.
    return self._lru.__iter__()

  def __contains__(self, digest):
    """Returns True if digest is held."""
    # Defined once; the class used to carry a second, identical definition
    # further down which silently replaced this one.
    with self._lock:
      return digest in self._lru

  @property
  def total_size(self):
    """Returns the sum of the lengths of all the contents held."""
    with self._lock:
      return sum(len(i) for i in self._lru.values())

  def get_oldest(self):
    """Returns the timestamp of the oldest item, or None when empty."""
    with self._lock:
      try:
        # (key, (value, ts))
        return self._lru.get_oldest()[1][1]
      except KeyError:
        return None

  def remove_oldest(self):
    """Drops the oldest item and returns the length of its content."""
    with self._lock:
      # TODO(maruel): Update self._added.
      # (key, (value, ts))
      return len(self._lru.pop_oldest()[1][0])

  def save(self):
    """Does nothing, there is no state to persist."""
    pass

  def trim(self):
    """Trimming is not implemented for MemoryContentAddressedCache."""
    return []

  def cleanup(self):
    """Cleaning is irrelevant, as there's no stateful serialization."""
    pass

  # ContentAddressedCache interface implementation.

  def touch(self, digest, size):
    """Bumps the LRU position of digest.

    |size| is accepted for interface compatibility and is not checked.

    Returns:
      True if the item is held, False otherwise.
    """
    with self._lock:
      try:
        self._lru.touch(digest)
      except KeyError:
        return False
      return True

  def getfileobj(self, digest):
    """Returns a BytesIO over the content stored for digest.

    The content length is recorded in the 'used' statistics and the LRU
    position of the item is bumped.

    Raises:
      CacheMiss: if digest is not held.
    """
    with self._lock:
      try:
        d = self._lru[digest]
      except KeyError:
        raise CacheMiss(digest)
      self._used.append(len(d))
      self._lru.touch(digest)
    return io.BytesIO(d)

  def write(self, digest, content):
    """Stores the concatenation of the |content| chunks under digest.

    Returns digest to simplify chaining.
    """
    # Assemble whole stream before taking the lock.
    data = six.b('').join(content)
    with self._lock:
      self._lru.add(digest, data)
      self._added.append(len(data))
    return digest
393
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400394
395class DiskContentAddressedCache(ContentAddressedCache):
396 """Stateful LRU cache in a flat hash table in a directory.
397
398 Saves its state as json file.
399 """
400 STATE_FILE = u'state.json'
401
def __init__(self, cache_dir, policies, trim, time_fn=None):
  """
  Arguments:
    cache_dir: directory where to place the cache.
    policies: CachePolicies instance, cache retention policies.
    trim: if True to enforce |policies| right away.
      It can be done later by calling trim() explicitly.
    time_fn: optional; when set it is installed as the time_fn of the LRU
      while the state is loaded.
  """
  # All protected methods (starting with '_') except _path should be called
  # with self._lock held.
  super(DiskContentAddressedCache, self).__init__(cache_dir)
  self.policies = policies
  self.state_file = os.path.join(cache_dir, self.STATE_FILE)
  # Items in a LRU lookup dict(digest: size).
  self._lru = lru.LRUDict()
  # The first item in the LRU cache that must not be evicted during this run
  # since it was referenced. All items more recent that _protected in the LRU
  # cache are also inherently protected. It could be a set() of all items
  # referenced but this increases memory usage without a use case.
  self._protected = None
  # Cleanup operations done by self._load(), if any.
  self._operations = []
  # Current cached free disk space. It is updated by self._trim().
  file_path.ensure_tree(self.cache_dir)
  self._free_disk = file_path.get_free_space(self.cache_dir)
  with tools.Profiler('Setup'), self._lock:
    self._load(trim, time_fn)
430
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000431 # Cache interface implementation.
432
433 def __len__(self):
434 with self._lock:
435 return len(self._lru)
436
437 def __iter__(self):
438 # This is not thread-safe.
439 return self._lru.__iter__()
440
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400441 def __contains__(self, digest):
442 with self._lock:
443 return digest in self._lru
444
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400445 @property
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400446 def total_size(self):
447 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000448 return sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400449
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000450 def get_oldest(self):
451 with self._lock:
452 try:
453 # (key, (value, ts))
454 return self._lru.get_oldest()[1][1]
455 except KeyError:
456 return None
457
def remove_oldest(self):
  """Evicts the oldest item, protected or not, and returns its size."""
  with self._lock:
    # TODO(maruel): Update self._added.
    removed_size = self._remove_lru_file(True)
  return removed_size
462
def save(self):
  """Saves the LRU state while holding the lock."""
  with self._lock:
    result = self._save()
  return result
466
def trim(self):
  """Forces retention policies.

  Returns:
    List with the size of evicted items.
  """
  with self._lock:
    evicted = self._trim()
  return evicted
471
def cleanup(self):
  """Cleans up the cache directory.

  Ensures there is no unknown files in cache_dir.
  Ensures the read-only bits are set correctly.
  Drops LRU entries whose file is gone.
  Re-verifies items whose mtime is more recent than their LRU timestamp and
  deletes the ones that fail verification.

  At that point, the cache was already loaded, trimmed to respect cache
  policies.
  """
  logging.info('DiskContentAddressedCache.cleanup(): Cleaning %s',
               self.cache_dir)
  with self._lock:
    # Owner-only read/write/execute on the directory itself.
    fs.chmod(self.cache_dir, 0o700)
    # Ensure that all files listed in the state still exist and delete the
    # ones on disk that the state does not know about.
    previous = set(self._lru)
    # It'd be faster if there were a readdir() function.
    for filename in fs.listdir(self.cache_dir):
      if filename == self.STATE_FILE:
        # The state file stays writable by its owner.
        fs.chmod(os.path.join(self.cache_dir, filename), 0o600)
        continue
      if filename in previous:
        # A tracked item: owner read-only. Crossing it off |previous| leaves
        # only the entries with no file on disk once the loop is done.
        fs.chmod(os.path.join(self.cache_dir, filename), 0o400)
        previous.remove(filename)
        continue

      # An untracked file. Delete it.
      logging.warning(
          'DiskContentAddressedCache.cleanup(): Removing unknown file %s',
          filename)
      p = self._path(filename)
      if fs.isdir(p):
        try:
          file_path.rmtree(p)
        except OSError:
          # Best effort; a directory that can't be removed is left behind.
          pass
      else:
        file_path.try_remove(p)
      continue

    if previous:
      # Filter out entries that were not found.
      logging.warning(
          'DiskContentAddressedCache.cleanup(): Removed %d lost files',
          len(previous))
      for filename in previous:
        self._lru.pop(filename)
      self._save()

  # Verify hash of every single item to detect corruption. the corrupted
  # files will be evicted.
  total = 0
  verified = 0
  deleted = 0
  logging.info(
      'DiskContentAddressedCache.cleanup(): Verifying modified files')
  with self._lock:
    # Iterate over a snapshot since entries are popped along the way.
    # NOTE(review): reaches into LRUDict's private _items; entries look like
    # digest -> (value, timestamp) -- confirm against utils/lru.
    for digest, (_, timestamp) in list(self._lru._items.items()):
      total += 1
      # verify only if the mtime is greater than the timestamp in state.json
      # to avoid take too long time.
      if self._get_mtime(digest) <= timestamp:
        continue
      logging.warning(
          'DiskContentAddressedCache.cleanup(): Item has been modified.'
          ' verifying item: %s', digest)
      is_valid = self._is_valid_hash(digest)
      verified += 1
      logging.warning(
          'DiskContentAddressedCache.cleanup(): verified. is_valid: %s, '
          'item: %s', is_valid, digest)
      if is_valid:
        # Update timestamp in state.json
        self._lru.touch(digest)
        continue
      # remove corrupted file from LRU and file system
      self._lru.pop(digest)
      # The recorded size is not passed on, so _delete_file() stats the file.
      self._delete_file(digest, UNKNOWN_FILE_SIZE)
      deleted += 1
      logging.error(
          'DiskContentAddressedCache.cleanup(): Deleted corrupted item: %s',
          digest)
    self._save()
  logging.info(
      'DiskContentAddressedCache.cleanup(): Verified modified files.'
      ' total: %d, verified: %d, deleted: %d', total, verified, deleted)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400557
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000558 # ContentAddressedCache interface implementation.
559
560 def __contains__(self, digest):
561 with self._lock:
562 return digest in self._lru
563
def touch(self, digest, size):
  """Verifies an actual file is valid and bumps its LRU position.

  Arguments:
    digest: hash digest of the item to check.
    size: expected size of the item, or UNKNOWN_FILE_SIZE.

  Returns:
    True if the item is tracked and its file looks valid. False if the file
    is missing or invalid, or if the item is not tracked; in the first and
    last cases stale state is cleaned up, see below.

  Note that is doesn't compute the hash so it could still be corrupted if the
  file size didn't change.
  """
  # Do the check outside the lock.
  looks_valid = is_valid_file(self._path(digest), size)

  with self._lock:
    if digest not in self._lru:
      if looks_valid:
        # Exists on disk but not in the LRU anymore: remove the orphan.
        self._delete_file(digest, size)
      return False
    if not looks_valid:
      # Tracked, but the file is missing or doesn't have the expected size:
      # forget the entry and remove whatever is on disk. |size| is known not
      # to describe the file here, so let _delete_file() stat it to keep the
      # free disk space accounting accurate.
      self._lru.pop(digest)
      self._delete_file(digest, UNKNOWN_FILE_SIZE)
      return False
    # Update its LRU position.
    self._lru.touch(digest)
    # Only the first item referenced during this run is remembered.
    self._protected = self._protected or digest
  return True
590
def getfileobj(self, digest):
  """Returns the item's file opened for binary reading.

  The size recorded in the LRU is appended to the 'used' statistics.

  Raises:
    CacheMiss: if the file can't be opened or digest is not tracked.
  """
  try:
    handle = fs.open(self._path(digest), 'rb')
  except IOError:
    raise CacheMiss(digest)
  with self._lock:
    try:
      recorded_size = self._lru[digest]
    except KeyError:
      # If the digest is not actually in _lru, assume it is a cache miss.
      # Existing file will be overwritten by whoever uses the cache and added
      # to _lru.
      handle.close()
      raise CacheMiss(digest)
    self._used.append(recorded_size)
  return handle
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400606
def write(self, digest, content):
  """Stores the chunks produced by |content| as the file for digest.

  The file is made read-only and registered in the LRU once fully written.
  Any exception raised while producing or writing the content is propagated
  after the partial file was removed.

  Returns digest to simplify chaining.
  """
  assert content is not None
  with self._lock:
    if not self._protected:
      self._protected = digest
  target = self._path(digest)
  # A stale broken file may remain. It is possible for the file to have write
  # access bit removed which would cause the file_write() call to fail to open
  # in write mode. Take no chance here.
  file_path.try_remove(target)
  try:
    written = file_write(target, content)
  except:
    # Deliberately bare, the exception is always re-raised. It can come from:
    # 1) The |content| generator, in case of network or unzipping errors.
    # 2) file_write itself, in case of disk IO errors.
    # Either way delete the incomplete file and let the caller log it.
    file_path.try_remove(target)
    raise
  # Make the file read-only in the cache. This has a few side-effects since
  # the file node is modified, so every directory entries to this file becomes
  # read-only. It's fine here because it is a new file.
  file_path.set_read_only(target, True)
  with self._lock:
    self._add(digest, written)
  return digest
633
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000634 # Internal functions.
635
def _load(self, trim, time_fn):
  """Loads state of the cache from json file.

  If cache_dir does not exist on disk, it is created. A state file that can't
  be parsed causes the whole cache directory to be wiped and recreated.

  Arguments:
    trim: if true, _trim() is called once the state is loaded.
    time_fn: if set, installed as the time_fn of the LRU.
  """
  self._lock.assert_locked()

  if fs.isfile(self.state_file):
    # Load state of the cache.
    try:
      self._lru = lru.LRUDict.load(self.state_file)
    except ValueError as err:
      logging.error('Failed to load cache state: %s' % (err,))
      # Don't want to keep broken cache dir.
      file_path.rmtree(self.cache_dir)
      fs.makedirs(self.cache_dir)
      self._free_disk = file_path.get_free_space(self.cache_dir)
  elif not fs.isdir(self.cache_dir):
    fs.makedirs(self.cache_dir)

  if time_fn:
    self._lru.time_fn = time_fn
  if trim:
    self._trim()
659 self._trim()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400660
def _save(self):
  """Saves the LRU ordering to the state file."""
  self._lock.assert_locked()
  state_file = self.state_file
  if sys.platform != 'win32':
    parent_dir = os.path.dirname(state_file)
    if fs.isdir(parent_dir):
      # Necessary otherwise the file can't be created.
      file_path.set_read_only(parent_dir, False)
  if fs.isfile(state_file):
    # An existing state file has to be writable to be replaced.
    file_path.set_read_only(state_file, False)
  self._lru.save(state_file)
671 self._lru.save(self.state_file)
672
def _trim(self):
  """Enforces the retention policies, then saves the state.

  Items are evicted oldest first, in this order: items older than
  max_age_secs, then until max_cache_size is respected, then until max_items
  is respected, then until min_free_space is available. A policy with a falsy
  value is skipped.

  Updates self._free_disk.

  Returns:
    List with the size of each evicted item.
  """
  self._lock.assert_locked()
  policies = self.policies
  evicted = []

  # Trim old items.
  if policies.max_age_secs:
    cutoff = self._lru.time_fn() - policies.max_age_secs
    while self._lru:
      oldest = self._lru.get_oldest()
      # (key, (data, ts)
      if oldest[1][1] >= cutoff:
        break
      evicted.append(self._remove_lru_file(True))

  # Ensure maximum cache size.
  if policies.max_cache_size:
    total_size = sum(self._lru.values())
    while total_size > policies.max_cache_size:
      e = self._remove_lru_file(True)
      evicted.append(e)
      total_size -= e

  # Ensure maximum number of items in the cache.
  if policies.max_items and len(self._lru) > policies.max_items:
    for _ in range(len(self._lru) - policies.max_items):
      evicted.append(self._remove_lru_file(True))

  # Ensure enough free space.
  self._free_disk = file_path.get_free_space(self.cache_dir)
  while (policies.min_free_space and self._lru and
         self._free_disk < policies.min_free_space):
    # self._free_disk is updated by this call.
    evicted.append(self._remove_lru_file(True))

  if evicted:
    total_usage = sum(self._lru.values())
    usage_percent = 0.
    # max_cache_size may legitimately be 0 (no size limit); there is no
    # capacity to express the usage against in that case, so report 0%
    # instead of dividing by zero.
    if total_usage and policies.max_cache_size:
      usage_percent = 100. * float(total_usage) / policies.max_cache_size

    # NOTE: the message blames free disk space whichever policy triggered
    # the eviction; the text is kept as is for log compatibility.
    logging.warning(
        'Trimmed %d file(s) (%.1fkb) due to not enough free disk space:'
        ' %.1fkb free, %.1fkb cache (%.1f%% of its maximum capacity of '
        '%.1fkb)', len(evicted),
        sum(evicted) / 1024., self._free_disk / 1024., total_usage / 1024.,
        usage_percent, policies.max_cache_size / 1024.)
  self._save()
  return evicted
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400724
725 def _path(self, digest):
726 """Returns the path to one item."""
727 return os.path.join(self.cache_dir, digest)
728
def _remove_lru_file(self, allow_protected):
  """Removes the least recently used file and returns its size.

  Updates self._free_disk.

  Arguments:
    allow_protected: when false, refuse to evict the item recorded in
        self._protected.

  Raises:
    NoMoreSpace: if the oldest item is protected and allow_protected is
        false, or if there is nothing left to remove.
  """
  self._lock.assert_locked()
  try:
    digest, _ = self._lru.get_oldest()
    protected_hit = digest == self._protected
    if not allow_protected and protected_hit:
      total_size = sum(self._lru.values())
      template = ('Not enough space to fetch the whole isolated tree.\n'
                  ' %s\n cache=%d bytes (%.3f GiB), %d items; '
                  '%s bytes (%.3f GiB) free_space')
      msg = template % (
          self.policies, total_size, float(total_size) / 1024**3,
          len(self._lru), self._free_disk,
          float(self._free_disk) / 1024**3)
      raise NoMoreSpace(msg)
  except KeyError:
    # That means an internal error.
    raise NoMoreSpace('Nothing to remove, can\'t happend')
  digest, (size, _) = self._lru.pop_oldest()
  logging.debug('Removing LRU file %s with size %s bytes', digest, size)
  self._delete_file(digest, size)
  return size
753
def _add(self, digest, size=UNKNOWN_FILE_SIZE):
  """Registers |digest| in the LRU cache as the newest item.

  When |size| is not provided, it is read from the file on disk. The size is
  recorded in self._added and subtracted from self._free_disk.
  """
  self._lock.assert_locked()
  if size == UNKNOWN_FILE_SIZE:
    size = fs.stat(self._path(digest)).st_size
  self._added.append(size)
  self._lru.add(digest, size)
  self._free_disk -= size
  # Quick variant of self._trim(): it only enforces the free disk space
  # policy, not the cache size limits, and relies on the cached
  # self._free_disk value instead of querying the real free disk space.
  # self._trim() is called later to do the real trimming; evicting here makes
  # it possible to map an isolated that is larger than the current amount of
  # free disk space when the cache is already large.
  while True:
    threshold = self.policies.min_free_space
    if not threshold or not self._lru or self._free_disk >= threshold:
      break
    # self._free_disk is updated by this call.
    if self._remove_lru_file(False) == -1:
      break
773
def _delete_file(self, digest, size=UNKNOWN_FILE_SIZE):
  """Removes the cache file for |digest| from the file system.

  When |size| is not provided, it is read from disk, falling back to 0 if the
  file cannot be stat'ed. self._free_disk grows by |size| only when the
  removal succeeded. OSError is never propagated: ENOENT is ignored, any other
  error is logged.
  """
  self._lock.assert_locked()
  try:
    target = self._path(digest)
    if size == UNKNOWN_FILE_SIZE:
      try:
        size = fs.stat(target).st_size
      except OSError:
        size = 0
    removed = file_path.try_remove(target)
    if removed:
      self._free_disk += size
  except OSError as e:
    if e.errno == errno.ENOENT:
      return
    logging.error('Error attempting to delete a file %s:\n%s' % (digest, e))
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400791
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000792 def _get_mtime(self, digest):
793 """Get mtime of cache file."""
794 return os.path.getmtime(self._path(digest))
795
def _is_valid_hash(self, digest):
  """Returns True if the cache file hashes to |digest| with any supported algo.

  The file is hashed once per algorithm tried; the search stops at the first
  match.
  """
  return any(
      digest == isolated_format.hash_file(self._path(digest), algo)
      for algo in isolated_format.SUPPORTED_ALGOS.values())
802
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400803
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400804class NamedCache(Cache):
805 """Manages cache directories.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400806
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400807 A cache entry is a tuple (name, path), where
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400808 name is a short identifier that describes the contents of the cache, e.g.
809 "git_v8" could be all git repositories required by v8 builds, or
810 "build_chromium" could be build artefacts of the Chromium.
811 path is a directory path relative to the task run dir. Cache installation
812 puts the requested cache directory at the path.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400813 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400814 _DIR_ALPHABET = string.ascii_letters + string.digits
815 STATE_FILE = u'state.json'
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +0000816 NAMED_DIR = u'named'
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400817
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400818 def __init__(self, cache_dir, policies, time_fn=None):
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400819 """Initializes NamedCaches.
820
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400821 Arguments:
822 - cache_dir is a directory for persistent cache storage.
823 - policies is a CachePolicies instance.
824 - time_fn is a function that returns timestamp (float) and used to take
825 timestamps when new caches are requested. Used in unit tests.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400826 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400827 super(NamedCache, self).__init__(cache_dir)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400828 self._policies = policies
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000829 # LRU {cache_name -> tuple(cache_location, size)}
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400830 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
831 self._lru = lru.LRUDict()
832 if not fs.isdir(self.cache_dir):
833 fs.makedirs(self.cache_dir)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000834 elif fs.isfile(self.state_file):
Marc-Antoine Ruel3543e212018-05-23 01:04:34 +0000835 try:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400836 self._lru = lru.LRUDict.load(self.state_file)
Takuto Ikutac4b85ec2020-06-09 03:42:39 +0000837 for _, size in self._lru.values():
Takuto Ikuta1c717d72020-06-29 10:15:09 +0000838 if not isinstance(size, six.integer_types):
Takuto Ikuta6acf8f92020-07-02 02:06:42 +0000839 with open(self.state_file, 'r') as f:
840 logging.info('named cache state file: %s\n%s', self.state_file,
841 f.read())
Junji Watanabeedcf47d2020-06-11 08:41:01 +0000842 raise ValueError("size is not integer: %s" % size)
Takuto Ikutac4b85ec2020-06-09 03:42:39 +0000843
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400844 except ValueError:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000845 logging.exception(
846 'NamedCache: failed to load named cache state file; obliterating')
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400847 file_path.rmtree(self.cache_dir)
Takuto Ikuta568ddb22020-01-20 23:24:16 +0000848 fs.makedirs(self.cache_dir)
Takuto Ikutadadfbb02020-07-10 03:31:26 +0000849 self._lru = lru.LRUDict()
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000850 with self._lock:
851 self._try_upgrade()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400852 if time_fn:
853 self._lru.time_fn = time_fn
854
@property
def available(self):
  """Returns the set of cache names currently present in the LRU."""
  with self._lock:
    names = set(self._lru)
  return names
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400860
Takuto Ikutaeab23172020-07-02 03:50:02 +0000861 def _sudo_chown(self, path):
862 if sys.platform == 'win32':
863 return
864 uid = os.getuid()
865 if os.stat(path).st_uid == uid:
866 return
867 # Maybe owner of |path| is different from runner of this script. This is to
868 # make fs.rename work in that case.
869 # https://crbug.com/986676
870 subprocess.check_call(['sudo', '-n', 'chown', str(uid), path])
871
def install(self, dst, name):
  """Creates the directory |dst|, reusing the named cache |name| when present.

  If |name| is known and its directory still exists in the local named caches
  cache, that directory is moved to |dst|; otherwise an empty directory is
  created at |dst|.

  dst must be absolute, unicode and must not exist.

  Returns the reused named cache size in bytes, or 0 if none was present.

  Raises NamedCacheError if cannot install the cache.
  """
  logging.info('NamedCache.install(%r, %r)', dst, name)
  with self._lock:
    try:
      if fs.isdir(dst):
        raise NamedCacheError(
            'installation directory %r already exists' % dst)

      # Drop the convenience symlink if present; only the link itself is
      # removed, not its destination.
      alias = self._get_named_path(name)
      if fs.exists(alias):
        fs.remove(alias)

      if name in self._lru:
        rel_dir, size = self._lru.get(name)
        abs_dir = os.path.join(self.cache_dir, rel_dir)
        if not fs.isdir(abs_dir):
          logging.warning('- expected directory %r, does not exist', rel_dir)
          self._remove(name)
        else:
          logging.info('- reusing %r; size was %d', rel_dir, size)
          file_path.ensure_tree(os.path.dirname(dst))
          self._sudo_chown(abs_dir)
          fs.rename(abs_dir, dst)
          self._remove(name)
          return size

      # The named cache does not exist, create an empty directory. When
      # uninstalling, it is moved back to the cache and an entry is created.
      logging.info('- creating new directory')
      file_path.ensure_tree(dst)
      return 0
    except (IOError, OSError, PermissionError) as ex:
      # Raise using the original traceback.
      wrapped = NamedCacheError(
          'cannot install cache named %r at %r: %s' % (name, dst, ex))
      six.reraise(type(wrapped), wrapped, sys.exc_info()[2])
    finally:
      # The state is persisted whatever the outcome.
      self._save()
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400922
def uninstall(self, src, name):
  """Moves the cache directory back into the named cache hive for an eventual
  reuse.

  The opposite of install().

  src must be absolute and unicode. Its content is moved back into the local
  named caches cache.

  Returns the named cache size in bytes, or None when |src| is not a directory
  or when its size could not be computed; in both of these cases no cache entry
  is created.

  Raises NamedCacheError if cannot uninstall the cache.
  """
  logging.info('NamedCache.uninstall(%r, %r)', src, name)
  start = time.time()
  with self._lock:
    try:
      if not fs.isdir(src):
        logging.warning(
            'NamedCache: Directory %r does not exist anymore. Cache lost.',
            src)
        return

      if name in self._lru:
        # This shouldn't happen but just remove the preexisting one and move
        # on.
        logging.error('- overwriting existing cache!')
        self._remove(name)

      # Calculate the size of the named cache to keep.
      size = file_path.get_recursive_size(src)
      logging.info('- Size is %s', size)
      if size is None:
        # Do not save a named cache that was deleted.
        return

      # Move the dir and create an entry for the named cache.
      rel_cache = self._allocate_dir()
      abs_cache = os.path.join(self.cache_dir, rel_cache)
      logging.info('- Moving to %r', rel_cache)
      file_path.ensure_tree(os.path.dirname(abs_cache))
      # Take ownership first so the rename below can succeed.
      self._sudo_chown(src)
      fs.rename(src, abs_cache)

      # The entry is only recorded once the directory was actually moved.
      self._lru.add(name, (rel_cache, size))
      self._added.append(size)

      # Create symlink <cache_dir>/<named>/<name> -> <cache_dir>/<short name>
      # for user convenience.
      named_path = self._get_named_path(name)
      if fs.exists(named_path):
        file_path.remove(named_path)
      else:
        file_path.ensure_tree(os.path.dirname(named_path))

      try:
        # The link target is relative so it stays valid if the cache
        # directory is moved as a whole.
        fs.symlink(os.path.join(u'..', rel_cache), named_path)
        logging.info(
            'NamedCache: Created symlink %r to %r', named_path, abs_cache)
      except OSError:
        # Ignore on Windows. It happens when running as a normal user or when
        # UAC is enabled and the user is a filtered administrator account.
        if sys.platform != 'win32':
          raise
      return size
    except (IOError, OSError, PermissionError) as ex:
      # Raise using the original traceback.
      exc = NamedCacheError(
          'cannot uninstall cache named %r at %r: %s' % (name, src, ex))
      six.reraise(type(exc), exc, sys.exc_info()[2])
    finally:
      # Call save() at every uninstall, including on the early returns and
      # error paths above. The assumptions are:
      # - The total number of named caches is low, so the state.json file
      #   is small, so the time it takes to write it to disk is short.
      # - The number of mapped named caches per task is low, so the number of
      #   times save() is called on tear-down isn't high enough to be
      #   significant.
      # - uninstall() sometimes throws due to file locking on Windows or
      #   access rights on Linux. We want to keep as many as possible.
      self._save()
      logging.info('NamedCache.uninstall(%r, %r) took %d seconds', src, name,
                   time.time() - start)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001005
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001006 # Cache interface implementation.
1007
1008 def __len__(self):
1009 with self._lock:
1010 return len(self._lru)
1011
1012 def __iter__(self):
1013 # This is not thread-safe.
1014 return self._lru.__iter__()
1015
John Budorickc6186972020-02-26 00:58:14 +00001016 def __contains__(self, name):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001017 with self._lock:
John Budorickc6186972020-02-26 00:58:14 +00001018 return name in self._lru
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001019
@property
def total_size(self):
  """Returns the sum of the recorded sizes of all the named caches."""
  with self._lock:
    total = 0
    for _rel_path, size in self._lru.values():
      total += size
    return total
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001024
def get_oldest(self):
  """Returns the timestamp of the oldest entry, or None if the LRU is empty."""
  with self._lock:
    try:
      # The LRU yields (key, (value, ts)).
      _name, (_value, ts) = self._lru.get_oldest()
    except KeyError:
      return None
    return ts
1032
def remove_oldest(self):
  """Removes the oldest named cache and returns its recorded size."""
  with self._lock:
    # TODO(maruel): Update self._added.
    _evicted_name, freed = self._remove_lru_item()
    return freed
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001038
def save(self):
  """Saves the state while holding the lock; returns what self._save() does."""
  with self._lock:
    result = self._save()
  return result
1042
def touch(self, *names):
  """Touches each of |names| that is in the LRU, then saves the state.

  Unknown names are silently skipped.
  """
  with self._lock:
    for name in names:
      if name not in self._lru:
        continue
      self._lru.touch(name)
    self._save()
1049
def trim(self):
  """Evicts the oldest named caches until every policy is satisfied.

  The policies are enforced in this order: maximum number of items, maximum
  age, minimum free disk space, maximum total cache size. A policy whose value
  is falsy is skipped.

  Returns the list of the sizes of the evicted caches; it is empty when the
  cache directory does not exist.
  """
  evicted = []
  with self._lock:
    if not fs.isdir(self.cache_dir):
      return evicted

    limits = self._policies

    # Trim according to maximum number of items.
    if limits.max_items:
      while len(self._lru) > limits.max_items:
        victim, freed = self._remove_lru_item()
        evicted.append(freed)
        logging.info(
            'NamedCache.trim(): Removed %r(%d) due to max_items(%d)',
            victim, freed, limits.max_items)

    # Trim according to maximum age.
    if limits.max_age_secs:
      cutoff = self._lru.time_fn() - limits.max_age_secs
      while self._lru:
        _name, (_data, ts) = self._lru.get_oldest()
        if ts >= cutoff:
          break
        victim, freed = self._remove_lru_item()
        evicted.append(freed)
        logging.info(
            'NamedCache.trim(): Removed %r(%d) due to max_age_secs(%d)',
            victim, freed, limits.max_age_secs)

    # Trim according to minimum free space. The real free space is queried
    # again after each eviction.
    if limits.min_free_space:
      while self._lru:
        available_bytes = file_path.get_free_space(self.cache_dir)
        if available_bytes >= limits.min_free_space:
          break
        victim, freed = self._remove_lru_item()
        evicted.append(freed)
        logging.info(
            'NamedCache.trim(): Removed %r(%d) due to min_free_space(%d)',
            victim, freed, limits.min_free_space)

    # Trim according to maximum total size.
    if limits.max_cache_size:
      while self._lru:
        used = sum(size for _rel_cache, size in self._lru.values())
        if used <= limits.max_cache_size:
          break
        victim, freed = self._remove_lru_item()
        evicted.append(freed)
        logging.info(
            'NamedCache.trim(): Removed %r(%d) due to max_cache_size(%d)',
            victim, freed, limits.max_cache_size)

    self._save()
    return evicted
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001104
def cleanup(self):
  """Removes unknown directories.

  Reconciles the content of the cache directory with the LRU state in two
  passes:
  1. Cache directories: entries whose directory is missing on disk are dropped
     from the LRU, and items on disk that no entry refers to are deleted.
  2. Convenience links under NAMED_DIR: links that do not point at the
     expected location, and items that match no cache name, are deleted.
     Missing links are not recreated.

  Does not recalculate the cache size since it's surprisingly slow on some
  OSes.

  Returns True if everything was cleaned up, False if removing at least one
  unexpected item failed. The state is saved in all cases.
  """
  logging.info('NamedCache.cleanup(): Cleaning %s', self.cache_dir)
  success = True
  with self._lock:
    try:
      actual = set(fs.listdir(self.cache_dir))
      actual.discard(self.NAMED_DIR)
      actual.discard(self.STATE_FILE)
      # Maps relative cache location -> cache name.
      expected = {v[0]: k for k, v in self._lru.items()}
      # First, handle the actual cache content.
      # Remove missing entries.
      for missing in (set(expected) - actual):
        # The popped value is (cache location, size), not the cache name.
        rel_path, size = self._lru.pop(expected[missing])
        logging.warning(
            'NamedCache.cleanup(): Missing on disk %r(%d)', rel_path, size)
      # Remove unexpected items.
      for unexpected in (actual - set(expected)):
        try:
          p = os.path.join(self.cache_dir, unexpected)
          logging.warning(
              'NamedCache.cleanup(): Unexpected %r', unexpected)
          if fs.isdir(p) and not fs.islink(p):
            file_path.rmtree(p)
          else:
            fs.remove(p)
        except (IOError, OSError) as e:
          logging.error('Failed to remove %s: %s', unexpected, e)
          success = False

      # Second, fix named cache links.
      named = os.path.join(self.cache_dir, self.NAMED_DIR)
      if fs.isdir(named):
        actual = set(fs.listdir(named))
        expected = set(self._lru)
        # Confirm entries. Do not add missing ones for now.
        for name in expected.intersection(actual):
          p = os.path.join(self.cache_dir, self.NAMED_DIR, name)
          expected_link = os.path.join(u'..', self._lru[name][0])
          if fs.islink(p):
            link = fs.readlink(p)
            if expected_link == link:
              continue
            logging.warning(
                'Unexpected symlink for cache %s: %s, expected %s',
                name, link, expected_link)
          else:
            logging.warning('Unexpected non symlink for cache %s', name)
          if fs.isdir(p) and not fs.islink(p):
            file_path.rmtree(p)
          else:
            fs.remove(p)
        # Remove unexpected items.
        for unexpected in (actual - expected):
          try:
            p = os.path.join(self.cache_dir, self.NAMED_DIR, unexpected)
            # Entries here are normally symlinks: unlink a symlink instead of
            # recursing through it, even when it points at a directory. This
            # matches the two other removal branches of this method.
            if fs.isdir(p) and not fs.islink(p):
              file_path.rmtree(p)
            else:
              fs.remove(p)
          except (IOError, OSError) as e:
            logging.error('Failed to remove %s: %s', unexpected, e)
            success = False
    finally:
      self._save()
  return success
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001175
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001176 # Internal functions.
1177
1178 def _try_upgrade(self):
1179 """Upgrades from the old format to the new one if necessary.
1180
1181 This code can be removed so all bots are known to have the right new format.
1182 """
1183 if not self._lru:
1184 return
1185 _name, (data, _ts) = self._lru.get_oldest()
1186 if isinstance(data, (list, tuple)):
1187 return
1188 # Update to v2.
1189 def upgrade(_name, rel_cache):
1190 abs_cache = os.path.join(self.cache_dir, rel_cache)
Takuto Ikuta995da062021-03-17 05:01:59 +00001191 return rel_cache, file_path.get_recursive_size(abs_cache)
1192
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001193 self._lru.transform(upgrade)
1194 self._save()
1195
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001196 def _remove_lru_item(self):
1197 """Removes the oldest LRU entry. LRU must not be empty."""
1198 name, ((_rel_path, size), _ts) = self._lru.get_oldest()
Takuto Ikuta74686842021-07-30 04:11:03 +00001199 logging.info('Removing named cache %r, %d', name, size)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001200 self._remove(name)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001201 return name, size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001202
def _allocate_dir(self):
  """Picks and returns the relative path of a new, unused cache directory.

  In practice, it is a 2-letter string. The directory itself is not created
  here.

  Raises NamedCacheError after 1000 distinct names were found to be taken.
  """
  # Directory names are two random lower/upper case letters or digits. Total
  # number of possibilities is (26*2 + 10)^2 = 3844.
  alphabet = self._DIR_ALPHABET
  abc_len = len(alphabet)
  tried = set()
  while len(tried) < 1000:
    first, second = divmod(
        random.randint(0, abc_len * abc_len - 1), abc_len)
    candidate = alphabet[first] + alphabet[second]
    if candidate in tried:
      continue
    if not fs.exists(os.path.join(self.cache_dir, candidate)):
      return candidate
    tried.add(candidate)
  raise NamedCacheError(
      'could not allocate a new cache dir, too many cache dirs')
1224
def _remove(self, name):
  """Removes the symlink, the directory and the LRU entry of cache |name|.

  Each step is skipped when the corresponding item does not exist. Returns
  nothing.
  """
  self._lock.assert_locked()
  # First try to remove the alias if it exists.
  alias = self._get_named_path(name)
  if fs.islink(alias):
    fs.unlink(alias)

  # Then remove the actual data.
  if name in self._lru:
    rel_path, _size = self._lru.get(name)
    abs_path = os.path.join(self.cache_dir, rel_path)
    if fs.isdir(abs_path):
      file_path.rmtree(abs_path)
    # The entry is dropped even if its directory was already gone.
    self._lru.pop(name)
1245
def _save(self):
  """Writes the LRU state to self.state_file; the lock must be held."""
  self._lock.assert_locked()
  # Make sure the cache directory exists before writing into it.
  file_path.ensure_tree(self.cache_dir)
  state_path = self.state_file
  self._lru.save(state_path)
1250
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001251 def _get_named_path(self, name):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001252 return os.path.join(self.cache_dir, self.NAMED_DIR, name)