blob: 6205869f462b733fcf15e5f19877a431984e7b88 [file] [log] [blame]
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -04001# Copyright 2018 The LUCI Authors. All rights reserved.
2# Use of this source code is governed under the Apache License, Version 2.0
3# that can be found in the LICENSE file.
4
5"""Define local cache policies."""
6
Takuto Ikuta2fe58fd2021-08-18 13:47:36 +00007from __future__ import print_function
8
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -04009import errno
10import io
11import logging
12import os
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040013import random
14import string
Junji Watanabe7b720782020-07-01 01:51:07 +000015import subprocess
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040016import sys
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000017import time
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040018
19from utils import file_path
20from utils import fs
21from utils import lru
22from utils import threading_utils
23from utils import tools
Lei Leife202df2019-06-11 17:33:34 +000024tools.force_local_third_party()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040025
Lei Leife202df2019-06-11 17:33:34 +000026# third_party/
27import six
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040028
Junji Watanabe5e73aab2020-04-09 04:20:27 +000029import isolated_format
30
# Placeholder size used when the real size of a file is not known, which is
# generally the case for .isolated files.
UNKNOWN_FILE_SIZE = None

# Python 2 has no builtin PermissionError, so define the name there.
if six.PY2:
  PermissionError = None
38
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040039
def file_write(path, content_generator):
  """Streams the chunks yielded by |content_generator| into the file at |path|.

  The parent directory of |path| is created first when it is missing.

  Meant to be mocked out in unit tests.

  Arguments:
    path: destination file, opened in binary write mode.
    content_generator: iterable yielding the chunks to write, in order.

  Returns:
    The number of bytes written, i.e. the sum of the length of every chunk.
  """
  parent = os.path.dirname(path)
  file_path.ensure_tree(parent)
  written = 0
  with fs.open(path, 'wb') as out:
    for chunk in content_generator:
      written += len(chunk)
      out.write(chunk)
  return written
56
57
def is_valid_file(path, size):
  """Tells whether the file at |path| appears to be valid.

  The check is shallow: the file must exist and, when |size| is known, its
  size on disk must be equal to |size|. The content is never read.

  Arguments:
    path: file to check.
    size: expected size in bytes, or UNKNOWN_FILE_SIZE to only check that the
          file exists.

  Returns:
    True if the file looks valid, False otherwise. A warning is logged when
    the file can't be stat'ed or when the size doesn't match.
  """
  if size == UNKNOWN_FILE_SIZE:
    return fs.isfile(path)
  try:
    on_disk = fs.stat(path).st_size
  except OSError as e:
    logging.warning('Can\'t read item %s, assuming it\'s invalid: %s',
                    os.path.basename(path), e)
    return False
  if on_disk == size:
    return True
  logging.warning(
      'Found invalid item %s; %d != %d',
      os.path.basename(path), on_disk, size)
  return False
77
78
def trim_caches(caches, path, min_free_space, max_age_secs):
  """Trims multiple caches.

  The goal here is to trim all caches in a coherent LRU fashion, deleting older
  items independent of which container they belong to.

  Two policies are enforced first, across all caches:
  - max_age_secs
  - min_free_space

  Once that's done, then we enforce each cache's own policies.

  Arguments:
    caches: list of cache objects implementing __len__(), get_oldest(),
            remove_oldest() and trim().
    path: path used to query the free disk space.
    min_free_space: minimum free disk space to keep, in bytes. 0 disables the
                    check.
    max_age_secs: maximum age of an item, in seconds. 0 disables the check.

  Returns:
    List containing the size of all items evicted.
  """
  min_ts = time.time() - max_age_secs if max_age_secs else 0
  free_disk = file_path.get_free_space(path) if min_free_space else 0
  logging.info("Trimming caches. min_ts: %d, free_disk: %d, min_free_space: %d",
               min_ts, free_disk, min_free_space)
  total = []
  # Look at the policies themselves instead of |free_disk|: a disk that is
  # completely full reports 0 bytes free, which is precisely when trimming is
  # the most needed.
  if max_age_secs or min_free_space:
    while True:
      candidates = []
      for c in caches:
        if len(c) > 0:
          ts = c.get_oldest()
          # get_oldest() is documented as returning None when there is no
          # entry; such a cache has nothing to evict and None can't be ordered
          # against timestamps on python3.
          if ts is not None:
            candidates.append((c, ts))
      if not candidates:
        break
      # On equal timestamps, the first cache listed in |caches| wins.
      c, ts = min(candidates, key=lambda k: k[1])
      if ts >= min_ts and free_disk >= min_free_space:
        break
      total.append(c.remove_oldest())
      if min_free_space:
        free_disk = file_path.get_free_space(path)
        logging.info("free_disk after removing oldest entries: %d", free_disk)
  # Evaluate each cache's own policies.
  for c in caches:
    total.extend(c.trim())
  return total
116
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000117
class NamedCacheError(Exception):
  """Error specific to named caches."""
120
121
class NoMoreSpace(Exception):
  """There is not enough space to map the whole directory."""
125
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -0400126
class CachePolicies(object):
  """Retention limits shared by the multiple caches (isolated, named, cipd)."""

  def __init__(self, max_cache_size, min_free_space, max_items, max_age_secs):
    """Stores the caching policies.

    Arguments:
    - max_cache_size: Trim if the cache gets larger than this value. If 0, the
                      cache is effectively a leak.
    - min_free_space: Trim if disk free space becomes lower than this value. If
                      0, it will unconditionally fill the disk.
    - max_items: Maximum number of items to keep in the cache. If 0, do not
                 enforce a limit.
    - max_age_secs: Maximum age an item is kept in the cache until it is
                    automatically evicted. Having a lot of dead luggage slows
                    everything down.
    """
    self.max_cache_size = max_cache_size
    self.min_free_space = min_free_space
    self.max_items = max_items
    self.max_age_secs = max_age_secs

  def __str__(self):
    """Returns a one-line summary, with both byte limits also shown in GiB."""
    cache_gib = float(self.max_cache_size) / 1024**3
    free_gib = float(self.min_free_space) / 1024**3
    template = ('CachePolicies(max_cache_size=%s (%.3f GiB); max_items=%s; '
                'min_free_space=%s (%.3f GiB); max_age_secs=%s)')
    return template % (
        self.max_cache_size, cache_gib, self.max_items, self.min_free_space,
        free_gib, self.max_age_secs)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400153
154
class CacheMiss(Exception):
  """Raised when an item is not in cache.

  The digest that was looked up is kept in the |digest| attribute.
  """

  def __init__(self, digest):
    message = 'Item with digest %r is not found in cache' % digest
    super(CacheMiss, self).__init__(message)
    self.digest = digest
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400161
162
class Cache(object):
  """Base class of the caches.

  It holds the cache directory, the lock protecting the internal state and the
  profiling counters. Everything else must be implemented by subclasses.
  """

  def __init__(self, cache_dir):
    """Arguments:
      cache_dir: absolute path (as text) of the cache directory, or None.
    """
    if cache_dir is not None:
      assert isinstance(cache_dir, six.text_type), cache_dir
      assert file_path.isabs(cache_dir), cache_dir
    self.cache_dir = cache_dir
    self._lock = threading_utils.LockWithAssert()
    # Profiling values: the size of each entry added and used.
    self._added = []
    self._used = []

  def __bool__(self):
    """A cache is always True.

    Otherwise truth testing falls back to __len__, which is surprising.
    """
    return True

  # Python 2 spelling of __bool__.
  __nonzero__ = __bool__

  def __len__(self):
    """Returns the number of entries in the cache."""
    raise NotImplementedError()

  def __iter__(self):
    """Iterates over the name of every entry."""
    raise NotImplementedError()

  def __contains__(self, name):
    """Returns whether an entry is in the cache."""
    raise NotImplementedError()

  @property
  def total_size(self):
    """Returns the total size of the cache in bytes."""
    raise NotImplementedError()

  @property
  def added(self):
    """Returns a copy of the list of the size of each entry added."""
    with self._lock:
      return list(self._added)

  @property
  def used(self):
    """Returns a copy of the list of the size of each entry used."""
    with self._lock:
      return list(self._used)

  def get_oldest(self):
    """Returns the timestamp of the oldest cache entry or None.

    Used for manual trimming.
    """
    raise NotImplementedError()

  def remove_oldest(self):
    """Removes the oldest item from the cache.

    Used for manual trimming.

    Returns:
      Size of the oldest item.
    """
    raise NotImplementedError()

  def save(self):
    """Saves the current cache to disk."""
    raise NotImplementedError()

  def trim(self):
    """Enforces cache policies, then calls save().

    Returns:
      List with the size of evicted items.
    """
    raise NotImplementedError()

  def cleanup(self):
    """Deletes any corrupted item from the cache, then calls trim(), then
    save().

    It is assumed to take significantly more time than trim().
    """
    raise NotImplementedError()
257
258
class ContentAddressedCache(Cache):
  """Content addressed cache that stores objects temporarily.

  Instances can be accessed concurrently from multiple threads, so
  implementations should protect their internal state with some lock.
  """

  def __enter__(self):
    """Context manager interface; returns the cache itself."""
    # TODO(maruel): Remove.
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    """Context manager interface; never suppresses the exception."""
    # TODO(maruel): Remove.
    return False

  def touch(self, digest, size):
    """Ensures item is not corrupted and updates its LRU position.

    Arguments:
      digest: hash digest of item to check.
      size: expected size of this item.

    Returns:
      True if item is in cache and not corrupted.
    """
    raise NotImplementedError()

  def getfileobj(self, digest):
    """Returns a readable file like object for the item |digest|.

    If the file exists on the file system, the object has a .name attribute
    with an absolute path to the file.
    """
    raise NotImplementedError()

  def write(self, digest, content):
    """Reads data from |content| generator and stores it in cache.

    It is possible to write to an object that already exists. It may be
    ignored (sent to /dev/null) but the timestamp is still updated.

    Returns digest to simplify chaining.
    """
    raise NotImplementedError()
305
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400306
class MemoryContentAddressedCache(ContentAddressedCache):
  """ContentAddressedCache implementation that stores everything in memory."""

  def __init__(self, file_mode_mask=0o500):
    """Args:
      file_mode_mask: bit mask to AND file mode with. Default value will make
                      all mapped files to be read only.
    """
    super(MemoryContentAddressedCache, self).__init__(None)
    self._file_mode_mask = file_mode_mask
    # Items in a LRU lookup dict(digest: content). The values are the bytes
    # themselves, which is why sizes are computed with len() below.
    self._lru = lru.LRUDict()

  # Cache interface implementation.

  def __len__(self):
    """Returns the number of items stored."""
    with self._lock:
      return len(self._lru)

  def __iter__(self):
    """Iterates over the digests stored."""
    # This is not thread-safe.
    return self._lru.__iter__()

  def __contains__(self, digest):
    """Returns whether |digest| is stored.

    This is also part of the ContentAddressedCache interface; it is defined
    only once since a second definition would silently replace this one.
    """
    with self._lock:
      return digest in self._lru

  @property
  def total_size(self):
    """Returns the sum of the length of every item stored, in bytes."""
    with self._lock:
      return sum(len(i) for i in self._lru.values())

  def get_oldest(self):
    """Returns the timestamp of the oldest item, or None if there is none."""
    with self._lock:
      try:
        # (key, (value, ts))
        return self._lru.get_oldest()[1][1]
      except KeyError:
        return None

  def remove_oldest(self):
    """Drops the oldest item and returns its size in bytes."""
    with self._lock:
      # TODO(maruel): Update self._added.
      # (key, (value, ts))
      return len(self._lru.pop_oldest()[1][0])

  def save(self):
    """Does nothing; nothing is persisted."""
    pass

  def trim(self):
    """Trimming is not implemented for MemoryContentAddressedCache."""
    return []

  def cleanup(self):
    """Cleaning is irrelevant, as there's no stateful serialization."""
    pass

  # ContentAddressedCache interface implementation.

  def touch(self, digest, size):
    """Updates the LRU position of |digest|.

    |size| is ignored; no corruption check is done.

    Returns:
      True if the item is in the cache, False otherwise.
    """
    with self._lock:
      try:
        self._lru.touch(digest)
      except KeyError:
        return False
    return True

  def getfileobj(self, digest):
    """Returns an io.BytesIO over the content stored for |digest|.

    The item is recorded as used and its LRU position is updated.

    Raises:
      CacheMiss if |digest| is not in the cache.
    """
    with self._lock:
      try:
        d = self._lru[digest]
      except KeyError:
        raise CacheMiss(digest)
      self._used.append(len(d))
      self._lru.touch(digest)
    return io.BytesIO(d)

  def write(self, digest, content):
    """Stores the concatenation of the chunks from |content| under |digest|.

    Returns digest to simplify chaining.
    """
    # Assemble whole stream before taking the lock.
    data = six.b('').join(content)
    with self._lock:
      self._lru.add(digest, data)
      self._added.append(len(data))
    return digest
395
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400396
397class DiskContentAddressedCache(ContentAddressedCache):
398 """Stateful LRU cache in a flat hash table in a directory.
399
400 Saves its state as json file.
401 """
402 STATE_FILE = u'state.json'
403
Marc-Antoine Ruel79d42192019-02-06 19:24:16 +0000404 def __init__(self, cache_dir, policies, trim, time_fn=None):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400405 """
406 Arguments:
407 cache_dir: directory where to place the cache.
408 policies: CachePolicies instance, cache retention policies.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400409 trim: if True to enforce |policies| right away.
Marc-Antoine Ruel79d42192019-02-06 19:24:16 +0000410 It can be done later by calling trim() explicitly.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400411 """
412 # All protected methods (starting with '_') except _path should be called
413 # with self._lock held.
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400414 super(DiskContentAddressedCache, self).__init__(cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400415 self.policies = policies
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400416 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
417 # Items in a LRU lookup dict(digest: size).
418 self._lru = lru.LRUDict()
419 # Current cached free disk space. It is updated by self._trim().
420 file_path.ensure_tree(self.cache_dir)
421 self._free_disk = file_path.get_free_space(self.cache_dir)
422 # The first item in the LRU cache that must not be evicted during this run
423 # since it was referenced. All items more recent that _protected in the LRU
424 # cache are also inherently protected. It could be a set() of all items
425 # referenced but this increases memory usage without a use case.
426 self._protected = None
427 # Cleanup operations done by self._load(), if any.
428 self._operations = []
429 with tools.Profiler('Setup'):
430 with self._lock:
431 self._load(trim, time_fn)
432
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000433 # Cache interface implementation.
434
435 def __len__(self):
436 with self._lock:
437 return len(self._lru)
438
439 def __iter__(self):
440 # This is not thread-safe.
441 return self._lru.__iter__()
442
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400443 def __contains__(self, digest):
444 with self._lock:
445 return digest in self._lru
446
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400447 @property
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400448 def total_size(self):
449 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000450 return sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400451
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000452 def get_oldest(self):
453 with self._lock:
454 try:
455 # (key, (value, ts))
456 return self._lru.get_oldest()[1][1]
457 except KeyError:
458 return None
459
460 def remove_oldest(self):
461 with self._lock:
462 # TODO(maruel): Update self._added.
463 return self._remove_lru_file(True)
464
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000465 def save(self):
466 with self._lock:
467 return self._save()
468
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000469 def trim(self):
470 """Forces retention policies."""
471 with self._lock:
472 return self._trim()
473
  def cleanup(self):
    """Cleans up the cache directory.

    Ensures there are no unknown files in cache_dir.
    Ensures the read-only bits are set correctly.
    Verifies the hash of the items modified since they were last recorded and
    deletes the ones that are corrupted.

    At that point, the cache was already loaded, trimmed to respect cache
    policies.
    """
    logging.info('DiskContentAddressedCache.cleanup(): Cleaning %s',
                 self.cache_dir)
    with self._lock:
      fs.chmod(self.cache_dir, 0o700)
      # Ensure that all files listed in the state still exist and add new ones.
      previous = set(self._lru)
      # It'd be faster if there were a readdir() function.
      for filename in fs.listdir(self.cache_dir):
        if filename == self.STATE_FILE:
          fs.chmod(os.path.join(self.cache_dir, filename), 0o600)
          continue
        if filename in previous:
          # A tracked item: make it read-only and mark it as found.
          fs.chmod(os.path.join(self.cache_dir, filename), 0o400)
          previous.remove(filename)
          continue

        # An untracked file. Delete it.
        logging.warning(
            'DiskContentAddressedCache.cleanup(): Removing unknown file %s',
            filename)
        p = self._path(filename)
        if fs.isdir(p):
          try:
            file_path.rmtree(p)
          except OSError:
            # Best effort; a directory that can't be deleted is left behind.
            pass
        else:
          file_path.try_remove(p)
        continue

      if previous:
        # What remains in |previous| is tracked in the LRU but was not found on
        # disk. Filter out these entries.
        logging.warning(
            'DiskContentAddressedCache.cleanup(): Removed %d lost files',
            len(previous))
        for filename in previous:
          self._lru.pop(filename)
        self._save()

    # Verify the hash of the modified items to detect corruption. The corrupted
    # files will be evicted.
    total = 0
    verified = 0
    deleted = 0
    logging.info(
        'DiskContentAddressedCache.cleanup(): Verifying modified files')
    with self._lock:
      # Iterate over a copy since items may be popped during the loop.
      for digest, (_, timestamp) in list(self._lru._items.items()):
        total += 1
        # Verify only if the mtime is greater than the timestamp in state.json,
        # to avoid taking too long.
        if self._get_mtime(digest) <= timestamp:
          continue
        logging.warning(
            'DiskContentAddressedCache.cleanup(): Item has been modified.'
            ' verifying item: %s', digest)
        is_valid = self._is_valid_hash(digest)
        verified += 1
        logging.warning(
            'DiskContentAddressedCache.cleanup(): verified. is_valid: %s, '
            'item: %s', is_valid, digest)
        if is_valid:
          # Update timestamp in state.json
          self._lru.touch(digest)
          continue
        # Remove the corrupted file from the LRU and the file system.
        self._lru.pop(digest)
        self._delete_file(digest, UNKNOWN_FILE_SIZE)
        deleted += 1
        logging.error(
            'DiskContentAddressedCache.cleanup(): Deleted corrupted item: %s',
            digest)
      self._save()
    logging.info(
        'DiskContentAddressedCache.cleanup(): Verified modified files.'
        ' total: %d, verified: %d, deleted: %d', total, verified, deleted)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400559
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000560 # ContentAddressedCache interface implementation.
561
562 def __contains__(self, digest):
563 with self._lock:
564 return digest in self._lru
565
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400566 def touch(self, digest, size):
567 """Verifies an actual file is valid and bumps its LRU position.
568
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000569 Returns False if the file is missing or invalid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400570
571 Note that is doesn't compute the hash so it could still be corrupted if the
572 file size didn't change.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400573 """
574 # Do the check outside the lock.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000575 looks_valid = is_valid_file(self._path(digest), size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400576
577 # Update its LRU position.
578 with self._lock:
579 if digest not in self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000580 if looks_valid:
581 # Exists but not in the LRU anymore.
582 self._delete_file(digest, size)
583 return False
584 if not looks_valid:
585 self._lru.pop(digest)
586 # Exists but not in the LRU anymore.
587 self._delete_file(digest, size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400588 return False
589 self._lru.touch(digest)
590 self._protected = self._protected or digest
591 return True
592
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400593 def getfileobj(self, digest):
594 try:
595 f = fs.open(self._path(digest), 'rb')
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400596 except IOError:
597 raise CacheMiss(digest)
Vadim Shtayura33054fa2018-11-01 12:47:59 +0000598 with self._lock:
599 try:
600 self._used.append(self._lru[digest])
601 except KeyError:
602 # If the digest is not actually in _lru, assume it is a cache miss.
603 # Existing file will be overwritten by whoever uses the cache and added
604 # to _lru.
605 f.close()
606 raise CacheMiss(digest)
607 return f
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400608
609 def write(self, digest, content):
610 assert content is not None
611 with self._lock:
612 self._protected = self._protected or digest
613 path = self._path(digest)
614 # A stale broken file may remain. It is possible for the file to have write
615 # access bit removed which would cause the file_write() call to fail to open
616 # in write mode. Take no chance here.
617 file_path.try_remove(path)
618 try:
619 size = file_write(path, content)
620 except:
621 # There are two possible places were an exception can occur:
622 # 1) Inside |content| generator in case of network or unzipping errors.
623 # 2) Inside file_write itself in case of disk IO errors.
624 # In any case delete an incomplete file and propagate the exception to
625 # caller, it will be logged there.
626 file_path.try_remove(path)
627 raise
628 # Make the file read-only in the cache. This has a few side-effects since
629 # the file node is modified, so every directory entries to this file becomes
630 # read-only. It's fine here because it is a new file.
631 file_path.set_read_only(path, True)
632 with self._lock:
633 self._add(digest, size)
634 return digest
635
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000636 # Internal functions.
637
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400638 def _load(self, trim, time_fn):
639 """Loads state of the cache from json file.
640
641 If cache_dir does not exist on disk, it is created.
642 """
643 self._lock.assert_locked()
644
645 if not fs.isfile(self.state_file):
646 if not fs.isdir(self.cache_dir):
647 fs.makedirs(self.cache_dir)
648 else:
649 # Load state of the cache.
650 try:
651 self._lru = lru.LRUDict.load(self.state_file)
652 except ValueError as err:
653 logging.error('Failed to load cache state: %s' % (err,))
Takuto Ikutaeccc88c2019-12-13 14:46:32 +0000654 # Don't want to keep broken cache dir.
655 file_path.rmtree(self.cache_dir)
656 fs.makedirs(self.cache_dir)
Matt Kotsenasefe30092020-03-19 01:12:55 +0000657 self._free_disk = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400658 if time_fn:
659 self._lru.time_fn = time_fn
660 if trim:
661 self._trim()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400662
663 def _save(self):
664 """Saves the LRU ordering."""
665 self._lock.assert_locked()
666 if sys.platform != 'win32':
667 d = os.path.dirname(self.state_file)
668 if fs.isdir(d):
669 # Necessary otherwise the file can't be created.
670 file_path.set_read_only(d, False)
671 if fs.isfile(self.state_file):
672 file_path.set_read_only(self.state_file, False)
673 self._lru.save(self.state_file)
674
675 def _trim(self):
676 """Trims anything we don't know, make sure enough free space exists."""
677 self._lock.assert_locked()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000678 evicted = []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400679
680 # Trim old items.
681 if self.policies.max_age_secs:
682 cutoff = self._lru.time_fn() - self.policies.max_age_secs
683 while self._lru:
684 oldest = self._lru.get_oldest()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000685 # (key, (data, ts)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400686 if oldest[1][1] >= cutoff:
687 break
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000688 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400689
690 # Ensure maximum cache size.
691 if self.policies.max_cache_size:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000692 total_size = sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400693 while total_size > self.policies.max_cache_size:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000694 e = self._remove_lru_file(True)
695 evicted.append(e)
696 total_size -= e
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400697
698 # Ensure maximum number of items in the cache.
699 if self.policies.max_items and len(self._lru) > self.policies.max_items:
Marc-Antoine Ruel0fdee222019-10-10 14:42:40 +0000700 for _ in range(len(self._lru) - self.policies.max_items):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000701 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400702
703 # Ensure enough free space.
704 self._free_disk = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400705 while (
706 self.policies.min_free_space and
707 self._lru and
708 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000709 # self._free_disk is updated by this call.
710 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400711
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000712 if evicted:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000713 total_usage = sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400714 usage_percent = 0.
715 if total_usage:
716 usage_percent = 100. * float(total_usage) / self.policies.max_cache_size
717
718 logging.warning(
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000719 'Trimmed %d file(s) (%.1fkb) due to not enough free disk space:'
720 ' %.1fkb free, %.1fkb cache (%.1f%% of its maximum capacity of '
Junji Watanabe38b28b02020-04-23 10:23:30 +0000721 '%.1fkb)', len(evicted),
722 sum(evicted) / 1024., self._free_disk / 1024., total_usage / 1024.,
723 usage_percent, self.policies.max_cache_size / 1024.)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400724 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000725 return evicted
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400726
727 def _path(self, digest):
728 """Returns the path to one item."""
729 return os.path.join(self.cache_dir, digest)
730
731 def _remove_lru_file(self, allow_protected):
Quinten Yearsley0bc84ce2020-04-09 22:38:08 +0000732 """Removes the latest recently used file and returns its size.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000733
734 Updates self._free_disk.
735 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400736 self._lock.assert_locked()
737 try:
Takuto Ikutae40f76a2020-01-20 01:22:17 +0000738 digest, _ = self._lru.get_oldest()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400739 if not allow_protected and digest == self._protected:
Takuto Ikutae40f76a2020-01-20 01:22:17 +0000740 total_size = sum(self._lru.values())
741 msg = ('Not enough space to fetch the whole isolated tree.\n'
Takuto Ikutaa953f272020-01-20 02:59:17 +0000742 ' %s\n cache=%d bytes (%.3f GiB), %d items; '
743 '%s bytes (%.3f GiB) free_space') % (
744 self.policies, total_size, float(total_size) / 1024**3,
745 len(self._lru), self._free_disk,
746 float(self._free_disk) / 1024**3)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400747 raise NoMoreSpace(msg)
748 except KeyError:
749 # That means an internal error.
750 raise NoMoreSpace('Nothing to remove, can\'t happend')
751 digest, (size, _) = self._lru.pop_oldest()
Takuto Ikuta8d8ca9b2021-02-26 02:31:43 +0000752 logging.debug('Removing LRU file %s with size %s bytes', digest, size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400753 self._delete_file(digest, size)
754 return size
755
756 def _add(self, digest, size=UNKNOWN_FILE_SIZE):
757 """Adds an item into LRU cache marking it as a newest one."""
758 self._lock.assert_locked()
759 if size == UNKNOWN_FILE_SIZE:
760 size = fs.stat(self._path(digest)).st_size
761 self._added.append(size)
762 self._lru.add(digest, size)
763 self._free_disk -= size
764 # Do a quicker version of self._trim(). It only enforces free disk space,
765 # not cache size limits. It doesn't actually look at real free disk space,
766 # only uses its cache values. self._trim() will be called later to enforce
767 # real trimming but doing this quick version here makes it possible to map
768 # an isolated that is larger than the current amount of free disk space when
769 # the cache size is already large.
Junji Watanabe38b28b02020-04-23 10:23:30 +0000770 while (self.policies.min_free_space and self._lru and
771 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000772 # self._free_disk is updated by this call.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400773 if self._remove_lru_file(False) == -1:
774 break
775
776 def _delete_file(self, digest, size=UNKNOWN_FILE_SIZE):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000777 """Deletes cache file from the file system.
778
779 Updates self._free_disk.
780 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400781 self._lock.assert_locked()
782 try:
783 if size == UNKNOWN_FILE_SIZE:
784 try:
785 size = fs.stat(self._path(digest)).st_size
786 except OSError:
787 size = 0
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000788 if file_path.try_remove(self._path(digest)):
789 self._free_disk += size
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400790 except OSError as e:
791 if e.errno != errno.ENOENT:
792 logging.error('Error attempting to delete a file %s:\n%s' % (digest, e))
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400793
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000794 def _get_mtime(self, digest):
795 """Get mtime of cache file."""
796 return os.path.getmtime(self._path(digest))
797
798 def _is_valid_hash(self, digest):
799 """Verify digest with supported hash algos."""
800 for _, algo in isolated_format.SUPPORTED_ALGOS.items():
Quinten Yearsley0bc84ce2020-04-09 22:38:08 +0000801 if digest == isolated_format.hash_file(self._path(digest), algo):
802 return True
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000803 return False
804
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400805
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400806class NamedCache(Cache):
807 """Manages cache directories.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400808
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400809 A cache entry is a tuple (name, path), where
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400810 name is a short identifier that describes the contents of the cache, e.g.
811 "git_v8" could be all git repositories required by v8 builds, or
812 "build_chromium" could be build artefacts of the Chromium.
813 path is a directory path relative to the task run dir. Cache installation
814 puts the requested cache directory at the path.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400815 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400816 _DIR_ALPHABET = string.ascii_letters + string.digits
817 STATE_FILE = u'state.json'
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +0000818 NAMED_DIR = u'named'
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400819
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400820 def __init__(self, cache_dir, policies, time_fn=None):
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400821 """Initializes NamedCaches.
822
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400823 Arguments:
824 - cache_dir is a directory for persistent cache storage.
825 - policies is a CachePolicies instance.
826 - time_fn is a function that returns timestamp (float) and used to take
827 timestamps when new caches are requested. Used in unit tests.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400828 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400829 super(NamedCache, self).__init__(cache_dir)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400830 self._policies = policies
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000831 # LRU {cache_name -> tuple(cache_location, size)}
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400832 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
833 self._lru = lru.LRUDict()
834 if not fs.isdir(self.cache_dir):
835 fs.makedirs(self.cache_dir)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000836 elif fs.isfile(self.state_file):
Marc-Antoine Ruel3543e212018-05-23 01:04:34 +0000837 try:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400838 self._lru = lru.LRUDict.load(self.state_file)
Takuto Ikutac4b85ec2020-06-09 03:42:39 +0000839 for _, size in self._lru.values():
Takuto Ikuta1c717d72020-06-29 10:15:09 +0000840 if not isinstance(size, six.integer_types):
Takuto Ikuta6acf8f92020-07-02 02:06:42 +0000841 with open(self.state_file, 'r') as f:
842 logging.info('named cache state file: %s\n%s', self.state_file,
843 f.read())
Junji Watanabeedcf47d2020-06-11 08:41:01 +0000844 raise ValueError("size is not integer: %s" % size)
Takuto Ikutac4b85ec2020-06-09 03:42:39 +0000845
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400846 except ValueError:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000847 logging.exception(
848 'NamedCache: failed to load named cache state file; obliterating')
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400849 file_path.rmtree(self.cache_dir)
Takuto Ikuta568ddb22020-01-20 23:24:16 +0000850 fs.makedirs(self.cache_dir)
Takuto Ikutadadfbb02020-07-10 03:31:26 +0000851 self._lru = lru.LRUDict()
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000852 with self._lock:
853 self._try_upgrade()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400854 if time_fn:
855 self._lru.time_fn = time_fn
856
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400857 @property
858 def available(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000859 """Returns a set of names of available caches."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400860 with self._lock:
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000861 return set(self._lru)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400862
Takuto Ikutaeab23172020-07-02 03:50:02 +0000863 def _sudo_chown(self, path):
864 if sys.platform == 'win32':
865 return
866 uid = os.getuid()
867 if os.stat(path).st_uid == uid:
868 return
869 # Maybe owner of |path| is different from runner of this script. This is to
870 # make fs.rename work in that case.
871 # https://crbug.com/986676
872 subprocess.check_call(['sudo', '-n', 'chown', str(uid), path])
873
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000874 def install(self, dst, name):
875 """Creates the directory |dst| and moves a previous named cache |name| if it
876 was in the local named caches cache.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400877
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000878 dst must be absolute, unicode and must not exist.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400879
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000880 Returns the reused named cache size in bytes, or 0 if none was present.
881
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400882 Raises NamedCacheError if cannot install the cache.
883 """
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000884 logging.info('NamedCache.install(%r, %r)', dst, name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400885 with self._lock:
886 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000887 if fs.isdir(dst):
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400888 raise NamedCacheError(
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000889 'installation directory %r already exists' % dst)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400890
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000891 # Remove the named symlink if it exists.
892 link_name = self._get_named_path(name)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000893 if fs.exists(link_name):
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000894 # Remove the symlink itself, not its destination.
895 fs.remove(link_name)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000896
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000897 if name in self._lru:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000898 rel_cache, size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400899 abs_cache = os.path.join(self.cache_dir, rel_cache)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000900 if fs.isdir(abs_cache):
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000901 logging.info('- reusing %r; size was %d', rel_cache, size)
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000902 file_path.ensure_tree(os.path.dirname(dst))
Takuto Ikutaeab23172020-07-02 03:50:02 +0000903 self._sudo_chown(abs_cache)
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000904 fs.rename(abs_cache, dst)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400905 self._remove(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000906 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400907
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000908 logging.warning('- expected directory %r, does not exist', rel_cache)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400909 self._remove(name)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400910
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000911 # The named cache does not exist, create an empty directory. When
912 # uninstalling, we will move it back to the cache and create an an
913 # entry.
914 logging.info('- creating new directory')
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000915 file_path.ensure_tree(dst)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000916 return 0
Junji Watanabed2ab86b2021-08-13 07:20:23 +0000917 except (IOError, OSError, PermissionError) as ex:
Takuto Ikuta2fe58fd2021-08-18 13:47:36 +0000918 if sys.platform == 'win32':
919 print("There may be running process in cache"
920 " e.g. https://crbug.com/1239809#c14",
921 file=sys.stderr)
922 subprocess.check_call(
923 ["powershell", "get-process | select path,starttime"])
924
Marc-Antoine Ruel799bc4f2019-01-30 22:54:47 +0000925 # Raise using the original traceback.
926 exc = NamedCacheError(
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000927 'cannot install cache named %r at %r: %s' % (name, dst, ex))
Takuto Ikuta1c717d72020-06-29 10:15:09 +0000928 six.reraise(type(exc), exc, sys.exc_info()[2])
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000929 finally:
930 self._save()
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400931
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000932 def uninstall(self, src, name):
933 """Moves the cache directory back into the named cache hive for an eventual
934 reuse.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400935
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000936 The opposite of install().
937
938 src must be absolute and unicode. Its content is moved back into the local
939 named caches cache.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400940
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000941 Returns the named cache size in bytes.
942
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400943 Raises NamedCacheError if cannot uninstall the cache.
944 """
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000945 logging.info('NamedCache.uninstall(%r, %r)', src, name)
Junji Watanabe9cdfff52021-01-08 07:20:35 +0000946 start = time.time()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400947 with self._lock:
948 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000949 if not fs.isdir(src):
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400950 logging.warning(
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000951 'NamedCache: Directory %r does not exist anymore. Cache lost.',
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000952 src)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400953 return
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400954
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000955 if name in self._lru:
956 # This shouldn't happen but just remove the preexisting one and move
957 # on.
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000958 logging.error('- overwriting existing cache!')
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000959 self._remove(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000960
Takuto Ikutac1bdcf22021-10-27 05:07:26 +0000961 # Calculate the size of the named cache to keep. It's important because
962 # if size is zero (it's empty), we do not want to add it back to the
963 # named caches cache.
Takuto Ikuta995da062021-03-17 05:01:59 +0000964 size = file_path.get_recursive_size(src)
Takuto Ikutac1bdcf22021-10-27 05:07:26 +0000965 logging.info('- Size is %d', size)
966 if not size:
967 # Do not save empty named cache.
968 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400969
970 # Move the dir and create an entry for the named cache.
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000971 rel_cache = self._allocate_dir()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400972 abs_cache = os.path.join(self.cache_dir, rel_cache)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000973 logging.info('- Moving to %r', rel_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400974 file_path.ensure_tree(os.path.dirname(abs_cache))
Takuto Ikutaeab23172020-07-02 03:50:02 +0000975 self._sudo_chown(src)
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000976 fs.rename(src, abs_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400977
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000978 self._lru.add(name, (rel_cache, size))
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000979 self._added.append(size)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000980
981 # Create symlink <cache_dir>/<named>/<name> -> <cache_dir>/<short name>
982 # for user convenience.
983 named_path = self._get_named_path(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000984 if fs.exists(named_path):
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000985 file_path.remove(named_path)
986 else:
987 file_path.ensure_tree(os.path.dirname(named_path))
988
989 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000990 fs.symlink(os.path.join(u'..', rel_cache), named_path)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000991 logging.info(
992 'NamedCache: Created symlink %r to %r', named_path, abs_cache)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000993 except OSError:
994 # Ignore on Windows. It happens when running as a normal user or when
995 # UAC is enabled and the user is a filtered administrator account.
996 if sys.platform != 'win32':
997 raise
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000998 return size
Junji Watanabed2ab86b2021-08-13 07:20:23 +0000999 except (IOError, OSError, PermissionError) as ex:
Marc-Antoine Ruel799bc4f2019-01-30 22:54:47 +00001000 # Raise using the original traceback.
1001 exc = NamedCacheError(
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +00001002 'cannot uninstall cache named %r at %r: %s' % (name, src, ex))
Takuto Ikuta1c717d72020-06-29 10:15:09 +00001003 six.reraise(type(exc), exc, sys.exc_info()[2])
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001004 finally:
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +00001005 # Call save() at every uninstall. The assumptions are:
1006 # - The total the number of named caches is low, so the state.json file
1007 # is small, so the time it takes to write it to disk is short.
1008 # - The number of mapped named caches per task is low, so the number of
1009 # times save() is called on tear-down isn't high enough to be
1010 # significant.
1011 # - uninstall() sometimes throws due to file locking on Windows or
1012 # access rights on Linux. We want to keep as many as possible.
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001013 self._save()
Junji Watanabe9cdfff52021-01-08 07:20:35 +00001014 logging.info('NamedCache.uninstall(%r, %r) took %d seconds', src, name,
1015 time.time() - start)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001016
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001017 # Cache interface implementation.
1018
1019 def __len__(self):
1020 with self._lock:
1021 return len(self._lru)
1022
1023 def __iter__(self):
1024 # This is not thread-safe.
1025 return self._lru.__iter__()
1026
John Budorickc6186972020-02-26 00:58:14 +00001027 def __contains__(self, name):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001028 with self._lock:
John Budorickc6186972020-02-26 00:58:14 +00001029 return name in self._lru
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001030
1031 @property
1032 def total_size(self):
1033 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001034 return sum(size for _rel_path, size in self._lru.values())
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001035
1036 def get_oldest(self):
1037 with self._lock:
1038 try:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001039 # (key, (value, ts))
1040 return self._lru.get_oldest()[1][1]
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001041 except KeyError:
1042 return None
1043
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001044 def remove_oldest(self):
1045 with self._lock:
1046 # TODO(maruel): Update self._added.
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001047 _name, size = self._remove_lru_item()
1048 return size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001049
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +00001050 def save(self):
1051 with self._lock:
1052 return self._save()
1053
John Budorickc6186972020-02-26 00:58:14 +00001054 def touch(self, *names):
1055 with self._lock:
1056 for name in names:
1057 if name in self._lru:
1058 self._lru.touch(name)
1059 self._save()
1060
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001061 def trim(self):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001062 evicted = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001063 with self._lock:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001064 if not fs.isdir(self.cache_dir):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001065 return evicted
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001066
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001067 # Trim according to maximum number of items.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001068 if self._policies.max_items:
1069 while len(self._lru) > self._policies.max_items:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001070 name, size = self._remove_lru_item()
1071 evicted.append(size)
1072 logging.info(
1073 'NamedCache.trim(): Removed %r(%d) due to max_items(%d)',
1074 name, size, self._policies.max_items)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001075
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001076 # Trim according to maximum age.
1077 if self._policies.max_age_secs:
1078 cutoff = self._lru.time_fn() - self._policies.max_age_secs
1079 while self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001080 _name, (_data, ts) = self._lru.get_oldest()
1081 if ts >= cutoff:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001082 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001083 name, size = self._remove_lru_item()
1084 evicted.append(size)
1085 logging.info(
1086 'NamedCache.trim(): Removed %r(%d) due to max_age_secs(%d)',
1087 name, size, self._policies.max_age_secs)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001088
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001089 # Trim according to minimum free space.
1090 if self._policies.min_free_space:
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001091 while self._lru:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001092 free_space = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001093 if free_space >= self._policies.min_free_space:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001094 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001095 name, size = self._remove_lru_item()
1096 evicted.append(size)
1097 logging.info(
1098 'NamedCache.trim(): Removed %r(%d) due to min_free_space(%d)',
1099 name, size, self._policies.min_free_space)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001100
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001101 # Trim according to maximum total size.
1102 if self._policies.max_cache_size:
1103 while self._lru:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001104 total = sum(size for _rel_cache, size in self._lru.values())
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001105 if total <= self._policies.max_cache_size:
1106 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001107 name, size = self._remove_lru_item()
1108 evicted.append(size)
1109 logging.info(
1110 'NamedCache.trim(): Removed %r(%d) due to max_cache_size(%d)',
1111 name, size, self._policies.max_cache_size)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001112
Marc-Antoine Ruele79ddbf2018-06-13 18:33:07 +00001113 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001114 return evicted
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001115
1116 def cleanup(self):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001117 """Removes unknown directories.
1118
1119 Does not recalculate the cache size since it's surprisingly slow on some
1120 OSes.
1121 """
Junji Watanabe66041012021-08-11 06:40:08 +00001122 logging.info('NamedCache.cleanup(): Cleaning %s', self.cache_dir)
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001123 success = True
1124 with self._lock:
1125 try:
1126 actual = set(fs.listdir(self.cache_dir))
1127 actual.discard(self.NAMED_DIR)
1128 actual.discard(self.STATE_FILE)
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001129 expected = {v[0]: k for k, v in self._lru.items()}
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001130 # First, handle the actual cache content.
1131 # Remove missing entries.
1132 for missing in (set(expected) - actual):
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001133 name, size = self._lru.pop(expected[missing])
1134 logging.warning(
1135 'NamedCache.cleanup(): Missing on disk %r(%d)', name, size)
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001136 # Remove unexpected items.
1137 for unexpected in (actual - set(expected)):
1138 try:
1139 p = os.path.join(self.cache_dir, unexpected)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001140 logging.warning(
1141 'NamedCache.cleanup(): Unexpected %r', unexpected)
Marc-Antoine Ruel41362222018-06-28 14:52:34 +00001142 if fs.isdir(p) and not fs.islink(p):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001143 file_path.rmtree(p)
1144 else:
1145 fs.remove(p)
1146 except (IOError, OSError) as e:
1147 logging.error('Failed to remove %s: %s', unexpected, e)
1148 success = False
1149
1150 # Second, fix named cache links.
1151 named = os.path.join(self.cache_dir, self.NAMED_DIR)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001152 if fs.isdir(named):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001153 actual = set(fs.listdir(named))
1154 expected = set(self._lru)
1155 # Confirm entries. Do not add missing ones for now.
1156 for name in expected.intersection(actual):
1157 p = os.path.join(self.cache_dir, self.NAMED_DIR, name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001158 expected_link = os.path.join(u'..', self._lru[name][0])
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001159 if fs.islink(p):
1160 link = fs.readlink(p)
1161 if expected_link == link:
1162 continue
1163 logging.warning(
1164 'Unexpected symlink for cache %s: %s, expected %s',
1165 name, link, expected_link)
1166 else:
1167 logging.warning('Unexpected non symlink for cache %s', name)
Marc-Antoine Ruel41362222018-06-28 14:52:34 +00001168 if fs.isdir(p) and not fs.islink(p):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001169 file_path.rmtree(p)
1170 else:
1171 fs.remove(p)
1172 # Remove unexpected items.
1173 for unexpected in (actual - expected):
1174 try:
1175 p = os.path.join(self.cache_dir, self.NAMED_DIR, unexpected)
1176 if fs.isdir(p):
1177 file_path.rmtree(p)
1178 else:
1179 fs.remove(p)
1180 except (IOError, OSError) as e:
1181 logging.error('Failed to remove %s: %s', unexpected, e)
1182 success = False
1183 finally:
1184 self._save()
1185 return success
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001186
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001187 # Internal functions.
1188
1189 def _try_upgrade(self):
1190 """Upgrades from the old format to the new one if necessary.
1191
1192 This code can be removed so all bots are known to have the right new format.
1193 """
1194 if not self._lru:
1195 return
1196 _name, (data, _ts) = self._lru.get_oldest()
1197 if isinstance(data, (list, tuple)):
1198 return
1199 # Update to v2.
1200 def upgrade(_name, rel_cache):
1201 abs_cache = os.path.join(self.cache_dir, rel_cache)
Takuto Ikuta995da062021-03-17 05:01:59 +00001202 return rel_cache, file_path.get_recursive_size(abs_cache)
1203
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001204 self._lru.transform(upgrade)
1205 self._save()
1206
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001207 def _remove_lru_item(self):
1208 """Removes the oldest LRU entry. LRU must not be empty."""
1209 name, ((_rel_path, size), _ts) = self._lru.get_oldest()
Takuto Ikuta74686842021-07-30 04:11:03 +00001210 logging.info('Removing named cache %r, %d', name, size)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001211 self._remove(name)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001212 return name, size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001213
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001214 def _allocate_dir(self):
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001215 """Creates and returns relative path of a new cache directory.
1216
1217 In practice, it is a 2-letter string.
1218 """
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001219 # We randomly generate directory names that have two lower/upper case
1220 # letters or digits. Total number of possibilities is (26*2 + 10)^2 = 3844.
1221 abc_len = len(self._DIR_ALPHABET)
1222 tried = set()
1223 while len(tried) < 1000:
1224 i = random.randint(0, abc_len * abc_len - 1)
1225 rel_path = (
Takuto Ikuta1c717d72020-06-29 10:15:09 +00001226 self._DIR_ALPHABET[i // abc_len] + self._DIR_ALPHABET[i % abc_len])
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001227 if rel_path in tried:
1228 continue
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001229 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001230 if not fs.exists(abs_path):
1231 return rel_path
1232 tried.add(rel_path)
1233 raise NamedCacheError(
1234 'could not allocate a new cache dir, too many cache dirs')
1235
1236 def _remove(self, name):
1237 """Removes a cache directory and entry.
1238
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001239 Returns:
1240 Number of caches deleted.
1241 """
1242 self._lock.assert_locked()
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001243 # First try to remove the alias if it exists.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001244 named_dir = self._get_named_path(name)
1245 if fs.islink(named_dir):
1246 fs.unlink(named_dir)
1247
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001248 # Then remove the actual data.
1249 if name not in self._lru:
1250 return
1251 rel_path, _size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001252 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001253 if fs.isdir(abs_path):
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001254 file_path.rmtree(abs_path)
1255 self._lru.pop(name)
1256
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001257 def _save(self):
1258 self._lock.assert_locked()
1259 file_path.ensure_tree(self.cache_dir)
1260 self._lru.save(self.state_file)
1261
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001262 def _get_named_path(self, name):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001263 return os.path.join(self.cache_dir, self.NAMED_DIR, name)