blob: 2f8f8cf659e40326f38a9cb86f9252886fa05435 [file] [log] [blame]
# Copyright 2016 The LUCI Authors. All rights reserved.
# Use of this source code is governed under the Apache License, Version 2.0
# that can be found in the LICENSE file.
4
5"""This file implements Named Caches."""
6
7import contextlib
maruel681d6802017-01-17 16:56:03 -08008import logging
nodirf33b8d62016-10-26 22:34:58 -07009import optparse
10import os
11import random
12import re
13import string
14
15from utils import lru
16from utils import file_path
17from utils import fs
18from utils import threading_utils
19
20
# Keep synced with task_request.py
# Regular expression every named cache name must match. The u'' prefix is used
# instead of ur'' because the pattern has no backslashes (so both spell the
# same unicode pattern) and ur'' is a syntax error on Python 3.
CACHE_NAME_RE = re.compile(u'^[a-z0-9_]{1,4096}$')
# CacheManager.trim() removes the oldest caches while more than this many
# entries are tracked.
MAX_CACHE_SIZE = 50
24
25
class Error(Exception):
  """Error raised by named cache operations in this module."""
28
29
class CacheManager(object):
  """Manages cache directories exposed to a task.

  A task can specify that caches should be present on a bot. A cache is
  tuple (name, path), where
    name is a short identifier that describes the contents of the cache, e.g.
      "git_v8" could be all git repositories required by v8 builds, or
      "build_chromium" could be build artefacts of the Chromium.
    path is a directory path relative to the task run dir. Cache installation
      puts the requested cache directory at the path.
  """

  def __init__(self, root_dir):
    """Initializes NamedCaches.

    |root_dir| is a directory for persistent cache storage. It must be an
    absolute unicode path.
    """
    assert isinstance(root_dir, unicode), root_dir
    assert file_path.isabs(root_dir), root_dir
    self.root_dir = root_dir
    self._lock = threading_utils.LockWithAssert()
    # LRU {cache_name -> cache_location}
    # It is saved to |root_dir|/state.json.
    # It is None whenever the manager is not open.
    self._lru = None

  @contextlib.contextmanager
  def open(self, time_fn=None):
    """Opens NamedCaches for mutation operations, such as install.

    Only one caller can open the cache manager at a time. If the same thread
    calls this function after opening it earlier, the call will deadlock.

    time_fn is a function that returns timestamp (float) and used to take
    timestamps when new caches are requested.

    Returns a context manager that must be closed as soon as possible. On
    exit the state is saved to |root_dir|/state.json, even if the body raised.
    """
    with self._lock:
      state_path = os.path.join(self.root_dir, u'state.json')
      assert self._lru is None, 'acquired lock, but self._lru is not None'
      if os.path.isfile(state_path):
        try:
          self._lru = lru.LRUDict.load(state_path)
        except ValueError:
          # The state file cannot be trusted anymore, so neither can the
          # directories it described: start over from an empty root.
          logging.exception('failed to load named cache state file')
          logging.warning('deleting named caches')
          file_path.rmtree(self.root_dir)
      self._lru = self._lru or lru.LRUDict()
      if time_fn:
        self._lru.time_fn = time_fn
      try:
        yield
      finally:
        try:
          file_path.ensure_tree(self.root_dir)
          self._lru.save(state_path)
        finally:
          # Always mark the manager as closed. Otherwise a failure to save the
          # state would make every later open() fail its assertion.
          self._lru = None

  def __len__(self):
    """Returns number of items in the cache.

    NamedCache must be open.
    """
    return len(self._lru)

  def get_oldest(self):
    """Returns name of the LRU cache or None.

    NamedCache must be open.
    """
    self._lock.assert_locked()
    try:
      return self._lru.get_oldest()[0]
    except KeyError:
      return None

  def get_timestamp(self, name):
    """Returns timestamp of last use of an item.

    NamedCache must be open.

    Raises KeyError if cache is not found.
    """
    self._lock.assert_locked()
    assert isinstance(name, basestring), name
    return self._lru.get_timestamp(name)

  @property
  def available(self):
    """Returns a set of names of available caches.

    NamedCache must be open.
    """
    self._lock.assert_locked()
    return self._lru.keys_set()

  def install(self, path, name):
    """Moves the directory for the specified named cache to |path|.

    If there is no usable directory for |name|, an empty directory is created
    at |path| instead. In both cases the entry for |name| is dropped; it is
    created again by uninstall().

    NamedCache must be open. path must be absolute, unicode and must not exist.

    Raises Error if cannot install the cache.
    """
    self._lock.assert_locked()
    logging.info('Installing named cache %r to %r', name, path)
    try:
      _check_abs(path)
      if os.path.isdir(path):
        raise Error('installation directory %r already exists' % path)

      rel_cache = self._lru.get(name)
      if rel_cache:
        abs_cache = os.path.join(self.root_dir, rel_cache)
        if os.path.isdir(abs_cache):
          logging.info('Moving %r to %r', abs_cache, path)
          file_path.ensure_tree(os.path.dirname(path))
          fs.rename(abs_cache, path)
          # The directory has been moved out already; this only drops the
          # entry and the convenience symlink.
          self._remove(name)
          return

        logging.warning('directory for named cache %r does not exist', name)
        self._remove(name)

      # The named cache does not exist, create an empty directory.
      # When uninstalling, we will move it back to the cache and create
      # an entry.
      file_path.ensure_tree(path)
    except (OSError, Error) as ex:
      raise Error(
          'cannot install cache named %r at %r: %s' % (
            name, path, ex))

  def uninstall(self, path, name):
    """Moves the cache directory back. Opposite to install().

    If |path| does not exist anymore, only a warning is logged and no entry is
    created. If an entry for |name| already exists, its directory is replaced
    by |path|.

    NamedCache must be open. path must be absolute and unicode.

    Raises Error if cannot uninstall the cache.
    """
    self._lock.assert_locked()
    logging.info('Uninstalling named cache %r from %r', name, path)
    try:
      _check_abs(path)
      if not os.path.isdir(path):
        logging.warning(
            'Directory %r does not exist anymore. Cache lost.', path)
        return

      rel_cache = self._lru.get(name)
      if rel_cache:
        # Do not crash because cache already exists.
        logging.warning('overwriting an existing named cache %r', name)
        create_named_link = False
      else:
        rel_cache = self._allocate_dir()
        create_named_link = True

      # Move the dir and create an entry for the named cache.
      abs_cache = os.path.join(self.root_dir, rel_cache)
      if not create_named_link and os.path.isdir(abs_cache):
        # Renaming a directory onto an existing one fails (always on Windows,
        # for a non-empty target on POSIX), so drop the stale copy first to
        # really overwrite it.
        file_path.rmtree(abs_cache)
      logging.info('Moving %r to %r', path, abs_cache)
      file_path.ensure_tree(os.path.dirname(abs_cache))
      fs.rename(path, abs_cache)
      self._lru.add(name, rel_cache)

      if create_named_link:
        # Create symlink <root_dir>/<named>/<name> -> <root_dir>/<short name>
        # for user convenience.
        named_path = self._get_named_path(name)
        if os.path.exists(named_path):
          file_path.remove(named_path)
        else:
          file_path.ensure_tree(os.path.dirname(named_path))
        fs.symlink(abs_cache, named_path)
        logging.info('Created symlink %r to %r', named_path, abs_cache)
    except (OSError, Error) as ex:
      raise Error(
          'cannot uninstall cache named %r at %r: %s' % (
            name, path, ex))

  def trim(self, min_free_space):
    """Purges cache.

    Removes cache directories that were not accessed for a long time
    until there is enough free space and the number of caches is sane.

    If min_free_space is None, disk free space is not checked.

    NamedCache must be open.

    Returns:
      Number of caches deleted.
    """
    self._lock.assert_locked()
    if not os.path.isdir(self.root_dir):
      return 0

    total = 0
    free_space = 0
    if min_free_space:
      free_space = file_path.get_free_space(self.root_dir)
    while ((min_free_space and free_space < min_free_space)
           or len(self._lru) > MAX_CACHE_SIZE):
      # %s rather than %d: min_free_space may be None, which %d cannot format.
      logging.info(
          'Making space for named cache %s < %s or %s > %s',
          free_space, min_free_space, len(self._lru), MAX_CACHE_SIZE)
      try:
        name, _ = self._lru.get_oldest()
      except KeyError:
        # Nothing left to delete.
        return total
      logging.info('Removing named cache %r', name)
      self._remove(name)
      if min_free_space:
        free_space = file_path.get_free_space(self.root_dir)
      total += 1
    return total

  _DIR_ALPHABET = string.ascii_letters + string.digits

  def _allocate_dir(self):
    """Returns relative path for a new cache directory; does not create it.

    Raises Error if no unused name is found after 1000 distinct candidates.
    """
    # We randomly generate directory names that have two lower/upper case
    # letters or digits. Total number of possibilities is (26*2 + 10)^2 = 3844.
    abc_len = len(self._DIR_ALPHABET)
    tried = set()
    while len(tried) < 1000:
      i = random.randint(0, abc_len * abc_len - 1)
      # Floor division keeps the index an integer even under true division.
      rel_path = (
          self._DIR_ALPHABET[i // abc_len] +
          self._DIR_ALPHABET[i % abc_len])
      if rel_path in tried:
        continue
      abs_path = os.path.join(self.root_dir, rel_path)
      if not fs.exists(abs_path):
        return rel_path
      tried.add(rel_path)
    raise Error('could not allocate a new cache dir, too many cache dirs')

  def _remove(self, name):
    """Removes a cache directory, its named symlink and its entry.

    Does nothing if there is no entry for |name|.

    NamedCache must be open.
    """
    self._lock.assert_locked()
    rel_path = self._lru.get(name)
    if not rel_path:
      return

    named_dir = self._get_named_path(name)
    if fs.islink(named_dir):
      fs.unlink(named_dir)

    abs_path = os.path.join(self.root_dir, rel_path)
    if os.path.isdir(abs_path):
      file_path.rmtree(abs_path)
    self._lru.pop(name)

  def _get_named_path(self, name):
    """Returns the path <root_dir>/named/<name> used for the symlink."""
    return os.path.join(self.root_dir, 'named', name)
289
290
def add_named_cache_options(parser):
  """Registers the named cache command line flags on |parser|.

  Adds a 'Named caches' option group with two flags:
    --named-cache takes a name and a path, can be repeated, and is collected
      into options.named_caches.
    --named-cache-root is the cache root directory.
  """
  named_cache_help = (
      'A named cache to request. Accepts two arguments, name and path. '
      'name identifies the cache, must match regex [a-z0-9_]{1,4096}. '
      'path is a path relative to the run dir where the cache directory '
      'must be put to. '
      'This option can be specified more than once.')
  caches_group = optparse.OptionGroup(parser, 'Named caches')
  caches_group.add_option(
      '--named-cache',
      help=named_cache_help,
      nargs=2,
      action='append',
      dest='named_caches',
      default=[])
  caches_group.add_option(
      '--named-cache-root',
      help='Cache root directory. Default=%default')
  parser.add_option_group(caches_group)
308
309
def process_named_cache_options(parser, options):
  """Validates named cache options and returns a CacheManager.

  Reports a problem through parser.error() when caches are requested without
  a cache root, when a cache name does not match CACHE_NAME_RE or when a cache
  path is empty.

  Returns None if no cache root was specified.
  """
  root = options.named_cache_root
  requested = options.named_caches
  if requested and not root:
    parser.error('--named-cache is specified, but --named-cache-root is empty')
  for cache_name, cache_path in requested:
    if CACHE_NAME_RE.match(cache_name) is None:
      parser.error(
          'cache name %r does not match %r' % (
              cache_name, CACHE_NAME_RE.pattern))
    if not cache_path:
      parser.error('cache path cannot be empty')
  if not root:
    return None
  return CacheManager(unicode(os.path.abspath(root)))
nodird6160682017-02-02 13:03:35 -0800323
324
def _check_abs(path):
  """Raises Error unless |path| is an absolute unicode path."""
  is_unicode = isinstance(path, unicode)
  if not is_unicode:
    raise Error('named cache installation path must be unicode')
  if os.path.isabs(path):
    return
  raise Error('named cache installation path must be absolute')
329 raise Error('named cache installation path must be absolute')