blob: 0d03dd36694d3dee5d54feca15a12842b24d414f [file] [log] [blame]
nodirf33b8d62016-10-26 22:34:58 -07001# Copyright 2016 The LUCI Authors. All rights reserved.
2# Use of this source code is governed under the Apache License, Version 2.0
3# that can be found in the LICENSE file.
4
5"""This file implements Named Caches."""
6
7import contextlib
maruel681d6802017-01-17 16:56:03 -08008import logging
nodirf33b8d62016-10-26 22:34:58 -07009import optparse
10import os
11import random
12import re
13import string
Marc-Antoine Ruel5bbf52b2017-08-25 21:09:03 -040014import sys
nodirf33b8d62016-10-26 22:34:58 -070015
16from utils import lru
17from utils import file_path
18from utils import fs
19from utils import threading_utils
20
21
# Keep synced with task_request.py
# A cache name is 1 to 4096 lowercase ASCII letters, digits or underscores.
# The pattern contains no backslashes, so a plain u'' literal is equivalent to
# the Python 2-only ur'' form while remaining valid syntax on Python 3.
CACHE_NAME_RE = re.compile(u'^[a-z0-9_]{1,4096}$')
# CacheManager.trim() evicts least recently used caches above this count.
MAX_CACHE_SIZE = 50
25
26
class Error(Exception):
  """Failure specific to named cache handling."""
29
30
class CacheManager(object):
  """Manages cache directories exposed to a task.

  A task can specify that caches should be present on a bot. A cache is a
  tuple (name, path), where
    name is a short identifier that describes the contents of the cache, e.g.
      "git_v8" could be all git repositories required by v8 builds, or
      "build_chromium" could be build artefacts of the Chromium.
    path is a directory path relative to the task run dir. Cache installation
      puts the requested cache directory at the path.

  On disk, |root_dir| holds:
    state.json         serialized LRU {cache name -> relative directory}.
    <two characters>   the actual cache directories.
    named/<name>       convenience symlinks to the cache directories.
  """

  # Characters used to build the two-character cache directory names.
  _DIR_ALPHABET = string.ascii_letters + string.digits

  def __init__(self, root_dir):
    """Initializes the cache manager.

    |root_dir| is a directory for persistent cache storage. It must be an
    absolute unicode path.
    """
    assert isinstance(root_dir, unicode), root_dir
    assert file_path.isabs(root_dir), root_dir
    self.root_dir = root_dir
    self._lock = threading_utils.LockWithAssert()
    # LRU {cache_name -> cache_location}
    # It is saved to |root_dir|/state.json.
    # It is None whenever the manager is not open.
    self._lru = None

  @contextlib.contextmanager
  def open(self, time_fn=None):
    """Opens the cache manager for mutation operations, such as install.

    Only one caller can open the cache manager at a time. If the same thread
    calls this function after opening it earlier, the call will deadlock.

    time_fn is a function that returns timestamp (float) and used to take
    timestamps when new caches are requested.

    Returns a context manager that must be closed as soon as possible. The
    state is written back to |root_dir|/state.json when it is closed.
    """
    with self._lock:
      state_path = os.path.join(self.root_dir, u'state.json')
      assert self._lru is None, 'acquired lock, but self._lru is not None'
      if os.path.isfile(state_path):
        try:
          self._lru = lru.LRUDict.load(state_path)
        except ValueError:
          # The state file is unusable, so the directories it described cannot
          # be mapped back to names. Start over from an empty root.
          logging.exception('failed to load named cache state file')
          logging.warning('deleting named caches')
          file_path.rmtree(self.root_dir)
      self._lru = self._lru or lru.LRUDict()
      if time_fn:
        self._lru.time_fn = time_fn
      try:
        yield
      finally:
        try:
          file_path.ensure_tree(self.root_dir)
          self._lru.save(state_path)
        finally:
          # Always mark the manager as closed, even if saving failed;
          # otherwise the next open() would trip the assert above.
          self._lru = None

  def __len__(self):
    """Returns number of items in the cache.

    The manager must be open.
    """
    return len(self._lru)

  def get_oldest(self):
    """Returns name of the least recently used cache, or None if empty.

    The manager must be open.
    """
    self._lock.assert_locked()
    try:
      return self._lru.get_oldest()[0]
    except KeyError:
      return None

  def get_timestamp(self, name):
    """Returns timestamp of last use of an item.

    The manager must be open.

    Raises KeyError if cache is not found.
    """
    self._lock.assert_locked()
    assert isinstance(name, basestring), name
    return self._lru.get_timestamp(name)

  @property
  def available(self):
    """Returns a set of names of available caches.

    The manager must be open.
    """
    self._lock.assert_locked()
    return self._lru.keys_set()

  def install(self, path, name):
    """Moves the directory for the specified named cache to |path|.

    The manager must be open. path must be absolute, unicode and must not
    exist. If there is no cache with this name yet, an empty directory is
    created at |path| instead.

    Raises Error if cannot install the cache.
    """
    self._lock.assert_locked()
    logging.info('Installing named cache %r to %r', name, path)
    try:
      _check_abs(path)
      if os.path.isdir(path):
        raise Error('installation directory %r already exists' % path)

      rel_cache = self._lru.get(name)
      if rel_cache:
        abs_cache = os.path.join(self.root_dir, rel_cache)
        if os.path.isdir(abs_cache):
          logging.info('Moving %r to %r', abs_cache, path)
          file_path.ensure_tree(os.path.dirname(path))
          fs.rename(abs_cache, path)
          # The directory now lives at |path|; drop the entry and symlink.
          # uninstall() registers the cache again.
          self._remove(name)
          return

        logging.warning('directory for named cache %r does not exist', name)
        self._remove(name)

      # The named cache does not exist, create an empty directory.
      # When uninstalling, we will move it back to the cache and create an
      # entry.
      file_path.ensure_tree(path)
    except (OSError, Error) as ex:
      raise Error(
          'cannot install cache named %r at %r: %s' % (
            name, path, ex))

  def uninstall(self, path, name):
    """Moves the cache directory back. Opposite to install().

    The manager must be open. path must be absolute and unicode. If |path|
    does not exist anymore, a warning is logged and nothing is stored.

    Raises Error if cannot uninstall the cache.
    """
    self._lock.assert_locked()
    logging.info('Uninstalling named cache %r from %r', name, path)
    try:
      _check_abs(path)
      if not os.path.isdir(path):
        logging.warning(
            'Directory %r does not exist anymore. Cache lost.', path)
        return

      if self._lru.get(name):
        # Do not crash because cache already exists. Renaming onto an
        # existing directory is not portable, so drop the stale copy and
        # store the incoming one in a freshly allocated directory.
        logging.warning('overwriting an existing named cache %r', name)
        self._remove(name)
      rel_cache = self._allocate_dir()

      # Move the dir and create an entry for the named cache.
      abs_cache = os.path.join(self.root_dir, rel_cache)
      logging.info('Moving %r to %r', path, abs_cache)
      file_path.ensure_tree(os.path.dirname(abs_cache))
      fs.rename(path, abs_cache)
      self._lru.add(name, rel_cache)

      # Create symlink <root_dir>/<named>/<name> -> <root_dir>/<short name>
      # for user convenience.
      named_path = self._get_named_path(name)
      if os.path.exists(named_path):
        file_path.remove(named_path)
      else:
        file_path.ensure_tree(os.path.dirname(named_path))
      try:
        fs.symlink(abs_cache, named_path)
        logging.info('Created symlink %r to %r', named_path, abs_cache)
      except OSError:
        # Ignore on Windows. It happens when running as a normal user or when
        # UAC is enabled and the user is a filtered administrator account.
        if sys.platform != 'win32':
          raise
    except (OSError, Error) as ex:
      raise Error(
          'cannot uninstall cache named %r at %r: %s' % (
            name, path, ex))

  def trim(self, min_free_space):
    """Purges cache.

    Removes cache directories that were not accessed for a long time
    until there is enough free space and the number of caches is sane.

    If min_free_space is None, disk free space is not checked.

    The manager must be open.

    Returns:
      Number of caches deleted.
    """
    self._lock.assert_locked()
    if not os.path.isdir(self.root_dir):
      return 0

    total = 0
    free_space = 0
    if min_free_space:
      free_space = file_path.get_free_space(self.root_dir)
    while ((min_free_space and free_space < min_free_space)
           or len(self._lru) > MAX_CACHE_SIZE):
      # %s rather than %d: min_free_space may be None.
      logging.info(
          'Making space for named cache %s < %s or %s > %s',
          free_space, min_free_space, len(self._lru), MAX_CACHE_SIZE)
      try:
        name, _ = self._lru.get_oldest()
      except KeyError:
        # Nothing left to evict.
        return total
      logging.info('Removing named cache %r', name)
      self._remove(name)
      if min_free_space:
        free_space = file_path.get_free_space(self.root_dir)
      total += 1
    return total

  @classmethod
  def _dir_name_from_index(cls, index):
    """Maps an integer in [0, len(_DIR_ALPHABET)**2) to a 2-character name."""
    abc_len = len(cls._DIR_ALPHABET)
    # Floor division keeps the index an integer on Python 2 and Python 3.
    return (
        cls._DIR_ALPHABET[index // abc_len] +
        cls._DIR_ALPHABET[index % abc_len])

  def _allocate_dir(self):
    """Returns relative path of a new, currently unused cache directory.

    The directory itself is not created.

    Raises Error if no free name was found after 1000 distinct attempts.
    """
    # We randomly generate directory names that have two lower/upper case
    # letters or digits. Total number of possibilities is (26*2 + 10)^2 = 3844.
    abc_len = len(self._DIR_ALPHABET)
    tried = set()
    while len(tried) < 1000:
      rel_path = self._dir_name_from_index(
          random.randint(0, abc_len * abc_len - 1))
      if rel_path in tried:
        continue
      abs_path = os.path.join(self.root_dir, rel_path)
      if not fs.exists(abs_path):
        return rel_path
      tried.add(rel_path)
    raise Error('could not allocate a new cache dir, too many cache dirs')

  def _remove(self, name):
    """Removes a cache directory, its named symlink and its LRU entry.

    Does nothing if there is no entry for |name|.

    The manager must be open.
    """
    self._lock.assert_locked()
    rel_path = self._lru.get(name)
    if not rel_path:
      return

    named_dir = self._get_named_path(name)
    if fs.islink(named_dir):
      fs.unlink(named_dir)

    abs_path = os.path.join(self.root_dir, rel_path)
    if os.path.isdir(abs_path):
      file_path.rmtree(abs_path)
    self._lru.pop(name)

  def _get_named_path(self, name):
    """Returns the path of the convenience symlink for cache |name|."""
    return os.path.join(self.root_dir, 'named', name)
296
297
def add_named_cache_options(parser):
  """Registers the 'Named caches' option group on an optparse parser.

  Adds --named-cache (repeatable, takes a name and a path, collected into
  options.named_caches) and --named-cache-root.
  """
  cache_help = (
      'A named cache to request. Accepts two arguments, name and path. '
      'name identifies the cache, must match regex [a-z0-9_]{1,4096}. '
      'path is a path relative to the run dir where the cache directory '
      'must be put to. '
      'This option can be specified more than once.')
  named_group = optparse.OptionGroup(parser, 'Named caches')
  named_group.add_option(
      '--named-cache', dest='named_caches', action='append', nargs=2,
      default=[], help=cache_help)
  named_group.add_option(
      '--named-cache-root', help='Cache root directory. Default=%default')
  parser.add_option_group(named_group)
315
316
def process_named_cache_options(parser, options):
  """Checks the named cache flags and builds the matching CacheManager.

  Reports invalid flags through parser.error(). Returns a CacheManager rooted
  at the absolute --named-cache-root, or None when no root was given.
  """
  cache_root = options.named_cache_root
  requested = options.named_caches
  if requested and not cache_root:
    parser.error('--named-cache is specified, but --named-cache-root is empty')
  for cache_name, cache_path in requested:
    if CACHE_NAME_RE.match(cache_name) is None:
      parser.error(
          'cache name %r does not match %r' % (
              cache_name, CACHE_NAME_RE.pattern))
    if not cache_path:
      parser.error('cache path cannot be empty')
  if not cache_root:
    return None
  return CacheManager(unicode(os.path.abspath(cache_root)))
nodird6160682017-02-02 13:03:35 -0800330
331
def _check_abs(path):
  """Raises Error unless |path| is a unicode string holding an absolute path."""
  is_unicode = isinstance(path, unicode)
  if is_unicode and os.path.isabs(path):
    return
  if is_unicode:
    raise Error('named cache installation path must be absolute')
  raise Error('named cache installation path must be unicode')