blob: 7566ccca9a5da9917e19e44584be641031861bdb [file] [log] [blame]
nodirf33b8d62016-10-26 22:34:58 -07001# Copyright 2016 The LUCI Authors. All rights reserved.
2# Use of this source code is governed under the Apache License, Version 2.0
3# that can be found in the LICENSE file.
4
5"""This file implements Named Caches."""
6
7import contextlib
maruel681d6802017-01-17 16:56:03 -08008import logging
nodirf33b8d62016-10-26 22:34:58 -07009import optparse
10import os
11import random
12import re
13import string
14
15from utils import lru
16from utils import file_path
17from utils import fs
18from utils import threading_utils
19
20
# Keep synced with task_request.py
# A valid cache name is 1 to 4096 characters drawn from lowercase ASCII
# letters, digits and underscore. The pattern contains no backslashes, so a
# plain u'' literal yields the same unicode pattern as the Python-2-only ur''
# form while also being parseable by Python 3.
CACHE_NAME_RE = re.compile(u'^[a-z0-9_]{1,4096}$')
# CacheManager.trim() evicts least recently used caches while more than this
# many are tracked.
MAX_CACHE_SIZE = 50
24
25
class Error(Exception):
  """Error raised by the named cache code in this module."""
28
29
class CacheManager(object):
  """Manages cache directories exposed to a task as symlinks.

  A task can specify that caches should be present on a bot. A cache is a
  tuple (name, path), where
    name is a short identifier that describes the contents of the cache, e.g.
      "git_v8" could be all git repositories required by v8 builds, or
      "build_chromium" could be build artefacts of the Chromium.
    path is a directory path relative to the task run dir. It will be mapped
      to the cache directory persisted on the bot.

  Most methods require the manager to be opened first, see open().
  """

  def __init__(self, root_dir):
    """Initializes NamedCaches.

    |root_dir| is an absolute path to the directory used for persistent cache
    storage.
    """
    assert file_path.isabs(root_dir), root_dir
    self.root_dir = unicode(root_dir)
    self._lock = threading_utils.LockWithAssert()
    # LRU {cache_name -> cache_location}
    # It is saved to |root_dir|/state.json.
    # It is None whenever the manager is not open.
    self._lru = None

  @contextlib.contextmanager
  def open(self, time_fn=None):
    """Opens NamedCaches for mutation operations, such as request or trim.

    Only one caller can open the cache manager at a time. If the same thread
    calls this function after opening it earlier, the call will deadlock.

    time_fn is a function that returns timestamp (float) and used to take
    timestamps when new caches are requested.

    If the state file exists but cannot be parsed (ValueError), the whole
    |root_dir| is deleted and an empty state is used instead.

    Returns a context manager that must be closed as soon as possible. On
    close, the state is saved to |root_dir|/state.json.
    """
    with self._lock:
      state_path = os.path.join(self.root_dir, u'state.json')
      assert self._lru is None, 'acquired lock, but self._lru is not None'
      if os.path.isfile(state_path):
        try:
          self._lru = lru.LRUDict.load(state_path)
        except ValueError:
          logging.exception('failed to load named cache state file')
          logging.warning('deleting named caches')
          file_path.rmtree(self.root_dir)
      self._lru = self._lru or lru.LRUDict()
      if time_fn:
        self._lru.time_fn = time_fn
      try:
        yield
      finally:
        try:
          file_path.ensure_tree(self.root_dir)
          self._lru.save(state_path)
        finally:
          # Always reset, even if saving failed; otherwise the assertion at
          # the top of the next open() call would fire.
          self._lru = None

  def __len__(self):
    """Returns number of items in the cache.

    Requires NamedCache to be open.
    """
    return len(self._lru)

  def request(self, name):
    """Returns an absolute path to the directory of the named cache.

    Creates a cache directory if it does not exist yet, and marks the cache
    as used in the LRU.

    Requires NamedCache to be open.
    """
    self._lock.assert_locked()
    assert isinstance(name, basestring), name
    path = self._lru.get(name)
    create_named_link = False
    if path is None:
      path = self._allocate_dir()
      create_named_link = True
      logging.info('Created %r for %r', path, name)
    abs_path = os.path.join(self.root_dir, path)

    # TODO(maruel): That's weird, it should exist already.
    file_path.ensure_tree(abs_path)
    self._lru.add(name, path)

    if create_named_link:
      # Create symlink <root_dir>/<named>/<name> -> <root_dir>/<short name>
      # for user convenience.
      named_path = self._get_named_path(name)
      if os.path.exists(named_path):
        file_path.remove(named_path)
      else:
        file_path.ensure_tree(os.path.dirname(named_path))
      logging.info('Symlink %r to %r', named_path, abs_path)
      fs.symlink(abs_path, named_path)

    return abs_path

  def get_oldest(self):
    """Returns name of the LRU cache or None.

    Requires NamedCache to be open.
    """
    self._lock.assert_locked()
    try:
      return self._lru.get_oldest()[0]
    except KeyError:
      return None

  def get_timestamp(self, name):
    """Returns timestamp of last use of an item.

    Requires NamedCache to be open.

    Raises KeyError if cache is not found.
    """
    self._lock.assert_locked()
    assert isinstance(name, basestring), name
    return self._lru.get_timestamp(name)

  # Note: this is a plain method, not a context manager. It never yields, so
  # wrapping it with contextlib.contextmanager would only make any
  # "with manager.create_symlinks(...)" usage fail.
  def create_symlinks(self, root, named_caches):
    """Creates symlinks in |root| for the specified named_caches.

    named_caches must be a list of (name, path) tuples.

    Requires NamedCache to be open.

    Raises Error if cannot create a symlink.
    """
    self._lock.assert_locked()
    for name, path in named_caches:
      logging.info('Named cache %r -> %r', name, path)
      # Used in the error message when validation fails before the absolute
      # symlink path could be computed.
      symlink_path = path
      try:
        _validate_named_cache_path(path)
        symlink_path = os.path.abspath(os.path.join(root, path))
        file_path.ensure_tree(os.path.dirname(symlink_path))
        requested = self.request(name)
        logging.info('Symlink %r to %r', symlink_path, requested)
        fs.symlink(requested, symlink_path)
      except (OSError, Error) as ex:
        raise Error(
            'cannot create a symlink for cache named "%s" at "%s": %s' % (
              name, symlink_path, ex))

  def delete_symlinks(self, root, named_caches):
    """Deletes symlinks from |root| for the specified named_caches.

    named_caches must be a list of (name, path) tuples.

    Raises Error if a path is invalid or a symlink cannot be unlinked.
    """
    for name, path in named_caches:
      logging.info('Unlinking named cache "%s"', name)
      # Used in the error message when validation fails before the absolute
      # symlink path could be computed.
      symlink_path = path
      try:
        _validate_named_cache_path(path)
        symlink_path = os.path.abspath(os.path.join(root, path))
        fs.unlink(symlink_path)
      except (OSError, Error) as ex:
        raise Error(
            'cannot unlink cache named "%s" at "%s": %s' % (
              name, symlink_path, ex))

  def trim(self, min_free_space):
    """Purges cache.

    Removes cache directories that were not accessed for a long time
    until there is enough free space and the number of caches is sane.

    If min_free_space is None, disk free space is not checked.

    Requires NamedCache to be open.
    """
    self._lock.assert_locked()
    if not os.path.isdir(self.root_dir):
      return

    free_space = 0
    if min_free_space:
      free_space = file_path.get_free_space(self.root_dir)
    while ((min_free_space and free_space < min_free_space)
           or len(self._lru) > MAX_CACHE_SIZE):
      logging.info(
          'Making space for named cache %s > %s or %s > %s',
          free_space, min_free_space, len(self._lru), MAX_CACHE_SIZE)
      try:
        name, (path, _) = self._lru.get_oldest()
      except KeyError:
        # Nothing left to evict.
        return
      # Drop the convenience symlink first, then the cache directory itself.
      named_dir = self._get_named_path(name)
      if fs.islink(named_dir):
        fs.unlink(named_dir)
      path_abs = os.path.join(self.root_dir, path)
      if os.path.isdir(path_abs):
        logging.info('Removing named cache %s', path_abs)
        file_path.rmtree(path_abs)
      if min_free_space:
        free_space = file_path.get_free_space(self.root_dir)
      self._lru.pop(name)

  _DIR_ALPHABET = string.ascii_letters + string.digits

  def _allocate_dir(self):
    """Returns relative path of a new, not yet existing, cache directory.

    Raises Error if no free name was found after 1000 distinct attempts.
    """
    # We randomly generate directory names that have two lower/upper case
    # letters or digits. Total number of possibilities is (26*2 + 10)^2 = 3844.
    abc_len = len(self._DIR_ALPHABET)
    tried = set()
    while len(tried) < 1000:
      i = random.randint(0, abc_len * abc_len - 1)
      first, second = divmod(i, abc_len)
      rel_path = self._DIR_ALPHABET[first] + self._DIR_ALPHABET[second]
      if rel_path in tried:
        continue
      abs_path = os.path.join(self.root_dir, rel_path)
      if not fs.exists(abs_path):
        return rel_path
      tried.add(rel_path)
    raise Error('could not allocate a new cache dir, too many cache dirs')

  def _get_named_path(self, name):
    """Returns the path <root_dir>/named/<name> of the convenience symlink."""
    return os.path.join(self.root_dir, 'named', name)
250
251
def add_named_cache_options(parser):
  """Adds the 'Named caches' option group to the optparse |parser|.

  The group defines --named-cache (repeatable, takes two arguments, stored in
  options.named_caches) and --named-cache-root (stored in
  options.named_cache_root).
  """
  named_cache_help = (
      'A named cache to request. Accepts two arguments, name and path. '
      'name identifies the cache, must match regex [a-z0-9_]{1,4096}. '
      'path is a path relative to the run dir where the cache directory '
      'must be symlinked to. '
      'This option can be specified more than once.')
  root_help = 'Cache root directory. Default=%default'

  caches_group = optparse.OptionGroup(parser, 'Named caches')
  caches_group.add_option(
      '--named-cache',
      dest='named_caches',
      action='append',
      nargs=2,
      default=[],
      help=named_cache_help)
  caches_group.add_option('--named-cache-root', help=root_help)
  parser.add_option_group(caches_group)
269
270
def process_named_cache_options(parser, options):
  """Validates named cache options and returns a CacheManager.

  Reports problems through parser.error(). Returns None when
  --named-cache-root is not set.
  """
  caches = options.named_caches
  root = options.named_cache_root
  if caches and not root:
    parser.error('--named-cache is specified, but --named-cache-root is empty')
  for name, path in caches:
    if CACHE_NAME_RE.match(name) is None:
      parser.error(
          'cache name "%s" does not match %s' % (name, CACHE_NAME_RE.pattern))
    if not path:
      parser.error('cache path cannot be empty')
  if not root:
    return None
  return CacheManager(os.path.abspath(root))
nodird6160682017-02-02 13:03:35 -0800284
285
286def _validate_named_cache_path(path):
287 if os.path.isabs(path):
288 raise Error('named cache path must not be absolute')
289 if '..' in path.split(os.path.sep):
290 raise Error('named cache path must not contain ".."')