blob: 50a4e1c04fb9005f5d4a83150f3ee405f0044e63 [file] [log] [blame]
nodirf33b8d62016-10-26 22:34:58 -07001# Copyright 2016 The LUCI Authors. All rights reserved.
2# Use of this source code is governed under the Apache License, Version 2.0
3# that can be found in the LICENSE file.
4
5"""This file implements Named Caches."""
6
7import contextlib
maruel681d6802017-01-17 16:56:03 -08008import logging
nodirf33b8d62016-10-26 22:34:58 -07009import optparse
10import os
11import random
12import re
13import string
14
15from utils import lru
16from utils import file_path
17from utils import fs
18from utils import threading_utils
19
20
# Keep synced with task_request.py
# A valid cache name is 1 to 4096 characters of lowercase ASCII letters,
# digits and underscores. The pattern has no backslashes, so a plain u''
# literal has exactly the same value as the former ur'' literal while also
# being valid syntax for Python 3 parsers.
CACHE_NAME_RE = re.compile(u'^[a-z0-9_]{1,4096}$')
# Upper bound on the number of named caches kept; CacheManager.trim() evicts
# the least recently used caches while the count exceeds this value.
MAX_CACHE_SIZE = 50
24
25
class Error(Exception):
  """Raised when a named cache operation cannot be completed."""
  pass
28
29
class CacheManager(object):
  """Manages cache directories exposed to a task as symlinks.

  A task can specify that caches should be present on a bot. A cache is
  tuple (name, path), where
    name is a short identifier that describes the contents of the cache, e.g.
      "git_v8" could be all git repositories required by v8 builds, or
      "build_chromium" could be build artefacts of the Chromium.
    path is a directory path relative to the task run dir. It will be mapped
      to the cache directory persisted on the bot.

  All mutating methods must be called inside the context returned by open().
  """

  def __init__(self, root_dir):
    """Initializes NamedCaches.

    |root_dir| is an absolute path to a directory for persistent cache storage.
    """
    assert file_path.isabs(root_dir), root_dir
    self.root_dir = unicode(root_dir)
    self._lock = threading_utils.LockWithAssert()
    # LRU {cache_name -> cache_location}
    # It is saved to |root_dir|/state.json.
    # It is None whenever the manager is not open.
    self._lru = None

  @contextlib.contextmanager
  def open(self, time_fn=None):
    """Opens NamedCaches for mutation operations, such as request or trim.

    Only one caller can open the cache manager at a time. If the same thread
    calls this function after opening it earlier, the call will deadlock.

    time_fn is a function that returns timestamp (float) and used to take
    timestamps when new caches are requested.

    Returns a context manager that must be closed as soon as possible. On
    exit the LRU state is written back to |root_dir|/state.json.
    """
    state_path = os.path.join(self.root_dir, u'state.json')
    with self._lock:
      if os.path.isfile(state_path):
        self._lru = lru.LRUDict.load(state_path)
      else:
        self._lru = lru.LRUDict()
      if time_fn:
        self._lru.time_fn = time_fn
      try:
        yield
      finally:
        try:
          file_path.ensure_tree(self.root_dir)
          self._lru.save(state_path)
        finally:
          # Reset even when saving fails, so that a closed manager never
          # exposes state from a previous session.
          self._lru = None

  def __len__(self):
    """Returns number of items in the cache.

    Requires NamedCache to be open.
    """
    return len(self._lru)

  def request(self, name):
    """Returns an absolute path to the directory of the named cache.

    Creates a cache directory if it does not exist yet, and marks the cache as
    the most recently used one.

    Requires NamedCache to be open.
    """
    self._lock.assert_locked()
    assert isinstance(name, basestring), name
    path = self._lru.get(name)
    create_named_link = False
    if path is None:
      path = self._allocate_dir()
      create_named_link = True
      logging.info('Created %r for %r', path, name)
    abs_path = os.path.join(self.root_dir, path)

    # TODO(maruel): That's weird, it should exist already.
    file_path.ensure_tree(abs_path)
    self._lru.add(name, path)

    if create_named_link:
      # Create symlink <root_dir>/<named>/<name> -> <root_dir>/<short name>
      # for user convenience.
      named_path = self._get_named_path(name)
      if os.path.exists(named_path):
        file_path.remove(named_path)
      else:
        file_path.ensure_tree(os.path.dirname(named_path))
      logging.info('Symlink %r to %r', named_path, abs_path)
      fs.symlink(abs_path, named_path)

    return abs_path

  def get_oldest(self):
    """Returns name of the LRU cache or None.

    Requires NamedCache to be open.
    """
    self._lock.assert_locked()
    try:
      return self._lru.get_oldest()[0]
    except KeyError:
      return None

  def get_timestamp(self, name):
    """Returns timestamp of last use of an item.

    Requires NamedCache to be open.

    Raises KeyError if cache is not found.
    """
    self._lock.assert_locked()
    assert isinstance(name, basestring), name
    return self._lru.get_timestamp(name)

  def create_symlinks(self, root, named_caches):
    """Creates symlinks in |root| for the specified named_caches.

    named_caches must be a list of (name, path) tuples.

    This is a plain method, not a context manager: it never yields, so it
    must not be wrapped with contextlib.contextmanager. Use delete_symlinks()
    to undo it.

    Requires NamedCache to be open.

    Raises Error if cannot create a symlink.
    """
    self._lock.assert_locked()
    for name, path in named_caches:
      logging.info('Named cache %r -> %r', name, path)
      # Bound before the try block so that the error message below can always
      # refer to it, including when path validation itself fails.
      symlink_path = os.path.join(root, path)
      try:
        _validate_named_cache_path(path)
        symlink_path = os.path.abspath(symlink_path)
        file_path.ensure_tree(os.path.dirname(symlink_path))
        requested = self.request(name)
        logging.info('Symlink %r to %r', symlink_path, requested)
        fs.symlink(requested, symlink_path)
      except (OSError, Error) as ex:
        raise Error(
            'cannot create a symlink for cache named "%s" at "%s": %s' % (
              name, symlink_path, ex))

  def delete_symlinks(self, root, named_caches):
    """Deletes symlinks from |root| for the specified named_caches.

    named_caches must be a list of (name, path) tuples.

    Raises Error if a path is invalid or a symlink cannot be removed.
    """
    for name, path in named_caches:
      logging.info('Unlinking named cache "%s"', name)
      # Bound before the try block so that the error message below can always
      # refer to it, including when path validation itself fails.
      symlink_path = os.path.join(root, path)
      try:
        _validate_named_cache_path(path)
        symlink_path = os.path.abspath(symlink_path)
        fs.unlink(symlink_path)
      except (OSError, Error) as ex:
        raise Error(
            'cannot unlink cache named "%s" at "%s": %s' % (
              name, symlink_path, ex))

  def trim(self, min_free_space):
    """Purges cache.

    Removes cache directories that were not accessed for a long time
    until there is enough free space and the number of caches does not exceed
    MAX_CACHE_SIZE.

    If min_free_space is None, disk free space is not checked.

    Requires NamedCache to be open.
    """
    self._lock.assert_locked()
    if not os.path.isdir(self.root_dir):
      return

    free_space = 0
    if min_free_space:
      free_space = file_path.get_free_space(self.root_dir)
    while ((min_free_space and free_space < min_free_space)
           or len(self._lru) > MAX_CACHE_SIZE):
      logging.info(
          'Making space for named cache %s > %s or %s > %s',
          free_space, min_free_space, len(self._lru), MAX_CACHE_SIZE)
      try:
        name, (path, _) = self._lru.get_oldest()
      except KeyError:
        # Nothing left to evict.
        return
      # Drop the convenience symlink first, then the cache content itself.
      named_dir = self._get_named_path(name)
      if fs.islink(named_dir):
        fs.unlink(named_dir)
      path_abs = os.path.join(self.root_dir, path)
      if os.path.isdir(path_abs):
        logging.info('Removing named cache %s', path_abs)
        file_path.rmtree(path_abs)
      if min_free_space:
        free_space = file_path.get_free_space(self.root_dir)
      self._lru.pop(name)

  _DIR_ALPHABET = string.ascii_letters + string.digits

  @classmethod
  def _dir_name_for_index(cls, index):
    """Maps an integer in [0, len(_DIR_ALPHABET)**2) to a two-character name."""
    # divmod performs floor division, so both results are valid integer
    # indexes regardless of the interpreter's "/" semantics.
    first, second = divmod(index, len(cls._DIR_ALPHABET))
    return cls._DIR_ALPHABET[first] + cls._DIR_ALPHABET[second]

  def _allocate_dir(self):
    """Picks and returns the relative path of an unused cache directory.

    The directory itself is not created here; request() creates it.

    Raises Error if no free name is found after 1000 distinct attempts.
    """
    # We randomly generate directory names that have two lower/upper case
    # letters or digits. Total number of possibilities is (26*2 + 10)^2 = 3844.
    abc_len = len(self._DIR_ALPHABET)
    tried = set()
    while len(tried) < 1000:
      i = random.randint(0, abc_len * abc_len - 1)
      rel_path = self._dir_name_for_index(i)
      if rel_path in tried:
        continue
      abs_path = os.path.join(self.root_dir, rel_path)
      if not fs.exists(abs_path):
        return rel_path
      tried.add(rel_path)
    raise Error('could not allocate a new cache dir, too many cache dirs')

  def _get_named_path(self, name):
    """Returns <root_dir>/named/<name>, the location of the named symlink."""
    return os.path.join(self.root_dir, 'named', name)
245
246
def add_named_cache_options(parser):
  """Registers the --named-cache and --named-cache-root options on |parser|.

  The options are placed in their own 'Named caches' option group.
  """
  named_cache_help = (
      'A named cache to request. Accepts two arguments, name and path. '
      'name identifies the cache, must match regex [a-z0-9_]{1,4096}. '
      'path is a path relative to the run dir where the cache directory '
      'must be symlinked to. '
      'This option can be specified more than once.')
  root_help = 'Cache root directory. Default=%default'

  cache_group = optparse.OptionGroup(parser, 'Named caches')
  # Each occurrence appends one (name, path) tuple to options.named_caches.
  cache_group.add_option(
      '--named-cache', dest='named_caches', action='append', nargs=2,
      default=[], help=named_cache_help)
  cache_group.add_option('--named-cache-root', help=root_help)
  parser.add_option_group(cache_group)
264
265
def process_named_cache_options(parser, options):
  """Checks the named cache flags and builds the matching CacheManager.

  Reports invalid flag combinations or values through parser.error().

  Returns a CacheManager rooted at the absolute --named-cache-root, or None
  when no root was given.
  """
  requested = options.named_caches
  cache_root = options.named_cache_root
  if requested and not cache_root:
    parser.error('--named-cache is specified, but --named-cache-root is empty')
  for name, path in requested:
    if CACHE_NAME_RE.match(name) is None:
      parser.error(
          'cache name "%s" does not match %s' % (name, CACHE_NAME_RE.pattern))
    if not path:
      parser.error('cache path cannot be empty')
  if not cache_root:
    return None
  return CacheManager(os.path.abspath(cache_root))
nodird6160682017-02-02 13:03:35 -0800279
280
281def _validate_named_cache_path(path):
282 if os.path.isabs(path):
283 raise Error('named cache path must not be absolute')
284 if '..' in path.split(os.path.sep):
285 raise Error('named cache path must not contain ".."')