blob: 877c59c254a419d320335ce69e564243ef4f59e5 [file] [log] [blame]
Mike Frysingerd5fcb3a2013-05-30 21:10:50 -04001# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""Upload all debug symbols required for crash reporting purposes.
6
7This script need only be used to upload release builds symbols or to debug
8crashes on non-release builds (in which case try to only upload the symbols
Mike Frysinger02e1e072013-11-10 22:11:34 -05009for those executables involved).
10"""
Mike Frysingerd5fcb3a2013-05-30 21:10:50 -040011
12import ctypes
Mike Frysinger02e92402013-11-22 16:22:02 -050013import functools
Mike Frysingerd5fcb3a2013-05-30 21:10:50 -040014import multiprocessing
15import os
Mike Frysinger094a2172013-08-14 12:54:35 -040016import poster
Mike Frysingerd5fcb3a2013-05-30 21:10:50 -040017import random
18import textwrap
19import tempfile
20import time
Mike Frysinger094a2172013-08-14 12:54:35 -040021import urllib2
Mike Frysingerd5fcb3a2013-05-30 21:10:50 -040022
Mike Frysingerd5fcb3a2013-05-30 21:10:50 -040023from chromite.lib import commandline
24from chromite.lib import cros_build_lib
Mike Frysingerd5fcb3a2013-05-30 21:10:50 -040025from chromite.lib import parallel
Mike Frysinger69cb41d2013-08-11 20:08:19 -040026from chromite.scripts import cros_generate_breakpad_symbols
Mike Frysingerd5fcb3a2013-05-30 21:10:50 -040027
28
# URLs used for uploading symbols.
OFFICIAL_UPLOAD_URL = 'http://clients2.google.com/cr/symbol'
STAGING_UPLOAD_URL = 'http://clients2.google.com/cr/staging_symbol'


# The crash server rejects files that are this big (350 MiB).
CRASH_SERVER_FILE_LIMIT = 350 * 1024 * 1024
# Give ourselves a little breathing room (10 MiB) from what the server expects.
DEFAULT_FILE_LIMIT = CRASH_SERVER_FILE_LIMIT - (10 * 1024 * 1024)


# How long to wait (in seconds) for a single upload to complete. This has
# to allow for symbols that are up to CRASH_SERVER_FILE_LIMIT in size.
UPLOAD_TIMEOUT = 30 * 60


# Sleep for 200ms in between uploads to avoid DoS'ing the symbol server.
DEFAULT_SLEEP_DELAY = 0.2


# Number of seconds to wait before retrying an upload. The delay will double
# for each subsequent retry of the same symbol file.
INITIAL_RETRY_DELAY = 1

# Allow up to 7 attempts to upload a symbol file (total delay may be
# 1+2+4+8+16+32=63 seconds).
MAX_RETRIES = 6

# Number of total errors before uploads are no longer attempted (see
# ErrorLimitHit, which stops once the count is strictly greater than this).
# This is used to avoid lots of errors causing unreasonable delays.
# See the related, but independent, error values below.
MAX_TOTAL_ERRORS_FOR_RETRY = 30

# A watermark of transient errors which we allow recovery from. If we hit
# errors infrequently, overall we're probably doing fine. For example, if
# we have one failure every 100 passes, then we probably don't want to fail
# right away. But if we hit a string of failures in a row, we want to abort.
#
# The watermark starts at 0 (and can never go below that). When this error
# level is exceeded, we stop uploading. When a failure happens, we add the
# fail adjustment, and when an upload succeeds, we add the pass adjustment.
# We want to penalize failures more so that we ramp up when there is a string
# of them, but then slowly back off as things start working.
#
# A quick example:
#  0.0: Starting point.
#  0.0: Upload works, so add -0.5, and then clamp to 0.
#  1.0: Upload fails, so add 1.0.
#  2.0: Upload fails, so add 1.0.
#  1.5: Upload works, so add -0.5.
#  1.0: Upload works, so add -0.5.
ERROR_WATERMARK = 3.0
ERROR_ADJUST_FAIL = 1.0
ERROR_ADJUST_PASS = -0.5
Mike Frysingerd5fcb3a2013-05-30 21:10:50 -040083
84
def SymUpload(sym_file, upload_url):
  """POST a breakpad symbol file to an HTTP crash server.

  The request is a multipart/form-data POST carrying these parameters:
    code_file:        the basename of the module, e.g. "app"
    code_identifier:  the module file's identifier
    debug_file:       the basename of the debugging file, e.g. "app"
    debug_identifier: the debug file's identifier, usually consisting of
                      the guid and age embedded in the pdb, e.g.
                      "11111111BBBB3333DDDD555555555555F"
    version:          the file version of the module, e.g. "1.2.3.4"
    product:          HTTP-friendly product name
    os:               the operating system that the module was built for
    cpu:              the CPU that the module was built for
    symbol_file:      the contents of the breakpad-format symbol file

  Only code_file, debug_file, debug_identifier, os, cpu and symbol_file are
  actually sent; the values come from the header of |sym_file|.

  Args:
    sym_file: The symbol file to upload
    upload_url: The crash URL to POST the |sym_file| to
  """
  header = cros_generate_breakpad_symbols.ReadSymsHeader(sym_file)

  # Both the code and the debug file are reported under the module name.
  module_name = header.name

  # The 'version' and 'product' fields are not sent. They aren't critical,
  # and filling them in would require figuring out which file this symbol
  # came from and which package provides it:
  #   ('version', None),
  #   ('product', 'ChromeOS'),
  form_fields = (
      ('code_file', module_name),
      ('debug_file', module_name),
      # The identifier is sent with its dashes removed.
      ('debug_identifier', header.id.replace('-', '')),
      ('os', header.os),
      ('cpu', header.cpu),
      poster.encode.MultipartParam.from_file('symbol_file', sym_file),
  )

  body, http_headers = poster.encode.multipart_encode(form_fields)
  post = urllib2.Request(upload_url, body, http_headers)
  post.add_header('User-agent', 'chromite.upload_symbols')
  urllib2.urlopen(post, timeout=UPLOAD_TIMEOUT)
Mike Frysingerd5fcb3a2013-05-30 21:10:50 -0400125
126
def TestingSymUpload(sym_file, upload_url):
  """Stand-in for SymUpload used by --testing; never touches the network.

  Args:
    sym_file: The symbol file that would have been uploaded
    upload_url: The crash URL that would have been POSTed to

  Returns:
    A cros_build_lib.CommandResult describing the pretend upload.

  Raises:
    urllib2.HTTPError: When the randomly chosen outcome is a failure.
  """
  fake_cmd = ['sym_upload', sym_file, upload_url]
  # Randomly fail 80% of the time (the retry logic makes this 80%/3 per file).
  failed = random.randint(1, 100) <= 80
  cros_build_lib.Debug('would run (and return %i): %s', failed,
                       ' '.join(repr(arg) for arg in fake_cmd))

  message = ('Failed to send the symbol file.' if failed
             else 'Successfully sent the symbol file.')
  outcome = cros_build_lib.CommandResult(cmd=fake_cmd, error=None,
                                         output=message, returncode=failed)
  if not failed:
    return outcome
  raise urllib2.HTTPError(upload_url, 400, 'forced test fail', {}, None)
144
145
def ErrorLimitHit(num_errors, watermark_errors):
  """Check whether either error limit has been exceeded

  Args:
    num_errors: A multiprocessing.Value of the raw number of failures
      (or None to skip this check).
    watermark_errors: A multiprocessing.Value of the current rate of failures
      (or None to skip this check).

  Returns:
    True if the raw count is above MAX_TOTAL_ERRORS_FOR_RETRY or the watermark
    is above ERROR_WATERMARK; False otherwise.
  """
  # Too many failures overall?
  if num_errors is not None and num_errors.value > MAX_TOTAL_ERRORS_FOR_RETRY:
    return True

  # Too many failures recently?
  if watermark_errors is not None and watermark_errors.value > ERROR_WATERMARK:
    return True

  return False
159
160
def _UpdateCounter(counter, adj):
  """Add |adj| to |counter|, never letting it drop below zero

  When |counter| exposes get_lock() (as multiprocessing.Value does), the
  read-modify-write happens while holding that lock. A |counter| of None is
  silently ignored.

  Args:
    counter: An object with a .value member to update (or None)
    adj: The value to add to |counter|
  """
  if counter is None:
    return

  # Keep the floor the same numeric type as the adjustment.
  floor = 0 if type(adj) is int else 0.0

  if not hasattr(counter, 'get_lock'):
    # Plain object (e.g. ctypes.c_int); nothing to lock.
    counter.value = max(floor, counter.value + adj)
    return

  with counter.get_lock():
    counter.value = max(floor, counter.value + adj)
180
181
def UploadSymbol(sym_file, upload_url, file_limit=DEFAULT_FILE_LIMIT,
                 sleep=0, num_errors=None, watermark_errors=None,
                 failed_queue=None):
  """Upload |sym_file| to |upload_url|

  If the file is larger than |file_limit|, a copy with the "STACK CFI" lines
  removed is uploaded instead. The upload itself goes through SymUpload and
  is retried on urllib2.HTTPError/URLError (MAX_RETRIES retries, starting at
  INITIAL_RETRY_DELAY seconds). The error counters are updated once per
  call, after all retries.

  Args:
    sym_file: The full path to the breakpad symbol to upload
    upload_url: The crash server to upload things to
    file_limit: The max file size of a symbol file before we try to strip it
      (a false value disables stripping)
    sleep: Number of seconds to sleep before running
    num_errors: An object to update with the error count (needs a .value member)
    watermark_errors: An object to track current error behavior (needs a .value)
    failed_queue: When a symbol fails, add it to this queue

  Returns:
    The current value of |num_errors| after this upload, or 0 if the upload
    was skipped because the error limits had already been exceeded.
  """
  if num_errors is None:
    num_errors = ctypes.c_int()
  if ErrorLimitHit(num_errors, watermark_errors):
    # Abandon ship!  It's on fire!  NOoooooooooooOOOoooooo.
    return 0

  upload_file = sym_file

  if sleep:
    # Keeps us from DoS-ing the symbol server.
    time.sleep(sleep)

  cros_build_lib.Debug('uploading %s', sym_file)

  # Ideally there'd be a tempfile.SpooledNamedTemporaryFile that we could use.
  # The file is unbuffered so the stripped data is on disk (and its size is
  # accurate) as soon as writelines() returns.
  with tempfile.NamedTemporaryFile(prefix='upload_symbols',
                                   bufsize=0) as temp_sym_file:
    if file_limit:
      # If the symbols size is too big, strip out the call frame info.  The CFI
      # is unnecessary for 32bit x86 targets where the frame pointer is used (as
      # all of ours have) and it accounts for over half the size of the symbols
      # uploaded.
      file_size = os.path.getsize(sym_file)
      if file_size > file_limit:
        cros_build_lib.Warning('stripping CFI from %s due to size %s > %s',
                               sym_file, file_size, file_limit)
        # Stream the file line by line: these files are hundreds of megabytes,
        # so avoid holding the whole thing in memory, and make sure the input
        # handle is closed once we are done with it.
        with open(sym_file, 'rb') as full_sym_file:
          temp_sym_file.writelines(x for x in full_sym_file
                                   if not x.startswith('STACK CFI'))
        upload_file = temp_sym_file.name

    # Hopefully the crash server will let it through.  But it probably won't.
    # Not sure what the best answer is in this case.
    file_size = os.path.getsize(upload_file)
    if file_size > CRASH_SERVER_FILE_LIMIT:
      cros_build_lib.PrintBuildbotStepWarnings()
      cros_build_lib.Warning('upload file %s is awfully large, risking '
                             'rejection by the symbol server (%s > %s)',
                             sym_file, file_size, CRASH_SERVER_FILE_LIMIT)

    # Upload the symbol file.
    success = False
    try:
      cros_build_lib.TimedCommand(
          cros_build_lib.RetryException,
          (urllib2.HTTPError, urllib2.URLError), MAX_RETRIES, SymUpload,
          upload_file, upload_url, sleep=INITIAL_RETRY_DELAY,
          timed_log_msg='upload of %10i bytes took %%s: %s' %
          (file_size, os.path.basename(sym_file)))
      success = True
    except urllib2.HTTPError as e:
      # HTTPError must be handled before URLError as it is a subclass of it.
      cros_build_lib.Warning('could not upload: %s: HTTP %s: %s',
                             os.path.basename(sym_file), e.code, e.reason)
    except urllib2.URLError as e:
      cros_build_lib.Warning('could not upload: %s: %s',
                             os.path.basename(sym_file), e)
    finally:
      # This runs for unexpected exceptions too, so those are also counted as
      # failures before they propagate to the caller.
      if success:
        _UpdateCounter(watermark_errors, ERROR_ADJUST_PASS)
      else:
        _UpdateCounter(num_errors, 1)
        _UpdateCounter(watermark_errors, ERROR_ADJUST_FAIL)
        if failed_queue is not None:
          failed_queue.put(sym_file)

  return num_errors.value
263
264
def SymbolFinder(paths):
  """Generate the symbol files named by (or found under) |paths|

  Args:
    paths: A list of input paths. Anything that is not a directory is yielded
      as is, without any checks. Directories are walked recursively and only
      the files whose names end in ".sym" are yielded.

  Returns:
    Yield every viable sym file.
  """
  for path in paths:
    if not os.path.isdir(path):
      # Explicit files (even missing ones) are passed straight through.
      yield path
      continue

    for dirpath, _, filenames in os.walk(path):
      for name in filenames:
        if name.endswith('.sym'):
          yield os.path.join(dirpath, name)
282
283
def UploadSymbols(board=None, official=False, breakpad_dir=None,
                  file_limit=DEFAULT_FILE_LIMIT, sleep=DEFAULT_SLEEP_DELAY,
                  upload_count=None, sym_paths=None, root=None, retry=True):
  """Upload all the generated symbols for |board| to the crash server

  You can use in a few ways:
    * pass |board| to locate all of its symbols
    * pass |breakpad_dir| to upload all the symbols in there
    * pass |sym_paths| to upload specific symbols (or dirs of symbols)

  Uploads run one at a time in a background process. Symbols that fail on the
  first pass are collected and, if |retry| is set and the error limits have
  not been exceeded, tried once more in a second pass.

  Args:
    board: The board whose symbols we wish to upload
    official: Use the official symbol server rather than the staging one
    breakpad_dir: The full path to the breakpad directory where symbols live
    file_limit: The max file size of a symbol file before we try to strip it
    sleep: How long to sleep in between uploads
    upload_count: If set, only upload this many symbols (meant for testing)
    sym_paths: Specific symbol files (or dirs of sym files) to upload,
      otherwise search |breakpad_dir|
    root: The tree to prefix to |breakpad_dir| (if |breakpad_dir| is not set);
      when omitted, the board's breakpad dir is used without any prefix
    retry: Whether we should retry failures.

  Returns:
    The number of errors that were encountered.
  """
  if official:
    upload_url = OFFICIAL_UPLOAD_URL
  else:
    cros_build_lib.Warning('unofficial builds upload to the staging server')
    upload_url = STAGING_UPLOAD_URL

  if sym_paths:
    cros_build_lib.Info('uploading specified symbols to %s', upload_url)
  else:
    if breakpad_dir is None:
      # os.path.join() cannot take None, so fall back to the real root, which
      # leaves the board's breakpad dir unprefixed.
      if root is None:
        root = '/'
      breakpad_dir = os.path.join(
          root,
          cros_generate_breakpad_symbols.FindBreakpadDir(board).lstrip('/'))
    cros_build_lib.Info('uploading all symbols to %s from %s', upload_url,
                        breakpad_dir)
    sym_paths = [breakpad_dir]

  # State shared with the background upload process.
  bg_errors = multiprocessing.Value('i')
  watermark_errors = multiprocessing.Value('f')
  failed_queue = multiprocessing.Queue()
  uploader = functools.partial(
      UploadSymbol, file_limit=file_limit, sleep=sleep, num_errors=bg_errors,
      watermark_errors=watermark_errors, failed_queue=failed_queue)

  # For the first run, we collect the symbols that failed.  If the
  # overall failure rate was low, we'll retry them on the second run.
  # The second pass never schedules a further retry.
  for retry_after_pass in (retry, False):
    # We need to limit ourselves to one upload at a time to avoid the server
    # kicking in DoS protection.  See these bugs for more details:
    # http://crbug.com/209442
    # http://crbug.com/212496
    with parallel.BackgroundTaskRunner(uploader, processes=1) as queue:
      for sym_file in SymbolFinder(sym_paths):
        if upload_count == 0:
          break

        queue.put([sym_file, upload_url])

        if upload_count is not None:
          upload_count -= 1
          if upload_count == 0:
            break

    # See if we need to retry, and if we haven't failed too many times already.
    if not retry_after_pass or ErrorLimitHit(bg_errors, watermark_errors):
      break

    sym_paths = []
    while not failed_queue.empty():
      sym_paths.append(failed_queue.get())
    if sym_paths:
      cros_build_lib.Warning('retrying %i symbols', len(sym_paths))
      # Make room in the budget for the retries.  With no limit set
      # (|upload_count| is None) there is nothing to adjust.
      if upload_count is not None:
        upload_count += len(sym_paths)
      # Decrement the error count in case we recover in the second pass.
      assert bg_errors.value >= len(sym_paths), 'more failed files than errors?'
      bg_errors.value -= len(sym_paths)
    else:
      # No failed symbols, so just return now.
      break

  return bg_errors.value
Mike Frysingerd5fcb3a2013-05-30 21:10:50 -0400369
370
def main(argv):
  """Parse the command line, then (optionally regenerate and) upload symbols

  Args:
    argv: The command line arguments (without the program name)

  Returns:
    0 if everything worked, 1 if any problem was encountered.
  """
  # --testing mode rebinds these module-level names below.
  # pylint: disable=W0601,W0603
  global INITIAL_RETRY_DELAY, SymUpload, DEFAULT_SLEEP_DELAY

  parser = commandline.ArgumentParser(description=__doc__)

  parser.add_argument('sym_paths', type='path', nargs='*', default=None)
  parser.add_argument('--board', default=None,
                      help='board to build packages for')
  parser.add_argument('--breakpad_root', type='path', default=None,
                      help='root directory for breakpad symbols')
  parser.add_argument('--official_build', action='store_true', default=False,
                      help='point to official symbol server')
  parser.add_argument('--regenerate', action='store_true', default=False,
                      help='regenerate all symbols')
  parser.add_argument('--upload-count', type=int, default=None,
                      help='only upload # number of symbols')
  # Share the library default rather than repeating its arithmetic here.
  parser.add_argument('--strip_cfi', type=int,
                      default=DEFAULT_FILE_LIMIT,
                      help='strip CFI data for files above this size')
  parser.add_argument('--testing', action='store_true', default=False,
                      help='run in testing mode')
  parser.add_argument('--yes', action='store_true', default=False,
                      help='answer yes to all prompts')

  opts = parser.parse_args(argv)

  if opts.sym_paths:
    if opts.regenerate:
      cros_build_lib.Die('--regenerate may not be used with specific files')
  else:
    if opts.board is None:
      cros_build_lib.Die('--board is required')

  if opts.breakpad_root and opts.regenerate:
    cros_build_lib.Die('--regenerate may not be used with --breakpad_root')

  if opts.testing:
    # TODO(build): Kill off --testing mode once unittests are up-to-snuff.
    cros_build_lib.Info('running in testing mode')
    # Drop all the delays and swap in the stub uploader.
    INITIAL_RETRY_DELAY = DEFAULT_SLEEP_DELAY = 0
    SymUpload = TestingSymUpload

  if not opts.yes:
    query = textwrap.wrap(textwrap.dedent("""
        Uploading symbols for an entire Chromium OS build is really only
        necessary for release builds and in a few cases for developers
        to debug problems. It will take considerable time to run. For
        developer debugging purposes, consider instead passing specific
        files to upload.
    """), 80)
    cros_build_lib.Warning('\n%s', '\n'.join(query))
    if not cros_build_lib.BooleanPrompt(
        prompt='Are you sure you want to upload all build symbols',
        default=False):
      cros_build_lib.Die('better safe than sorry')

  ret = 0
  if opts.regenerate:
    ret += cros_generate_breakpad_symbols.GenerateBreakpadSymbols(
        opts.board, breakpad_dir=opts.breakpad_root)

  # DEFAULT_SLEEP_DELAY is read here (not via the function default) so that
  # the --testing override above takes effect.
  ret += UploadSymbols(opts.board, official=opts.official_build,
                       breakpad_dir=opts.breakpad_root,
                       file_limit=opts.strip_cfi, sleep=DEFAULT_SLEEP_DELAY,
                       upload_count=opts.upload_count, sym_paths=opts.sym_paths)
  if ret:
    cros_build_lib.Error('encountered %i problem(s)', ret)
    # Since exit(status) gets masked, clamp it to 1 so we don't inadvertently
    # return 0 in case we are a multiple of the mask.
    ret = 1

  return ret
Mike Frysinger094a2172013-08-14 12:54:35 -0400443
444
# We need this to run once per process. Do it at module import time, as that
# lets us avoid doing it inline at function call time (see SymUpload): that
# func might be called by the multiprocessing module, which means we'd do the
# opener logic multiple times overall. Plus, if you're importing this module,
# there's a pretty good chance that you're going to need this.
# NOTE(review): presumably this installs poster's streaming handlers as the
# urllib2 openers that SymUpload relies on -- confirm against the poster docs.
poster.streaminghttp.register_openers()