blob: e4f48710ff90a8c4a5020edd560437b244cc3bfa [file] [log] [blame]
Mike Frysingerd5fcb3a2013-05-30 21:10:50 -04001# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""Upload all debug symbols required for crash reporting purposes.
6
7This script need only be used to upload release builds symbols or to debug
8crashes on non-release builds (in which case try to only upload the symbols
Mike Frysinger02e1e072013-11-10 22:11:34 -05009for those executables involved).
10"""
Mike Frysingerd5fcb3a2013-05-30 21:10:50 -040011
Mike Frysingera4fa1e82014-01-15 01:45:56 -050012from __future__ import print_function
13
Mike Frysingerd5fcb3a2013-05-30 21:10:50 -040014import ctypes
Mike Frysinger8ec8c502014-02-10 00:19:13 -050015import datetime
Mike Frysinger02e92402013-11-22 16:22:02 -050016import functools
Mike Frysinger0c0efa22014-02-09 23:32:23 -050017import hashlib
Mike Frysingera4fa1e82014-01-15 01:45:56 -050018import httplib
Mike Frysingerd5fcb3a2013-05-30 21:10:50 -040019import multiprocessing
20import os
Mike Frysinger094a2172013-08-14 12:54:35 -040021import poster
Mike Frysingerd5fcb3a2013-05-30 21:10:50 -040022import random
Mike Frysingerfd355652014-01-23 02:57:48 -050023import socket
Mike Frysingerd5fcb3a2013-05-30 21:10:50 -040024import textwrap
25import tempfile
26import time
Mike Frysinger094a2172013-08-14 12:54:35 -040027import urllib2
Mike Frysingerd41938e2014-02-10 06:37:55 -050028import urlparse
Mike Frysingerd5fcb3a2013-05-30 21:10:50 -040029
Mike Frysinger0c0efa22014-02-09 23:32:23 -050030from chromite.buildbot import constants
Mike Frysingerd41938e2014-02-10 06:37:55 -050031from chromite.lib import cache
Mike Frysingerd5fcb3a2013-05-30 21:10:50 -040032from chromite.lib import commandline
33from chromite.lib import cros_build_lib
Mike Frysingerd41938e2014-02-10 06:37:55 -050034from chromite.lib import gs
35from chromite.lib import osutils
Mike Frysingerd5fcb3a2013-05-30 21:10:50 -040036from chromite.lib import parallel
David Jamesc93e6a4d2014-01-13 11:37:36 -080037from chromite.lib import retry_util
Mike Frysinger0c0efa22014-02-09 23:32:23 -050038from chromite.lib import timeout_util
Mike Frysinger69cb41d2013-08-11 20:08:19 -040039from chromite.scripts import cros_generate_breakpad_symbols
Mike Frysingerd5fcb3a2013-05-30 21:10:50 -040040
Mike Frysinger0c0efa22014-02-09 23:32:23 -050041# Needs to be after chromite imports.
42# TODO(build): When doing the initial buildbot bootstrap, we won't have any
43# other repos available. So ignore isolateserver imports. But buildbot will
44# re-exec itself once it has done a full repo sync and then the module will
45# be available -- it isn't needed that early. http://crbug.com/341152
46try:
47 import isolateserver
48except ImportError:
49 isolateserver = None
50
Mike Frysingerd5fcb3a2013-05-30 21:10:50 -040051
# URLs used for uploading symbols.
OFFICIAL_UPLOAD_URL = 'http://clients2.google.com/cr/symbol'
STAGING_UPLOAD_URL = 'http://clients2.google.com/cr/staging_symbol'


# The crash server rejects files that are this big (size in bytes; it is
# compared against os.path.getsize() results).
CRASH_SERVER_FILE_LIMIT = 350 * 1024 * 1024
# Give ourselves a little breathing room from what the server expects.
DEFAULT_FILE_LIMIT = CRASH_SERVER_FILE_LIMIT - (10 * 1024 * 1024)


# The batch limit when talking to the dedup server. We avoid sending one at a
# time as the round trip overhead will dominate. Conversely, we avoid sending
# all at once so we can start uploading symbols asap -- the symbol server is a
# bit slow and will take longer than anything else.
# TODO: A better algorithm would be adaptive. If we have more than one symbol
# in the upload queue waiting, we could send more symbols to the dedupe server
# at a time.
DEDUPE_LIMIT = 100

# How long to wait for the server to respond with the results. Note that the
# larger the limit above, the larger this will need to be. So we give it ~1
# second per item max.
DEDUPE_TIMEOUT = DEDUPE_LIMIT

# How long to wait for the notification to finish (in minutes). If it takes
# longer than this, we'll stop notifying, but that's not a big deal as we
# will be able to recover in later runs.
DEDUPE_NOTIFY_TIMEOUT = 20

# The unique namespace in the dedupe server that only we use. Helps avoid
# collisions with all the hashed values and unrelated content.
OFFICIAL_DEDUPE_NAMESPACE = 'chromium-os-upload-symbols'
STAGING_DEDUPE_NAMESPACE = '%s-staging' % OFFICIAL_DEDUPE_NAMESPACE


# How long to wait (in seconds) for a single upload to complete. This has
# to allow for symbols that are up to CRASH_SERVER_FILE_LIMIT in size.
UPLOAD_TIMEOUT = 30 * 60


# Sleep for 200ms in between uploads to avoid DoS'ing symbol server.
DEFAULT_SLEEP_DELAY = 0.2


# Number of seconds to wait before retrying an upload. The delay will double
# for each subsequent retry of the same symbol file.
INITIAL_RETRY_DELAY = 1

# Allow up to 7 attempts to upload a symbol file (total delay may be
# 1+2+4+8+16+32=63 seconds).
MAX_RETRIES = 6

# Number of total errors, before uploads are no longer attempted.
# This is used to avoid lots of errors causing unreasonable delays.
# See the related, but independent, error values below.
MAX_TOTAL_ERRORS_FOR_RETRY = 30

# A watermark of transient errors which we allow recovery from. If we hit
# errors infrequently, overall we're probably doing fine. For example, if
# we have one failure every 100 passes, then we probably don't want to fail
# right away. But if we hit a string of failures in a row, we want to abort.
#
# The watermark starts at 0 (and can never go below that). When this error
# level is exceeded, we stop uploading. When a failure happens, we add the
# fail adjustment, and when an upload succeeds, we add the pass adjustment.
# We want to penalize failures more so that we ramp up when there is a string
# of them, but then slowly back off as things start working.
#
# A quick example:
#  0.0: Starting point.
#  0.0: Upload works, so add -0.5, and then clamp to 0.
#  1.0: Upload fails, so add 1.0.
#  2.0: Upload fails, so add 1.0.
#  1.5: Upload works, so add -0.5.
#  1.0: Upload works, so add -0.5.
ERROR_WATERMARK = 3.0
ERROR_ADJUST_FAIL = 1.0
ERROR_ADJUST_PASS = -0.5
Mike Frysingerd5fcb3a2013-05-30 21:10:50 -0400131
132
def SymUpload(upload_url, sym_item):
  """POST a single breakpad symbol file to an HTTP crash server

  The request is a multipart/form-data POST made up of these form fields:
    code_file: the module name recorded in the symbol header
    debug_file: the same module name again
    debug_identifier: the id from the symbol header with every '-' removed
    product: always "ChromeOS"
    os: the operating system recorded in the symbol header
    cpu: the CPU recorded in the symbol header
    symbol_file: the contents of the breakpad-format symbol file

  Nothing is caught here; whatever urllib2.urlopen raises reaches the caller.

  Args:
    upload_url: The crash URL to POST the symbol file to
    sym_item: An item (e.g. SymbolItem) whose |sym_header| describes the
      symbol and whose |sym_file| is the path of the file to send
  """
  header = sym_item.sym_header

  # The product/version fields are used by the server only for statistic
  # purposes. They do not impact symbolization, so they're safe to set
  # to any value all the time. Here the product field helps us see the
  # load our build system is placing on the server.
  # No version field is sent: it is not clear what to set it to (maybe the
  # git sha1 of this file). Note: the server restricts it to 30 chars.
  fields = [
      ('code_file', header.name),
      ('debug_file', header.name),
      ('debug_identifier', header.id.replace('-', '')),
      ('product', 'ChromeOS'),
      ('os', header.os),
      ('cpu', header.cpu),
  ]
  # The file itself goes last in the form.
  fields.append(
      poster.encode.MultipartParam.from_file('symbol_file', sym_item.sym_file))

  body, headers = poster.encode.multipart_encode(fields)
  request = urllib2.Request(upload_url, body, headers)
  request.add_header('User-agent', 'chromite.upload_symbols')
  urllib2.urlopen(request, timeout=UPLOAD_TIMEOUT)
Mike Frysingerd5fcb3a2013-05-30 21:10:50 -0400178
179
def TestingSymUpload(upload_url, sym_item):
  """Pretend to upload |sym_item|, failing at random (for --testing usage)

  Nothing is sent anywhere; the command that "would run" is only logged.

  Args:
    upload_url: The crash URL the symbol would have been sent to
    sym_item: An item whose |sym_file| is the path that would have been sent

  Returns:
    A cros_build_lib.CommandResult describing the pretend run (when it passes).

  Raises:
    One of socket.error, httplib.BadStatusLine, urllib2.HTTPError or
    urllib2.URLError, picked at random, when the pretend run fails.
  """
  cmd = ['sym_upload', sym_item.sym_file, upload_url]
  # Randomly fail 80% of the time.
  failed = random.randint(1, 100) <= 80
  cros_build_lib.Debug('would run (and return %i): %s', failed,
                       cros_build_lib.CmdToStr(cmd))

  output = ('Failed to send the symbol file.' if failed
            else 'Successfully sent the symbol file.')
  result = cros_build_lib.CommandResult(cmd=cmd, error=None, output=output,
                                        returncode=failed)
  if not failed:
    return result

  # Cover every exception type the real upload path knows how to handle.
  raise random.choice((
      socket.error('[socket.error] forced test fail'),
      httplib.BadStatusLine('[BadStatusLine] forced test fail'),
      urllib2.HTTPError(upload_url, 400, '[HTTPError] forced test fail',
                        {}, None),
      urllib2.URLError('[URLError] forced test fail'),
  ))
204
205
def ErrorLimitHit(num_errors, watermark_errors):
  """Check whether either of our error limits has been exceeded

  Args:
    num_errors: An object whose .value is the raw number of failures (e.g. a
      multiprocessing.Value), or None to skip this check.
    watermark_errors: An object whose .value is the current failure watermark
      (e.g. a multiprocessing.Value), or None to skip this check.

  Returns:
    True if the raw count is above MAX_TOTAL_ERRORS_FOR_RETRY or the watermark
    is above ERROR_WATERMARK; False otherwise.
  """
  # Too many failures overall?
  if num_errors is not None and num_errors.value > MAX_TOTAL_ERRORS_FOR_RETRY:
    return True

  # Too many failures recently?
  return (watermark_errors is not None and
          watermark_errors.value > ERROR_WATERMARK)
220
221
222def _UpdateCounter(counter, adj):
223 """Update |counter| by |adj|
224
225 Handle atomic updates of |counter|. Also make sure it does not
226 fall below 0.
227
228 Args:
229 counter: A multiprocessing.Value to update
230 adj: The value to add to |counter|
231 """
232 def _Update():
233 clamp = 0 if type(adj) is int else 0.0
234 counter.value = max(clamp, counter.value + adj)
235
236 if hasattr(counter, 'get_lock'):
237 with counter.get_lock():
238 _Update()
239 elif counter is not None:
240 _Update()
241
242
def UploadSymbol(upload_url, sym_item, file_limit=DEFAULT_FILE_LIMIT,
                 sleep=0, num_errors=None, watermark_errors=None,
                 failed_queue=None, passed_queue=None):
  """Upload |sym_item| to |upload_url|

  If the symbol file is larger than |file_limit|, a copy with the
  "STACK CFI" lines removed is written to a temp file and uploaded instead.
  The upload goes through SymUpload and is retried up to MAX_RETRIES times
  on urllib2.HTTPError/urllib2.URLError.

  Args:
    upload_url: The crash server to upload things to
    sym_item: A SymbolItem containing the path to the breakpad symbol to upload
    file_limit: The max file size of a symbol file before we try to strip it
      (a false value disables stripping)
    sleep: Number of seconds to sleep before running
    num_errors: An object to update with the error count (needs a .value member)
    watermark_errors: An object to track current error behavior (needs a .value)
    failed_queue: When a symbol fails, add its path to this queue
    passed_queue: When a symbol passes, add |sym_item| to this queue

  Returns:
    The number of errors recorded in |num_errors| so far, or 0 when the upload
    was skipped because the error limits had already been hit.
  """
  sym_file = sym_item.sym_file
  upload_item = sym_item

  if num_errors is None:
    num_errors = ctypes.c_int()
  if ErrorLimitHit(num_errors, watermark_errors):
    # Abandon ship! It's on fire! NOoooooooooooOOOoooooo.
    # The file still counts as "not uploaded" so a later pass can retry it.
    if failed_queue:
      failed_queue.put(sym_file)
    return 0

  if sleep:
    # Keeps us from DoS-ing the symbol server.
    time.sleep(sleep)

  cros_build_lib.Debug('uploading %s', sym_file)

  # Ideally there'd be a tempfile.SpooledNamedTemporaryFile that we could use.
  # The temp file is deleted when this block exits, so the upload itself has
  # to happen inside it.
  with tempfile.NamedTemporaryFile(prefix='upload_symbols',
                                   bufsize=0) as temp_sym_file:
    if file_limit:
      # If the symbols size is too big, strip out the call frame info. The CFI
      # is unnecessary for 32bit x86 targets where the frame pointer is used (as
      # all of ours have) and it accounts for over half the size of the symbols
      # uploaded.
      file_size = os.path.getsize(sym_file)
      if file_size > file_limit:
        cros_build_lib.Warning('stripping CFI from %s due to size %s > %s',
                               sym_file, file_size, file_limit)
        # Stream the copy line by line: these files are hundreds of MB, so
        # avoid holding the whole thing in memory, and make sure the source
        # file gets closed again.
        with open(sym_file, 'rb') as src:
          temp_sym_file.writelines(line for line in src
                                   if not line.startswith('STACK CFI'))

        upload_item = FakeItem(sym_file=temp_sym_file.name,
                               sym_header=sym_item.sym_header)

    # Hopefully the crash server will let it through. But it probably won't.
    # Not sure what the best answer is in this case.
    file_size = os.path.getsize(upload_item.sym_file)
    if file_size > CRASH_SERVER_FILE_LIMIT:
      cros_build_lib.PrintBuildbotStepWarnings()
      cros_build_lib.Warning('upload file %s is awfully large, risking '
                             'rejection by the symbol server (%s > %s)',
                             sym_file, file_size, CRASH_SERVER_FILE_LIMIT)

    # Upload the symbol file.
    success = False
    try:
      cros_build_lib.TimedCommand(
          retry_util.RetryException,
          (urllib2.HTTPError, urllib2.URLError), MAX_RETRIES, SymUpload,
          upload_url, upload_item, sleep=INITIAL_RETRY_DELAY,
          timed_log_msg='upload of %10i bytes took %%s: %s' %
          (file_size, os.path.basename(sym_file)))
      success = True

      # Report the original item (not the stripped stand-in) as uploaded.
      if passed_queue:
        passed_queue.put(sym_item)
    except urllib2.HTTPError as e:
      cros_build_lib.Warning('could not upload: %s: HTTP %s: %s',
                             os.path.basename(sym_file), e.code, e.reason)
    except (urllib2.URLError, httplib.HTTPException, socket.error) as e:
      cros_build_lib.Warning('could not upload: %s: %s',
                             os.path.basename(sym_file), e)
    finally:
      # Runs for unexpected exceptions too, so those count as failures
      # before they propagate.
      if success:
        _UpdateCounter(watermark_errors, ERROR_ADJUST_PASS)
      else:
        _UpdateCounter(num_errors, 1)
        _UpdateCounter(watermark_errors, ERROR_ADJUST_FAIL)
        if failed_queue:
          failed_queue.put(sym_file)

  return num_errors.value
334
335
# A dummy class that allows for stubbing in tests and SymUpload. It has the
# same |sym_file| and |sym_header| attributes as SymbolItem, plus a |content|
# callable that ignores its argument and returns ''. UploadSymbol builds one
# to point SymUpload at the CFI-stripped temp copy of a symbol file.
FakeItem = cros_build_lib.Collection(
    'FakeItem', sym_file=None, sym_header=None, content=lambda x: '')
339
340
# TODO(build): Delete this if check. http://crbug.com/341152
if isolateserver:
  class SymbolItem(isolateserver.BufferItem):
    """An isolateserver.Item that stands for one breakpad symbol file

    The buffer handed to isolateserver.BufferItem is the stringified symbol
    header (as read by ReadSymsHeader), not the contents of the whole file.
    """

    # Hash algorithm passed along to BufferItem.
    ALGO = hashlib.sha1

    def __init__(self, sym_file):
      """Read the header of |sym_file| and set up the item around it

      Args:
        sym_file: Path to the breakpad symbol file
      """
      header = cros_generate_breakpad_symbols.ReadSymsHeader(sym_file)
      super(SymbolItem, self).__init__(str(header), self.ALGO)
      self.sym_file = sym_file
      self.sym_header = header
353
354
def SymbolDeduplicatorNotify(dedupe_namespace, dedupe_queue):
  """Tell the dedupe server about every symbol that was uploaded

  Items are read from |dedupe_queue| until a None sentinel shows up, and each
  one is pushed to the server. If anything goes wrong, we log a warning and
  stop notifying. That is fine: it just means the symbol will be uploaded
  again later on, and the symbol server will handle that graciously.

  This func runs in a different process from the main one, so we cannot share
  the storage object. Instead, we create our own. This func stays alive for
  the life of the process, so we only create one here overall.

  Args:
    dedupe_namespace: The isolateserver namespace to dedupe uploaded symbols.
    dedupe_queue: The queue to read SymbolItems from (None means do nothing)
  """
  if dedupe_queue is None:
    return

  # Tracks the item being worked on so a failure can say which one it was.
  item = None
  try:
    with timeout_util.Timeout(DEDUPE_TIMEOUT):
      storage = isolateserver.get_storage_api(constants.ISOLATESERVER,
                                              dedupe_namespace)

    for item in iter(dedupe_queue.get, None):
      with timeout_util.Timeout(DEDUPE_TIMEOUT):
        cros_build_lib.Debug('sending %s to dedupe server', item.sym_file)
        storage.push(item, item.content(0))

    cros_build_lib.Info('dedupe notification finished; exiting')
  except Exception:
    sym_file = ''
    if item and item.sym_file:
      sym_file = item.sym_file
    cros_build_lib.Warning('posting %s to dedupe server failed',
                           os.path.basename(sym_file), exc_info=True)

    # Keep draining the queue though so it doesn't fill up.
    while True:
      if dedupe_queue.get() is None:
        break
391
Mike Frysinger0c0efa22014-02-09 23:32:23 -0500392
def SymbolDeduplicator(storage, sym_paths):
  """Ask the dedupe server which symbol files still need uploading

  Every path is wrapped in a SymbolItem and the whole batch is handed to
  |storage|.contains(). If the query fails for any reason (or there is no
  |storage|), we'll just upload all symbols. This is fine as the symbol server
  will do the right thing and this phase is purely an optimization.

  This code runs in the main thread which is why we can re-use the existing
  storage object. Saves us from having to recreate one all the time.

  Args:
    storage: An isolateserver.StorageApi object (or None to skip the query)
    sym_paths: List of symbol files to check against the dedupe server

  Returns:
    |sym_paths| itself when it is empty. Otherwise SymbolItem objects: the
    result of |storage|.contains() (presumably the ones the server has not
    seen yet), or one item per path when the server was not consulted or
    the query failed.
  """
  if not sym_paths:
    return sym_paths

  items = [SymbolItem(path) for path in sym_paths]
  if not storage:
    return items

  try:
    with timeout_util.Timeout(DEDUPE_TIMEOUT):
      items = storage.contains(items)
  except Exception:
    # Fall through with the unfiltered list.
    cros_build_lib.Warning('talking to dedupe server failed', exc_info=True)

  return items
423
424
def IsTarball(path):
  """Guess from the filename alone whether |path| is a tarball.

  Recognized endings are ".tar", ".tar.bz2"/".tar.gz"/".tar.xz" and the short
  forms ".tbz2"/".tbz"/".tgz"/".txz". The file itself is never looked at.
  """
  stem, dot, ext = path.rpartition('.')
  if not dot:
    # No extension at all.
    return False

  if ext == 'tar':
    return True

  # Whatever sits between the last two dots (or all of |stem| if it has none).
  inner_ext = stem.rpartition('.')[2]
  if inner_ext == 'tar':
    return ext in ('bz2', 'gz', 'xz')

  return ext in ('tbz2', 'tbz', 'tgz', 'txz')
438
439
def SymbolFinder(tempdir, paths):
  """Walk |paths| and yield every symbol file found

  Each entry in |paths| is handled based on what it looks like:
    * Anything urlparse reports a scheme for is listed with gs.GSContext().LS
      (so globs work). Each match is handed to the common tarball cache
      (presumably that fetches and unpacks it) and the cached path is then
      searched recursively.
    * Dirs are walked and every file that ends in ".sym" is yielded.
    * Tarballs (see IsTarball) are unpacked into a new dir below |tempdir|
      and the unpacked tree is searched recursively.
    * Everything else is yielded as is, w/out any checks.

  Args:
    tempdir: Path to use for temporary files (caller will clean up).
    paths: A list of input paths (files, dirs, tarballs, or urls) to process.

  Returns:
    Yield every viable sym file.
  """
  for path in paths:
    # Pylint is confused about members of ParseResult.
    parsed = urlparse.urlparse(path)
    if parsed.scheme:  # pylint: disable=E1101
      # Support globs of filenames.
      ctx = gs.GSContext()
      for url in ctx.LS(path):
        cros_build_lib.Info('processing files inside %s', url)
        parsed = urlparse.urlparse(url)
        common_path = os.path.join(commandline.GetCacheDir(),
                                   constants.COMMON_CACHE)
        tar_cache = cache.TarballCache(common_path)
        # pylint: disable=E1101
        key = ('%s%s' % (parsed.netloc, parsed.path)).split('/')
        # pylint: enable=E1101
        # The common cache will not be LRU, removing the need to hold a read
        # lock on the cached gsutil.
        ref = tar_cache.Lookup(key)
        try:
          ref.SetDefault(url)
        except cros_build_lib.RunCommandError as e:
          cros_build_lib.Warning('ignoring %s\n%s', url, e)
          continue
        for sym in SymbolFinder(tempdir, [ref.path]):
          yield sym

    elif os.path.isdir(path):
      for root, _, files in os.walk(path):
        for name in (f for f in files if f.endswith('.sym')):
          yield os.path.join(root, name)

    elif IsTarball(path):
      cros_build_lib.Info('processing files inside %s', path)
      tardir = tempfile.mkdtemp(dir=tempdir)
      cache.Untar(os.path.realpath(path), tardir)
      # Nested tarballs get unpacked below |tardir| as well.
      for sym in SymbolFinder(tardir, [tardir]):
        yield sym

    else:
      yield path
492
493
def WriteQueueToFile(listing, queue, relpath=None):
  """Drain |queue| and record every path from it in |listing|.

  Note: The queue must have a sentinel None appended to the end.

  Args:
    listing: Where to write out the list of files (one path per line). If
      not set, the queue is drained and nothing is written.
    queue: The queue of paths to drain.
    relpath: If set, write out paths relative to this one.
  """
  if not listing:
    # Still drain the queue so we make sure the producer has finished
    # before we return. Otherwise, the queue might get destroyed too
    # quickly which will trigger a traceback in the producer.
    for _ in iter(queue.get, None):
      pass
    return

  with cros_build_lib.Open(listing, 'wb+') as f:
    for path in iter(queue.get, None):
      entry = os.path.relpath(path, relpath) if relpath else path
      f.write('%s\n' % entry)
520
521
Mike Frysinger9dcf9ae2013-08-10 15:17:09 -0400522def UploadSymbols(board=None, official=False, breakpad_dir=None,
Mike Frysingerd5fcb3a2013-05-30 21:10:50 -0400523 file_limit=DEFAULT_FILE_LIMIT, sleep=DEFAULT_SLEEP_DELAY,
Mike Frysinger8ec8c502014-02-10 00:19:13 -0500524 upload_limit=None, sym_paths=None, failed_list=None,
Mike Frysinger0c0efa22014-02-09 23:32:23 -0500525 root=None, retry=True, dedupe_namespace=None):
Mike Frysingerd5fcb3a2013-05-30 21:10:50 -0400526 """Upload all the generated symbols for |board| to the crash server
527
Mike Frysinger9dcf9ae2013-08-10 15:17:09 -0400528 You can use in a few ways:
529 * pass |board| to locate all of its symbols
530 * pass |breakpad_dir| to upload all the symbols in there
Mike Frysinger9b2ff5c2013-11-22 10:01:12 -0500531 * pass |sym_paths| to upload specific symbols (or dirs of symbols)
Mike Frysinger9dcf9ae2013-08-10 15:17:09 -0400532
Mike Frysingerd5fcb3a2013-05-30 21:10:50 -0400533 Args:
534 board: The board whose symbols we wish to upload
535 official: Use the official symbol server rather than the staging one
536 breakpad_dir: The full path to the breakpad directory where symbols live
537 file_limit: The max file size of a symbol file before we try to strip it
538 sleep: How long to sleep in between uploads
Mike Frysinger8ec8c502014-02-10 00:19:13 -0500539 upload_limit: If set, only upload this many symbols (meant for testing)
Mike Frysinger9b2ff5c2013-11-22 10:01:12 -0500540 sym_paths: Specific symbol files (or dirs of sym files) to upload,
541 otherwise search |breakpad_dir|
Mike Frysinger7f9be142014-01-15 02:16:42 -0500542 failed_list: Write the names of all sym files we did not upload; can be a
543 filename or file-like object.
Mike Frysinger118d2502013-08-19 03:36:56 -0400544 root: The tree to prefix to |breakpad_dir| (if |breakpad_dir| is not set)
Mike Frysinger02e92402013-11-22 16:22:02 -0500545 retry: Whether we should retry failures.
Mike Frysinger0c0efa22014-02-09 23:32:23 -0500546 dedupe_namespace: The isolateserver namespace to dedupe uploaded symbols.
Mike Frysinger1a736a82013-12-12 01:50:59 -0500547
Mike Frysingerd5fcb3a2013-05-30 21:10:50 -0400548 Returns:
Mike Frysinger69cb41d2013-08-11 20:08:19 -0400549 The number of errors that were encountered.
Mike Frysingerd5fcb3a2013-05-30 21:10:50 -0400550 """
Mike Frysinger0c0efa22014-02-09 23:32:23 -0500551 # TODO(build): Delete this assert.
552 assert isolateserver, 'Missing isolateserver import http://crbug.com/341152'
553
Mike Frysingerd5fcb3a2013-05-30 21:10:50 -0400554 if official:
555 upload_url = OFFICIAL_UPLOAD_URL
556 else:
557 cros_build_lib.Warning('unofficial builds upload to the staging server')
558 upload_url = STAGING_UPLOAD_URL
559
Mike Frysinger9b2ff5c2013-11-22 10:01:12 -0500560 if sym_paths:
561 cros_build_lib.Info('uploading specified symbols to %s', upload_url)
Mike Frysinger9dcf9ae2013-08-10 15:17:09 -0400562 else:
563 if breakpad_dir is None:
Mike Frysinger118d2502013-08-19 03:36:56 -0400564 breakpad_dir = os.path.join(
565 root,
566 cros_generate_breakpad_symbols.FindBreakpadDir(board).lstrip('/'))
Mike Frysinger9dcf9ae2013-08-10 15:17:09 -0400567 cros_build_lib.Info('uploading all symbols to %s from %s', upload_url,
568 breakpad_dir)
Mike Frysinger9b2ff5c2013-11-22 10:01:12 -0500569 sym_paths = [breakpad_dir]
Mike Frysingerd5fcb3a2013-05-30 21:10:50 -0400570
Mike Frysinger0c0efa22014-02-09 23:32:23 -0500571 # We use storage_query to ask the server about existing symbols. The
572 # storage_notify_proc process is used to post updates to the server. We
573 # cannot safely share the storage object between threads/processes, but
574 # we also want to minimize creating new ones as each object has to init
575 # new state (like server connections).
Mike Frysinger650e6722014-04-28 18:29:15 -0400576 storage_query = None
Mike Frysinger0c0efa22014-02-09 23:32:23 -0500577 if dedupe_namespace:
578 dedupe_limit = DEDUPE_LIMIT
579 dedupe_queue = multiprocessing.Queue()
Mike Frysinger650e6722014-04-28 18:29:15 -0400580 try:
581 with timeout_util.Timeout(DEDUPE_TIMEOUT):
582 storage_query = isolateserver.get_storage_api(constants.ISOLATESERVER,
583 dedupe_namespace)
584 except Exception:
585 cros_build_lib.Warning('initializing dedupe server connection failed',
586 exc_info=True)
Mike Frysinger0c0efa22014-02-09 23:32:23 -0500587 else:
588 dedupe_limit = 1
Mike Frysinger650e6722014-04-28 18:29:15 -0400589 dedupe_queue = None
Mike Frysinger0c0efa22014-02-09 23:32:23 -0500590 # Can't use parallel.BackgroundTaskRunner because that'll create multiple
591 # processes and we want only one the whole time (see comment above).
592 storage_notify_proc = multiprocessing.Process(
593 target=SymbolDeduplicatorNotify, args=(dedupe_namespace, dedupe_queue))
594
Mike Frysingerd5fcb3a2013-05-30 21:10:50 -0400595 bg_errors = multiprocessing.Value('i')
Mike Frysingereb753bf2013-11-22 16:05:35 -0500596 watermark_errors = multiprocessing.Value('f')
Mike Frysinger02e92402013-11-22 16:22:02 -0500597 failed_queue = multiprocessing.Queue()
598 uploader = functools.partial(
Mike Frysinger0c0efa22014-02-09 23:32:23 -0500599 UploadSymbol, upload_url, file_limit=file_limit, sleep=sleep,
600 num_errors=bg_errors, watermark_errors=watermark_errors,
601 failed_queue=failed_queue, passed_queue=dedupe_queue)
Mike Frysingerd5fcb3a2013-05-30 21:10:50 -0400602
Mike Frysinger8ec8c502014-02-10 00:19:13 -0500603 start_time = datetime.datetime.now()
604 Counters = cros_build_lib.Collection(
Mike Frysinger0c0efa22014-02-09 23:32:23 -0500605 'Counters', upload_limit=upload_limit, uploaded_count=0, deduped_count=0)
Mike Frysinger8ec8c502014-02-10 00:19:13 -0500606 counters = Counters()
607
Mike Frysinger0c0efa22014-02-09 23:32:23 -0500608 def _Upload(queue, counters, files):
609 if not files:
610 return
611
612 missing_count = 0
613 for item in SymbolDeduplicator(storage_query, files):
Mike Frysinger0c0efa22014-02-09 23:32:23 -0500614 missing_count += 1
Mike Frysingerd42e5f02014-03-14 11:19:37 -0400615
616 if counters.upload_limit == 0:
617 continue
618
Mike Frysinger0c0efa22014-02-09 23:32:23 -0500619 queue.put((item,))
620 counters.uploaded_count += 1
621 if counters.upload_limit is not None:
622 counters.upload_limit -= 1
623
624 counters.deduped_count += (len(files) - missing_count)
625
Mike Frysinger13870082014-03-14 10:41:20 -0400626 try:
Mike Frysingerd41938e2014-02-10 06:37:55 -0500627 storage_notify_proc.start()
Mike Frysinger02e92402013-11-22 16:22:02 -0500628
Mike Frysinger13870082014-03-14 10:41:20 -0400629 with osutils.TempDir(prefix='upload_symbols.') as tempdir:
630 # For the first run, we collect the symbols that failed. If the
631 # overall failure rate was low, we'll retry them on the second run.
632 for retry in (retry, False):
633 # We need to limit ourselves to one upload at a time to avoid the server
634 # kicking in DoS protection. See these bugs for more details:
635 # http://crbug.com/209442
636 # http://crbug.com/212496
637 with parallel.BackgroundTaskRunner(uploader, processes=1) as queue:
Mike Frysingerd41938e2014-02-10 06:37:55 -0500638 dedupe_list = []
Mike Frysinger13870082014-03-14 10:41:20 -0400639 for sym_file in SymbolFinder(tempdir, sym_paths):
640 dedupe_list.append(sym_file)
641 dedupe_len = len(dedupe_list)
642 if dedupe_len < dedupe_limit:
643 if (counters.upload_limit is None or
644 dedupe_len < counters.upload_limit):
645 continue
Mike Frysinger02e92402013-11-22 16:22:02 -0500646
Mike Frysinger1010a892014-03-14 11:24:17 -0400647 # We check the counter before _Upload so that we don't keep talking
648 # to the dedupe server. Otherwise, we end up sending one symbol at
649 # a time to it and that slows things down a lot.
650 if counters.upload_limit == 0:
651 break
652
Mike Frysinger13870082014-03-14 10:41:20 -0400653 _Upload(queue, counters, dedupe_list)
654 dedupe_list = []
655 _Upload(queue, counters, dedupe_list)
Mike Frysingerd41938e2014-02-10 06:37:55 -0500656
Mike Frysinger13870082014-03-14 10:41:20 -0400657 # See if we need to retry, and if we haven't failed too many times yet.
658 if not retry or ErrorLimitHit(bg_errors, watermark_errors):
Mike Frysinger5e6dd712014-03-07 22:21:17 -0500659 break
Mike Frysinger5e6dd712014-03-07 22:21:17 -0500660
Mike Frysinger13870082014-03-14 10:41:20 -0400661 sym_paths = []
662 failed_queue.put(None)
663 while True:
664 sym_path = failed_queue.get()
665 if sym_path is None:
666 break
667 sym_paths.append(sym_path)
Mike Frysinger02e92402013-11-22 16:22:02 -0500668
Mike Frysinger13870082014-03-14 10:41:20 -0400669 if sym_paths:
670 cros_build_lib.Warning('retrying %i symbols', len(sym_paths))
671 if counters.upload_limit is not None:
672 counters.upload_limit += len(sym_paths)
673 # Decrement the error count in case we recover in the second pass.
674 assert bg_errors.value >= len(sym_paths), \
675 'more failed files than errors?'
676 bg_errors.value -= len(sym_paths)
677 else:
678 # No failed symbols, so just return now.
679 break
Mike Frysinger7f9be142014-01-15 02:16:42 -0500680
Mike Frysinger13870082014-03-14 10:41:20 -0400681 # If the user has requested it, save all the symbol files that we failed to
682 # upload to a listing file. This should help with recovery efforts later.
683 failed_queue.put(None)
684 WriteQueueToFile(failed_list, failed_queue, breakpad_dir)
685
686 finally:
Mike Frysingerae298452014-03-24 22:45:23 -0400687 cros_build_lib.Info('finished uploading; joining background process')
Mike Frysinger13870082014-03-14 10:41:20 -0400688 if dedupe_queue:
689 dedupe_queue.put(None)
Mike Frysinger4dd462e2014-04-30 16:21:51 -0400690
691 # The notification might be slow going, so give it some time to finish.
692 # We have to poll here as the process monitor is watching for output and
693 # will kill us if we go silent for too long.
694 wait_minutes = DEDUPE_NOTIFY_TIMEOUT
695 while storage_notify_proc.is_alive() and wait_minutes > 0:
696 cros_build_lib.Info('waiting up to %i minutes for ~%i notifications',
697 wait_minutes, dedupe_queue.qsize())
698 storage_notify_proc.join(60)
699 wait_minutes -= 1
700
701 # The process is taking too long, so kill it and complain.
702 if storage_notify_proc.is_alive():
703 storage_notify_proc.terminate()
704 cros_build_lib.Warning('notification process took too long')
705 cros_build_lib.PrintBuildbotStepWarnings()
Mike Frysinger0c0efa22014-02-09 23:32:23 -0500706
707 cros_build_lib.Info('uploaded %i symbols (%i were deduped) which took: %s',
708 counters.uploaded_count, counters.deduped_count,
Mike Frysinger8ec8c502014-02-10 00:19:13 -0500709 datetime.datetime.now() - start_time)
710
Mike Frysinger9b2ff5c2013-11-22 10:01:12 -0500711 return bg_errors.value
Mike Frysingerd5fcb3a2013-05-30 21:10:50 -0400712
713
def main(argv):
  """Parse the command line, then regenerate (if asked) and upload symbols.

  Args:
    argv: The command line arguments, handed to the parser's parse_args().

  Returns:
    0 when neither symbol generation nor the upload reported any problems,
    otherwise 1.
  """
  # These module-level names are rebound further down when --testing is used.
  # pylint: disable=W0601,W0603
  global INITIAL_RETRY_DELAY, SymUpload, DEFAULT_SLEEP_DELAY

  # TODO(build): Delete this assert.
  assert isolateserver, 'Missing isolateserver import http://crbug.com/341152'

  parser = commandline.ArgumentParser(description=__doc__)

  def _AddSwitch(flag, help_text):
    """Register a boolean command line switch that defaults to off."""
    parser.add_argument(flag, action='store_true', default=False,
                        help=help_text)

  parser.add_argument('sym_paths', type='path_or_uri', nargs='*', default=None,
                      help='symbol file or directory or URL or tarball')
  parser.add_argument('--board', default=None,
                      help='board to build packages for')
  parser.add_argument('--breakpad_root', type='path', default=None,
                      help='root directory for breakpad symbols')
  _AddSwitch('--official_build', 'point to official symbol server')
  _AddSwitch('--regenerate', 'regenerate all symbols')
  parser.add_argument('--upload-limit', type=int, default=None,
                      help='only upload # number of symbols')
  # By default, start stripping CFI data 10MiB below the server's file limit.
  strip_cfi_default = CRASH_SERVER_FILE_LIMIT - (10 * 1024 * 1024)
  parser.add_argument('--strip_cfi', type=int, default=strip_cfi_default,
                      help='strip CFI data for files above this size')
  parser.add_argument('--failed-list', type='path',
                      help='where to save a list of failed symbols')
  _AddSwitch('--dedupe', 'use the swarming service to avoid re-uploading')
  _AddSwitch('--testing', 'run in testing mode')
  _AddSwitch('--yes', 'answer yes to all prompts')

  opts = parser.parse_args(argv)
  opts.Freeze()

  # Explicit symbol paths cannot be combined with --regenerate; without any
  # explicit paths, the board has to be named.
  if opts.sym_paths:
    if opts.regenerate:
      cros_build_lib.Die('--regenerate may not be used with specific files')
  elif opts.board is None:
    cros_build_lib.Die('--board is required')

  if opts.breakpad_root and opts.regenerate:
    cros_build_lib.Die('--regenerate may not be used with --breakpad_root')

  if opts.testing:
    # TODO(build): Kill off --testing mode once unittests are up-to-snuff.
    cros_build_lib.Info('running in testing mode')
    # Drop the delays and swap in the testing uploader.
    INITIAL_RETRY_DELAY = 0
    DEFAULT_SLEEP_DELAY = 0
    SymUpload = TestingSymUpload

  # Only official (non-testing) runs use the official dedupe namespace.
  if not opts.dedupe:
    dedupe_namespace = None
  elif opts.official_build and not opts.testing:
    dedupe_namespace = OFFICIAL_DEDUPE_NAMESPACE
  else:
    dedupe_namespace = STAGING_DEDUPE_NAMESPACE

  if not opts.yes:
    notice = textwrap.dedent("""
      Uploading symbols for an entire Chromium OS build is really only
      necessary for release builds and in a few cases for developers
      to debug problems. It will take considerable time to run. For
      developer debugging purposes, consider instead passing specific
      files to upload.
    """)
    prolog = '\n'.join(textwrap.wrap(notice, 80)).strip()
    confirmed = cros_build_lib.BooleanPrompt(
        prompt='Are you sure you want to upload all build symbols',
        default=False, prolog=prolog)
    if not confirmed:
      cros_build_lib.Die('better safe than sorry')

  # Accumulate the problem counts from both the generation and upload steps.
  num_errors = 0
  if opts.regenerate:
    num_errors += cros_generate_breakpad_symbols.GenerateBreakpadSymbols(
        opts.board, breakpad_dir=opts.breakpad_root)

  num_errors += UploadSymbols(
      opts.board,
      official=opts.official_build,
      breakpad_dir=opts.breakpad_root,
      file_limit=opts.strip_cfi,
      sleep=DEFAULT_SLEEP_DELAY,
      upload_limit=opts.upload_limit,
      sym_paths=opts.sym_paths,
      failed_list=opts.failed_list,
      dedupe_namespace=dedupe_namespace)

  if not num_errors:
    return num_errors

  cros_build_lib.Error('encountered %i problem(s)', num_errors)
  # exit(status) gets masked, so a raw count that happens to be a multiple of
  # the mask would look like success; always report failure as exactly 1.
  return 1
Mike Frysinger094a2172013-08-14 12:54:35 -0400803
804
# We need this to run once per process.  Do it at module import time, as that
# lets us avoid doing it inline at function call time (see SymUpload): that
# func might be called by the multiprocessing module, which means we'd do the
# opener logic multiple times overall.  Plus, if you're importing this module,
# there's a pretty good chance that you're going to need this.
810poster.streaminghttp.register_openers()