blob: 95ea830efde0830cc0f6b958b7582a2fe3ce2715 [file] [log] [blame]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001#!/usr/bin/env python
2# coding=utf-8
3# Copyright (c) 2012 The Chromium Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""Traces an executable and its child processes and extract the files accessed
8by them.
9
10The implementation uses OS-specific API. The native Kernel logger and the ETL
11interface is used on Windows. Dtrace is used on OSX. Strace is used otherwise.
12The OS-specific implementation is hidden in an 'API' interface.
13
14The results are embedded in a Results instance. The tracing is done in two
15phases, the first is to do the actual trace and generate an
16implementation-specific log file. Then the log file is parsed to extract the
17information, including the individual child processes and the files accessed
18from the log.
19"""
20
21import codecs
22import csv
23import getpass
24import glob
25import json
26import logging
maruel@chromium.org306e0e72012-11-02 18:22:03 +000027import unicodedata
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000028import optparse
29import os
30import re
31import subprocess
32import sys
33import tempfile
34import threading
35import time
36import weakref
37
38## OS-specific imports
39
40if sys.platform == 'win32':
41 from ctypes.wintypes import byref, create_unicode_buffer, c_int, c_wchar_p
42 from ctypes.wintypes import windll, FormatError # pylint: disable=E0611
43 from ctypes.wintypes import GetLastError # pylint: disable=E0611
44elif sys.platform == 'darwin':
45 import Carbon.File # pylint: disable=F0401
46 import MacOS # pylint: disable=F0401
47
maruel@chromium.org46e61cc2013-03-25 19:55:34 +000048import run_isolated
49
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000050
51BASE_DIR = os.path.dirname(os.path.abspath(__file__))
52ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR))
53
54
55class TracingFailure(Exception):
56 """An exception occured during tracing."""
57 def __init__(self, description, pid, line_number, line, *args):
58 super(TracingFailure, self).__init__(
59 description, pid, line_number, line, *args)
60 self.description = description
61 self.pid = pid
62 self.line_number = line_number
63 self.line = line
64 self.extra = args
65
66 def __str__(self):
67 out = self.description
68 if self.pid:
69 out += '\npid: %d' % self.pid
70 if self.line_number:
71 out += '\nline: %d' % self.line_number
72 if self.line:
73 out += '\n%s' % self.line
74 if self.extra:
75 out += '\n' + ', '.join(map(str, filter(None, self.extra)))
76 return out
77
78
79## OS-specific functions
80
81if sys.platform == 'win32':
82 def QueryDosDevice(drive_letter):
83 """Returns the Windows 'native' path for a DOS drive letter."""
84 assert re.match(r'^[a-zA-Z]:$', drive_letter), drive_letter
maruel@chromium.org306e0e72012-11-02 18:22:03 +000085 assert isinstance(drive_letter, unicode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000086 # Guesswork. QueryDosDeviceW never returns the required number of bytes.
87 chars = 1024
maruel@chromium.org306e0e72012-11-02 18:22:03 +000088 drive_letter = drive_letter
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000089 p = create_unicode_buffer(chars)
90 if 0 == windll.kernel32.QueryDosDeviceW(drive_letter, p, chars):
91 err = GetLastError()
92 if err:
93 # pylint: disable=E0602
maruel@chromium.orgf37c2472012-11-02 20:06:13 +000094 msg = u'QueryDosDevice(%s): %s (%d)' % (
95 drive_letter, FormatError(err), err)
96 raise WindowsError(err, msg.encode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000097 return p.value
98
99
100 def GetShortPathName(long_path):
101 """Returns the Windows short path equivalent for a 'long' path."""
maruel@chromium.org68bb1482013-02-15 22:36:24 +0000102 assert isinstance(long_path, unicode), repr(long_path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000103 # Adds '\\\\?\\' when given an absolute path so the MAX_PATH (260) limit is
104 # not enforced.
105 if os.path.isabs(long_path) and not long_path.startswith('\\\\?\\'):
106 long_path = '\\\\?\\' + long_path
107 chars = windll.kernel32.GetShortPathNameW(long_path, None, 0)
108 if chars:
109 p = create_unicode_buffer(chars)
110 if windll.kernel32.GetShortPathNameW(long_path, p, chars):
111 return p.value
112
113 err = GetLastError()
114 if err:
115 # pylint: disable=E0602
maruel@chromium.orgf37c2472012-11-02 20:06:13 +0000116 msg = u'GetShortPathName(%s): %s (%d)' % (
117 long_path, FormatError(err), err)
118 raise WindowsError(err, msg.encode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000119
120
121 def GetLongPathName(short_path):
122 """Returns the Windows long path equivalent for a 'short' path."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000123 assert isinstance(short_path, unicode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000124 # Adds '\\\\?\\' when given an absolute path so the MAX_PATH (260) limit is
125 # not enforced.
126 if os.path.isabs(short_path) and not short_path.startswith('\\\\?\\'):
127 short_path = '\\\\?\\' + short_path
128 chars = windll.kernel32.GetLongPathNameW(short_path, None, 0)
129 if chars:
130 p = create_unicode_buffer(chars)
131 if windll.kernel32.GetLongPathNameW(short_path, p, chars):
132 return p.value
133
134 err = GetLastError()
135 if err:
136 # pylint: disable=E0602
maruel@chromium.orgf37c2472012-11-02 20:06:13 +0000137 msg = u'GetLongPathName(%s): %s (%d)' % (
138 short_path, FormatError(err), err)
139 raise WindowsError(err, msg.encode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000140
141
142 def get_current_encoding():
143 """Returns the 'ANSI' code page associated to the process."""
144 return 'cp%d' % int(windll.kernel32.GetACP())
145
146
147 class DosDriveMap(object):
148 """Maps \Device\HarddiskVolumeN to N: on Windows."""
149 # Keep one global cache.
150 _MAPPING = {}
151
152 def __init__(self):
153 """Lazy loads the cache."""
154 if not self._MAPPING:
155 # This is related to UNC resolver on windows. Ignore that.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000156 self._MAPPING[u'\\Device\\Mup'] = None
157 self._MAPPING[u'\\SystemRoot'] = os.environ[u'SystemRoot']
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000158
159 for letter in (chr(l) for l in xrange(ord('C'), ord('Z')+1)):
160 try:
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000161 letter = u'%s:' % letter
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000162 mapped = QueryDosDevice(letter)
163 if mapped in self._MAPPING:
164 logging.warn(
165 ('Two drives: \'%s\' and \'%s\', are mapped to the same disk'
166 '. Drive letters are a user-mode concept and the kernel '
167 'traces only have NT path, so all accesses will be '
168 'associated with the first drive letter, independent of the '
169 'actual letter used by the code') % (
170 self._MAPPING[mapped], letter))
171 else:
172 self._MAPPING[mapped] = letter
173 except WindowsError: # pylint: disable=E0602
174 pass
175
176 def to_win32(self, path):
177 """Converts a native NT path to Win32/DOS compatible path."""
178 match = re.match(r'(^\\Device\\[a-zA-Z0-9]+)(\\.*)?$', path)
179 if not match:
180 raise ValueError(
181 'Can\'t convert %s into a Win32 compatible path' % path,
182 path)
183 if not match.group(1) in self._MAPPING:
184 # Unmapped partitions may be accessed by windows for the
185 # fun of it while the test is running. Discard these.
186 return None
187 drive = self._MAPPING[match.group(1)]
188 if not drive or not match.group(2):
189 return drive
190 return drive + match.group(2)
191
192
193 def isabs(path):
194 """Accepts X: as an absolute path, unlike python's os.path.isabs()."""
195 return os.path.isabs(path) or len(path) == 2 and path[1] == ':'
196
197
198 def get_native_path_case(p):
199 """Returns the native path case for an existing file.
200
201 On Windows, removes any leading '\\?\'.
202 """
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000203 assert isinstance(p, unicode), repr(p)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000204 if not isabs(p):
205 raise ValueError(
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +0000206 'get_native_path_case(%r): Require an absolute path' % p, p)
207
maruel@chromium.org037758d2012-12-10 17:59:46 +0000208 # Make sure it is normalized to os.path.sep. Do not do it here to keep the
209 # function fast
210 assert '/' not in p, p
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +0000211 suffix = ''
212 count = p.count(':')
213 if count > 1:
214 # This means it has an alternate-data stream. There could be 3 ':', since
215 # it could be the $DATA datastream of an ADS. Split the whole ADS suffix
216 # off and add it back afterward. There is no way to know the native path
217 # case of an alternate data stream.
218 items = p.split(':')
219 p = ':'.join(items[0:2])
220 suffix = ''.join(':' + i for i in items[2:])
221
maruel@chromium.org42b091d2013-04-02 22:27:02 +0000222 # TODO(maruel): Use os.path.normpath?
223 if p.endswith('.\\'):
224 p = p[:-2]
225
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000226 # Windows used to have an option to turn on case sensitivity on non Win32
227 # subsystem but that's out of scope here and isn't supported anymore.
228 # Go figure why GetShortPathName() is needed.
229 try:
230 out = GetLongPathName(GetShortPathName(p))
231 except OSError, e:
232 if e.args[0] in (2, 3, 5):
233 # The path does not exist. Try to recurse and reconstruct the path.
234 base = os.path.dirname(p)
235 rest = os.path.basename(p)
236 return os.path.join(get_native_path_case(base), rest)
237 raise
238 if out.startswith('\\\\?\\'):
239 out = out[4:]
240 # Always upper case the first letter since GetLongPathName() will return the
241 # drive letter in the case it was given.
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +0000242 return out[0].upper() + out[1:] + suffix
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000243
244
245 def CommandLineToArgvW(command_line):
246 """Splits a commandline into argv using CommandLineToArgvW()."""
247 # http://msdn.microsoft.com/library/windows/desktop/bb776391.aspx
248 size = c_int()
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000249 assert isinstance(command_line, unicode)
250 ptr = windll.shell32.CommandLineToArgvW(command_line, byref(size))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000251 try:
252 return [arg for arg in (c_wchar_p * size.value).from_address(ptr)]
253 finally:
254 windll.kernel32.LocalFree(ptr)
255
256
257elif sys.platform == 'darwin':
258
259
260 # On non-windows, keep the stdlib behavior.
261 isabs = os.path.isabs
262
263
264 def _find_item_native_case(root_path, item):
265 """Gets the native path case of a single item based at root_path.
266
267 There is no API to get the native path case of symlinks on OSX. So it
268 needs to be done the slow way.
269 """
270 item = item.lower()
271 for element in os.listdir(root_path):
272 if element.lower() == item:
273 return element
274
275
276 def _native_case(p):
277 """Gets the native path case. Warning: this function resolves symlinks."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000278 try:
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000279 rel_ref, _ = Carbon.File.FSPathMakeRef(p.encode('utf-8'))
280 # The OSX underlying code uses NFD but python strings are in NFC. This
281 # will cause issues with os.listdir() for example. Since the dtrace log
282 # *is* in NFC, normalize it here.
csharp@chromium.org5a2c3732013-03-07 14:44:56 +0000283 out = unicodedata.normalize(
284 'NFC', rel_ref.FSRefMakePath().decode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000285 if p.endswith(os.path.sep) and not out.endswith(os.path.sep):
286 return out + os.path.sep
287 return out
288 except MacOS.Error, e:
289 if e.args[0] in (-43, -120):
290 # The path does not exist. Try to recurse and reconstruct the path.
291 # -43 means file not found.
292 # -120 means directory not found.
293 base = os.path.dirname(p)
294 rest = os.path.basename(p)
csharp@chromium.org5a2c3732013-03-07 14:44:56 +0000295 return os.path.join(_native_case(base), rest)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000296 raise OSError(
297 e.args[0], 'Failed to get native path for %s' % p, p, e.args[1])
298
299
300 def _split_at_symlink_native(base_path, rest):
301 """Returns the native path for a symlink."""
302 base, symlink, rest = split_at_symlink(base_path, rest)
303 if symlink:
304 if not base_path:
305 base_path = base
306 else:
307 base_path = safe_join(base_path, base)
308 symlink = _find_item_native_case(base_path, symlink)
309 return base, symlink, rest
310
311
312 def get_native_path_case(path):
313 """Returns the native path case for an existing file.
314
315 Technically, it's only HFS+ on OSX that is case preserving and
316 insensitive. It's the default setting on HFS+ but can be changed.
317 """
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000318 assert isinstance(path, unicode), repr(path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000319 if not isabs(path):
320 raise ValueError(
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000321 'get_native_path_case(%r): Require an absolute path' % path, path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000322 if path.startswith('/dev'):
323 # /dev is not visible from Carbon, causing an exception.
324 return path
325
326 # Starts assuming there is no symlink along the path.
327 resolved = _native_case(path)
328 if resolved.lower() == path.lower():
329 # This code path is incredibly faster.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000330 logging.debug('get_native_path_case(%s) = %s' % (path, resolved))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000331 return resolved
332
maruel@chromium.org42b091d2013-04-02 22:27:02 +0000333 # TODO(maruel): Use os.path.normpath?
csharp@chromium.org0c1afc82013-03-27 17:07:50 +0000334 if resolved.lower() + './' == path.lower():
335 return resolved[:-2]
336
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000337 # There was a symlink, process it.
338 base, symlink, rest = _split_at_symlink_native(None, path)
339 assert symlink, (path, base, symlink, rest, resolved)
340 prev = base
341 base = safe_join(_native_case(base), symlink)
342 assert len(base) > len(prev)
343 while rest:
344 prev = base
345 relbase, symlink, rest = _split_at_symlink_native(base, rest)
346 base = safe_join(base, relbase)
347 assert len(base) > len(prev), (prev, base, symlink)
348 if symlink:
349 base = safe_join(base, symlink)
350 assert len(base) > len(prev), (prev, base, symlink)
351 # Make sure no symlink was resolved.
352 assert base.lower() == path.lower(), (base, path)
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000353 logging.debug('get_native_path_case(%s) = %s' % (path, base))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000354 return base
355
356
357else: # OSes other than Windows and OSX.
358
359
360 # On non-windows, keep the stdlib behavior.
361 isabs = os.path.isabs
362
363
364 def get_native_path_case(path):
365 """Returns the native path case for an existing file.
366
367 On OSes other than OSX and Windows, assume the file system is
368 case-sensitive.
369
370 TODO(maruel): This is not strictly true. Implement if necessary.
371 """
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000372 assert isinstance(path, unicode), repr(path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000373 if not isabs(path):
374 raise ValueError(
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000375 'get_native_path_case(%r): Require an absolute path' % path, path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000376 # Give up on cygwin, as GetLongPathName() can't be called.
377 # Linux traces tends to not be normalized so use this occasion to normalize
378 # it. This function implementation already normalizes the path on the other
379 # OS so this needs to be done here to be coherent between OSes.
380 out = os.path.normpath(path)
381 if path.endswith(os.path.sep) and not out.endswith(os.path.sep):
382 return out + os.path.sep
383 return out
384
385
386if sys.platform != 'win32': # All non-Windows OSes.
387
388
389 def safe_join(*args):
390 """Joins path elements like os.path.join() but doesn't abort on absolute
391 path.
392
393 os.path.join('foo', '/bar') == '/bar'
394 but safe_join('foo', '/bar') == 'foo/bar'.
395 """
396 out = ''
397 for element in args:
398 if element.startswith(os.path.sep):
399 if out.endswith(os.path.sep):
400 out += element[1:]
401 else:
402 out += element
403 else:
404 if out.endswith(os.path.sep):
405 out += element
406 else:
407 out += os.path.sep + element
408 return out
409
410
411 def split_at_symlink(base_dir, relfile):
412 """Scans each component of relfile and cut the string at the symlink if
413 there is any.
414
415 Returns a tuple (base_path, symlink, rest), with symlink == rest == None if
416 not symlink was found.
417 """
418 if base_dir:
419 assert relfile
420 assert os.path.isabs(base_dir)
421 index = 0
422 else:
423 assert os.path.isabs(relfile)
424 index = 1
425
426 def at_root(rest):
427 if base_dir:
428 return safe_join(base_dir, rest)
429 return rest
430
431 while True:
432 try:
433 index = relfile.index(os.path.sep, index)
434 except ValueError:
435 index = len(relfile)
436 full = at_root(relfile[:index])
437 if os.path.islink(full):
438 # A symlink!
439 base = os.path.dirname(relfile[:index])
440 symlink = os.path.basename(relfile[:index])
441 rest = relfile[index:]
442 logging.debug(
443 'split_at_symlink(%s, %s) -> (%s, %s, %s)' %
444 (base_dir, relfile, base, symlink, rest))
445 return base, symlink, rest
446 if index == len(relfile):
447 break
448 index += 1
449 return relfile, None, None
450
451
452def fix_python_path(cmd):
453 """Returns the fixed command line to call the right python executable."""
454 out = cmd[:]
455 if out[0] == 'python':
456 out[0] = sys.executable
457 elif out[0].endswith('.py'):
458 out.insert(0, sys.executable)
459 return out
460
461
462def create_thunk():
463 handle, name = tempfile.mkstemp(prefix='trace_inputs_thunk', suffix='.py')
464 os.write(
465 handle,
466 (
467 'import subprocess\n'
468 'import sys\n'
469 'sys.exit(subprocess.call(sys.argv[2:]))\n'
470 ))
471 os.close(handle)
472 return name
473
474
475def strace_process_quoted_arguments(text):
476 """Extracts quoted arguments on a string and return the arguments as a list.
477
478 Implemented as an automaton. Supports incomplete strings in the form
479 '"foo"...'.
480
481 Example:
482 With text = '"foo", "bar"', the function will return ['foo', 'bar']
483
484 TODO(maruel): Implement escaping.
485 """
486 # All the possible states of the DFA.
487 ( NEED_QUOTE, # Begining of a new arguments.
488 INSIDE_STRING, # Inside an argument.
489 ESCAPED, # Found a '\' inside a quote. Treat the next char as-is.
490 NEED_COMMA_OR_DOT, # Right after the closing quote of an argument. Could be
491 # a serie of 3 dots or a comma.
492 NEED_SPACE, # Right after a comma
493 NEED_DOT_2, # Found a dot, need a second one.
494 NEED_DOT_3, # Found second dot, need a third one.
495 NEED_COMMA, # Found third dot, need a comma.
496 ) = range(8)
497
498 state = NEED_QUOTE
499 out = []
500 for index, char in enumerate(text):
501 if char == '"':
502 if state == NEED_QUOTE:
503 state = INSIDE_STRING
504 # A new argument was found.
505 out.append('')
506 elif state == INSIDE_STRING:
507 # The argument is now closed.
508 state = NEED_COMMA_OR_DOT
509 elif state == ESCAPED:
510 out[-1] += char
511 state = INSIDE_STRING
512 else:
513 raise ValueError(
514 'Can\'t process char at column %d for: %r' % (index, text),
515 index,
516 text)
517 elif char == ',':
518 if state in (NEED_COMMA_OR_DOT, NEED_COMMA):
519 state = NEED_SPACE
520 elif state == INSIDE_STRING:
521 out[-1] += char
522 elif state == ESCAPED:
523 out[-1] += char
524 state = INSIDE_STRING
525 else:
526 raise ValueError(
527 'Can\'t process char at column %d for: %r' % (index, text),
528 index,
529 text)
530 elif char == ' ':
531 if state == NEED_SPACE:
532 state = NEED_QUOTE
533 elif state == INSIDE_STRING:
534 out[-1] += char
535 elif state == ESCAPED:
536 out[-1] += char
537 state = INSIDE_STRING
538 else:
539 raise ValueError(
540 'Can\'t process char at column %d for: %r' % (index, text),
541 index,
542 text)
543 elif char == '.':
544 if state == NEED_COMMA_OR_DOT:
545 # The string is incomplete, this mean the strace -s flag should be
546 # increased.
547 state = NEED_DOT_2
548 elif state == NEED_DOT_2:
549 state = NEED_DOT_3
550 elif state == NEED_DOT_3:
551 state = NEED_COMMA
552 elif state == INSIDE_STRING:
553 out[-1] += char
554 elif state == ESCAPED:
555 out[-1] += char
556 state = INSIDE_STRING
557 else:
558 raise ValueError(
559 'Can\'t process char at column %d for: %r' % (index, text),
560 index,
561 text)
562 elif char == '\\':
563 if state == ESCAPED:
564 out[-1] += char
565 state = INSIDE_STRING
566 elif state == INSIDE_STRING:
567 state = ESCAPED
568 else:
569 raise ValueError(
570 'Can\'t process char at column %d for: %r' % (index, text),
571 index,
572 text)
573 else:
574 if state == INSIDE_STRING:
575 out[-1] += char
576 else:
577 raise ValueError(
578 'Can\'t process char at column %d for: %r' % (index, text),
579 index,
580 text)
581 if state not in (NEED_COMMA, NEED_COMMA_OR_DOT):
582 raise ValueError(
583 'String is incorrectly terminated: %r' % text,
584 text)
585 return out
586
587
588def read_json(filepath):
589 with open(filepath, 'r') as f:
590 return json.load(f)
591
592
593def write_json(filepath_or_handle, data, dense):
594 """Writes data into filepath or file handle encoded as json.
595
596 If dense is True, the json is packed. Otherwise, it is human readable.
597 """
598 if hasattr(filepath_or_handle, 'write'):
599 if dense:
600 filepath_or_handle.write(
601 json.dumps(data, sort_keys=True, separators=(',',':')))
602 else:
603 filepath_or_handle.write(json.dumps(data, sort_keys=True, indent=2))
604 else:
605 with open(filepath_or_handle, 'wb') as f:
606 if dense:
607 json.dump(data, f, sort_keys=True, separators=(',',':'))
608 else:
609 json.dump(data, f, sort_keys=True, indent=2)
610
611
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000612def assert_is_renderable(pseudo_string):
613 """Asserts the input is a valid object to be processed by render()."""
614 assert (
615 isinstance(pseudo_string, (None.__class__, unicode)) or
616 hasattr(pseudo_string, 'render')), repr(pseudo_string)
617
618
619def render(pseudo_string):
620 """Converts the pseudo-string to an unicode string."""
621 assert_is_renderable(pseudo_string)
622 if isinstance(pseudo_string, (None.__class__, unicode)):
623 return pseudo_string
624 return pseudo_string.render()
625
626
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000627class Results(object):
628 """Results of a trace session."""
629
630 class _TouchedObject(object):
631 """Something, a file or a directory, that was accessed."""
632 def __init__(self, root, path, tainted, size, nb_files):
633 logging.debug(
634 '%s(%s, %s, %s, %s, %s)' %
635 (self.__class__.__name__, root, path, tainted, size, nb_files))
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000636 assert_is_renderable(root)
637 assert_is_renderable(path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000638 self.root = root
639 self.path = path
640 self.tainted = tainted
641 self.nb_files = nb_files
642 # Can be used as a cache or a default value, depending on context.
643 self._size = size
644 # These are cache only.
645 self._real_path = None
646
647 # Check internal consistency.
648 assert path, path
649 assert tainted or bool(root) != bool(isabs(path)), (root, path)
650 assert tainted or (
651 not os.path.exists(self.full_path) or
652 (self.full_path == get_native_path_case(self.full_path))), (
653 tainted, self.full_path, get_native_path_case(self.full_path))
654
655 @property
656 def existent(self):
657 return self.size != -1
658
659 @property
660 def full_path(self):
661 if self.root:
662 return os.path.join(self.root, self.path)
663 return self.path
664
665 @property
666 def real_path(self):
667 """Returns the path with symlinks resolved."""
668 if not self._real_path:
669 self._real_path = os.path.realpath(self.full_path)
670 return self._real_path
671
672 @property
673 def size(self):
674 """File's size. -1 is not existent."""
675 if self._size is None and not self.tainted:
676 try:
677 self._size = os.stat(self.full_path).st_size
678 except OSError:
679 self._size = -1
680 return self._size
681
682 def flatten(self):
683 """Returns a dict representing this object.
684
685 A 'size' of 0 means the file was only touched and not read.
686 """
687 return {
688 'path': self.path,
689 'size': self.size,
690 }
691
692 def replace_variables(self, variables):
693 """Replaces the root of this File with one of the variables if it matches.
694
695 If a variable replacement occurs, the cloned object becomes tainted.
696 """
697 for variable, root_path in variables.iteritems():
698 if self.path.startswith(root_path):
699 return self._clone(
700 self.root, variable + self.path[len(root_path):], True)
701 # No need to clone, returns ourself.
702 return self
703
704 def strip_root(self, root):
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000705 """Returns a clone of itself with 'root' stripped off.
706
707 Note that the file is kept if it is either accessible from a symlinked
708 path that was used to access the file or through the real path.
709 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000710 # Check internal consistency.
711 assert self.tainted or (isabs(root) and root.endswith(os.path.sep)), root
712 if not self.full_path.startswith(root):
713 # Now try to resolve the symlinks to see if it can be reached this way.
714 # Only try *after* trying without resolving symlink.
715 if not self.real_path.startswith(root):
716 return None
717 path = self.real_path
718 else:
719 path = self.full_path
720 return self._clone(root, path[len(root):], self.tainted)
721
722 def _clone(self, new_root, new_path, tainted):
723 raise NotImplementedError(self.__class__.__name__)
724
725 class File(_TouchedObject):
726 """A file that was accessed. May not be present anymore.
727
728 If tainted is true, it means it is not a real path anymore as a variable
729 replacement occured.
730
maruel@chromium.orge5c17132012-11-21 18:18:46 +0000731 If only_touched is True, this means the file was probed for existence, and
732 it is existent, but was never _opened_. If only_touched is True, the file
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000733 must have existed.
734 """
735 def __init__(self, root, path, tainted, size):
736 super(Results.File, self).__init__(root, path, tainted, size, 1)
737
738 def _clone(self, new_root, new_path, tainted):
739 """Clones itself keeping meta-data."""
740 # Keep the self.size and self._real_path caches for performance reason. It
741 # is also important when the file becomes tainted (with a variable instead
742 # of the real path) since self.path is not an on-disk path anymore so
743 # out._size cannot be updated.
744 out = self.__class__(new_root, new_path, tainted, self.size)
745 out._real_path = self._real_path
746 return out
747
748 class Directory(_TouchedObject):
749 """A directory of files. Must exist."""
750 def __init__(self, root, path, tainted, size, nb_files):
751 """path='.' is a valid value and must be handled appropriately."""
752 assert not path.endswith(os.path.sep), path
753 super(Results.Directory, self).__init__(
754 root, path + os.path.sep, tainted, size, nb_files)
755 # For a Directory instance, self.size is not a cache, it's an actual value
756 # that is never modified and represents the total size of the files
757 # contained in this directory. It is possible that the directory is empty
758 # so that size == 0; this happens if there's only an invalid symlink in
759 # it.
760
761 def flatten(self):
762 out = super(Results.Directory, self).flatten()
763 out['nb_files'] = self.nb_files
764 return out
765
766 def _clone(self, new_root, new_path, tainted):
767 """Clones itself keeping meta-data."""
768 out = self.__class__(
769 new_root,
770 new_path.rstrip(os.path.sep),
771 tainted,
772 self.size,
773 self.nb_files)
774 out._real_path = self._real_path
775 return out
776
777 class Process(object):
778 """A process that was traced.
779
780 Contains references to the files accessed by this process and its children.
781 """
782 def __init__(self, pid, files, executable, command, initial_cwd, children):
783 logging.debug('Process(%s, %d, ...)' % (pid, len(files)))
784 self.pid = pid
785 self.files = sorted(files, key=lambda x: x.path)
786 self.children = children
787 self.executable = executable
788 self.command = command
789 self.initial_cwd = initial_cwd
790
791 # Check internal consistency.
792 assert len(set(f.path for f in self.files)) == len(self.files), sorted(
793 f.path for f in self.files)
794 assert isinstance(self.children, list)
795 assert isinstance(self.files, list)
796
797 @property
798 def all(self):
799 for child in self.children:
800 for i in child.all:
801 yield i
802 yield self
803
804 def flatten(self):
805 return {
806 'children': [c.flatten() for c in self.children],
807 'command': self.command,
808 'executable': self.executable,
809 'files': [f.flatten() for f in self.files],
810 'initial_cwd': self.initial_cwd,
811 'pid': self.pid,
812 }
813
814 def strip_root(self, root):
815 assert isabs(root) and root.endswith(os.path.sep), root
816 # Loads the files after since they are constructed as objects.
817 out = self.__class__(
818 self.pid,
819 filter(None, (f.strip_root(root) for f in self.files)),
820 self.executable,
821 self.command,
822 self.initial_cwd,
823 [c.strip_root(root) for c in self.children])
824 logging.debug(
825 'strip_root(%s) %d -> %d' % (root, len(self.files), len(out.files)))
826 return out
827
828 def __init__(self, process):
829 self.process = process
830 # Cache.
831 self._files = None
832
833 def flatten(self):
834 return {
835 'root': self.process.flatten(),
836 }
837
838 @property
839 def files(self):
840 if self._files is None:
841 self._files = sorted(
842 sum((p.files for p in self.process.all), []),
843 key=lambda x: x.path)
844 return self._files
845
846 @property
847 def existent(self):
848 return [f for f in self.files if f.existent]
849
850 @property
851 def non_existent(self):
852 return [f for f in self.files if not f.existent]
853
854 def strip_root(self, root):
855 """Returns a clone with all the files outside the directory |root| removed
856 and converts all the path to be relative paths.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000857
858 It keeps files accessible through the |root| directory or that have been
859 accessed through any symlink which points to the same directory.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000860 """
861 # Resolve any symlink
862 root = os.path.realpath(root)
863 root = get_native_path_case(root).rstrip(os.path.sep) + os.path.sep
864 logging.debug('strip_root(%s)' % root)
865 return Results(self.process.strip_root(root))
866
867
868class ApiBase(object):
869 """OS-agnostic API to trace a process and its children."""
870 class Context(object):
871 """Processes one log line at a time and keeps the list of traced processes.
872
873 The parsing is complicated by the fact that logs are traced out of order for
874 strace but in-order for dtrace and logman. In addition, on Windows it is
875 very frequent that processids are reused so a flat list cannot be used. But
876 at the same time, it is impossible to faithfully construct a graph when the
877 logs are processed out of order. So both a tree and a flat mapping are used,
878 the tree is the real process tree, while the flat mapping stores the last
879 valid process for the corresponding processid. For the strace case, the
880 tree's head is guessed at the last moment.
881 """
882 class Process(object):
883 """Keeps context for one traced child process.
884
885 Logs all the files this process touched. Ignores directories.
886 """
887 def __init__(self, blacklist, pid, initial_cwd):
888 # Check internal consistency.
889 assert isinstance(pid, int), repr(pid)
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000890 assert_is_renderable(initial_cwd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000891 self.pid = pid
892 # children are Process instances.
893 self.children = []
894 self.initial_cwd = initial_cwd
895 self.cwd = None
896 self.files = set()
897 self.only_touched = set()
898 self.executable = None
899 self.command = None
900 self._blacklist = blacklist
901
902 def to_results_process(self):
903 """Resolves file case sensitivity and or late-bound strings."""
904 # When resolving files, it's normal to get dupe because a file could be
905 # opened multiple times with different case. Resolve the deduplication
906 # here.
907 def fix_path(x):
908 """Returns the native file path case.
909
910 Converts late-bound strings.
911 """
912 if not x:
913 # Do not convert None instance to 'None'.
914 return x
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000915 x = render(x)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000916 if os.path.isabs(x):
917 # If the path is not absolute, which tends to happen occasionally on
918 # Windows, it is not possible to get the native path case so ignore
919 # that trace. It mostly happens for 'executable' value.
920 x = get_native_path_case(x)
921 return x
922
923 def fix_and_blacklist_path(x):
924 x = fix_path(x)
925 if not x:
926 return
927 # The blacklist needs to be reapplied, since path casing could
928 # influence blacklisting.
929 if self._blacklist(x):
930 return
931 return x
932
933 # Filters out directories. Some may have passed through.
934 files = set(f for f in map(fix_and_blacklist_path, self.files) if f)
935 only_touched = set(
936 f for f in map(fix_and_blacklist_path, self.only_touched) if f)
937 only_touched -= files
938
939 files = [
940 Results.File(None, f, False, None) for f in files
941 if not os.path.isdir(f)
942 ]
943 # Using 0 as size means the file's content is ignored since the file was
944 # never opened for I/O.
945 files.extend(
946 Results.File(None, f, False, 0) for f in only_touched
947 if not os.path.isdir(f)
948 )
949 return Results.Process(
950 self.pid,
951 files,
952 fix_path(self.executable),
953 self.command,
954 fix_path(self.initial_cwd),
955 [c.to_results_process() for c in self.children])
956
957 def add_file(self, filepath, touch_only):
958 """Adds a file if it passes the blacklist."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000959 if self._blacklist(render(filepath)):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000960 return
961 logging.debug('add_file(%d, %s, %s)' % (self.pid, filepath, touch_only))
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000962 # Note that filepath and not render(filepath) is added. It is
963 # because filepath could be something else than a string, like a
964 # RelativePath instance for dtrace logs.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000965 if touch_only:
966 self.only_touched.add(filepath)
967 else:
968 self.files.add(filepath)
969
970 def __init__(self, blacklist):
971 self.blacklist = blacklist
972 # Initial process.
973 self.root_process = None
974 # dict to accelerate process lookup, to not have to lookup the whole graph
975 # each time.
976 self._process_lookup = {}
977
978 class Tracer(object):
979 """During it's lifetime, the tracing subsystem is enabled."""
980 def __init__(self, logname):
981 self._logname = logname
982 self._lock = threading.Lock()
983 self._traces = []
984 self._initialized = True
985
986 def trace(self, cmd, cwd, tracename, output):
987 """Runs the OS-specific trace program on an executable.
988
989 Arguments:
990 - cmd: The command (a list) to run.
991 - cwd: Current directory to start the child process in.
992 - tracename: Name of the trace in the logname file.
993 - output: If False, redirects output to PIPEs.
994
995 Returns a tuple (resultcode, output) and updates the internal trace
996 entries.
997 """
998 # The implementation adds an item to self._traces.
999 raise NotImplementedError(self.__class__.__name__)
1000
1001 def close(self, _timeout=None):
1002 """Saves the meta-data in the logname file.
1003
1004 For kernel-based tracing, stops the tracing subsystem.
1005
1006 Must not be used manually when using 'with' construct.
1007 """
1008 with self._lock:
1009 assert self._initialized
1010 try:
1011 data = {
1012 'traces': self._traces,
1013 }
1014 write_json(self._logname, data, False)
1015 finally:
1016 self._initialized = False
1017
1018 def post_process_log(self):
1019 """Post-processes the log so it becomes faster to load afterward.
1020
1021 Must not be used manually when using 'with' construct.
1022 """
1023 assert not self._initialized, 'Must stop tracing first.'
1024
1025 def __enter__(self):
1026 """Enables 'with' statement."""
1027 return self
1028
1029 def __exit__(self, exc_type, exc_value, traceback):
1030 """Enables 'with' statement."""
1031 self.close()
1032 # If an exception was thrown, do not process logs.
1033 if not exc_type:
1034 self.post_process_log()
1035
1036 def get_tracer(self, logname):
1037 """Returns an ApiBase.Tracer instance.
1038
1039 Initializes the tracing subsystem, which is a requirement for kernel-based
1040 tracers. Only one tracer instance should be live at a time!
1041
1042 logname is the filepath to the json file that will contain the meta-data
1043 about the logs.
1044 """
1045 return self.Tracer(logname)
1046
1047 @staticmethod
1048 def clean_trace(logname):
1049 """Deletes an old log."""
1050 raise NotImplementedError()
1051
1052 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001053 def parse_log(cls, logname, blacklist, trace_name):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001054 """Processes trace logs and returns the files opened and the files that do
1055 not exist.
1056
1057 It does not track directories.
1058
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001059 Arguments:
1060 - logname: must be an absolute path.
1061 - blacklist: must be a lambda.
1062 - trace_name: optional trace to read, defaults to reading all traces.
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00001063
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001064 Most of the time, files that do not exist are temporary test files that
1065 should be put in /tmp instead. See http://crbug.com/116251.
1066
1067 Returns a list of dict with keys:
1068 - results: A Results instance.
1069 - trace: The corresponding tracename parameter provided to
1070 get_tracer().trace().
1071 - output: Output gathered during execution, if get_tracer().trace(...,
1072 output=False) was used.
1073 """
1074 raise NotImplementedError(cls.__class__.__name__)
1075
1076
1077class Strace(ApiBase):
1078 """strace implies linux."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001079 @staticmethod
1080 def load_filename(filename):
1081 """Parses a filename in a log."""
1082 assert isinstance(filename, str)
1083 out = ''
1084 i = 0
1085 while i < len(filename):
1086 c = filename[i]
1087 if c == '\\':
1088 out += chr(int(filename[i+1:i+4], 8))
1089 i += 4
1090 else:
1091 out += c
1092 i += 1
1093 # TODO(maruel): That's not necessarily true that the current code page is
1094 # utf-8.
1095 return out.decode('utf-8')
1096
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001097 class Context(ApiBase.Context):
1098 """Processes a strace log line and keeps the list of existent and non
1099 existent files accessed.
1100
1101 Ignores directories.
1102
1103 Uses late-binding to processes the cwd of each process. The problem is that
1104 strace generates one log file per process it traced but doesn't give any
1105 information about which process was started when and by who. So we don't
1106 even know which process is the initial one. So process the logs out of
1107 order and use late binding with RelativePath to be able to deduce the
1108 initial directory of each process once all the logs are parsed.
1109 """
1110 class Process(ApiBase.Context.Process):
1111 """Represents the state of a process.
1112
1113 Contains all the information retrieved from the pid-specific log.
1114 """
1115 # Function names are using ([a-z_0-9]+)
1116 # This is the most common format. function(args) = result
1117 RE_HEADER = re.compile(r'^([a-z_0-9]+)\((.+?)\)\s+= (.+)$')
1118 # An interrupted function call, only grab the minimal header.
1119 RE_UNFINISHED = re.compile(r'^([^\(]+)(.*) \<unfinished \.\.\.\>$')
1120 # A resumed function call.
1121 RE_RESUMED = re.compile(r'^<\.\.\. ([^ ]+) resumed> (.+)$')
1122 # A process received a signal.
1123 RE_SIGNAL = re.compile(r'^--- SIG[A-Z]+ .+ ---')
1124 # A process didn't handle a signal. Ignore any junk appearing before,
1125 # because the process was forcibly killed so it won't open any new file.
1126 RE_KILLED = re.compile(
1127 r'^.*\+\+\+ killed by ([A-Z]+)( \(core dumped\))? \+\+\+$')
1128 # The process has exited.
1129 RE_PROCESS_EXITED = re.compile(r'^\+\+\+ exited with (\d+) \+\+\+')
1130 # A call was canceled. Ignore any prefix.
1131 RE_UNAVAILABLE = re.compile(r'^.*\)\s*= \? <unavailable>$')
1132 # Happens when strace fails to even get the function name.
1133 UNNAMED_FUNCTION = '????'
1134
1135 # Corner-case in python, a class member function decorator must not be
1136 # @staticmethod.
1137 def parse_args(regexp, expect_zero): # pylint: disable=E0213
1138 """Automatically convert the str 'args' into a list of processed
1139 arguments.
1140
1141 Arguments:
1142 - regexp is used to parse args.
1143 - expect_zero: one of True, False or None.
1144 - True: will check for result.startswith('0') first and will ignore
1145 the trace line completely otherwise. This is important because for
1146 many functions, the regexp will not process if the call failed.
1147 - False: will check for not result.startswith(('?', '-1')) for the
1148 same reason than with True.
1149 - None: ignore result.
1150 """
1151 def meta_hook(function):
1152 assert function.__name__.startswith('handle_')
1153 def hook(self, args, result):
1154 if expect_zero is True and not result.startswith('0'):
1155 return
1156 if expect_zero is False and result.startswith(('?', '-1')):
1157 return
1158 match = re.match(regexp, args)
1159 if not match:
1160 raise TracingFailure(
1161 'Failed to parse %s(%s) = %s' %
1162 (function.__name__[len('handle_'):], args, result),
1163 None, None, None)
1164 return function(self, match.groups(), result)
1165 return hook
1166 return meta_hook
1167
1168 class RelativePath(object):
1169 """A late-bound relative path."""
1170 def __init__(self, parent, value):
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001171 assert_is_renderable(parent)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001172 self.parent = parent
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001173 assert isinstance(value, (None.__class__, str)), repr(value)
1174 self.value = Strace.load_filename(value) if value else value
1175 if self.value:
1176 assert '\\' not in self.value, value
1177 assert '\\' not in self.value, (repr(value), repr(self.value))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001178
1179 def render(self):
1180 """Returns the current directory this instance is representing.
1181
1182 This function is used to return the late-bound value.
1183 """
1184 if self.value and self.value.startswith(u'/'):
1185 # An absolute path.
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001186 # TODO(maruel): This is wrong, we can't assert it is utf-8.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001187 return self.value
1188 parent = self.parent.render() if self.parent else u'<None>'
1189 if self.value:
1190 return os.path.normpath(os.path.join(parent, self.value))
1191 return parent
1192
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001193 def __init__(self, root, pid):
1194 """Keeps enough information to be able to guess the original process
1195 root.
1196
1197 strace doesn't store which process was the initial process. So more
1198 information needs to be kept so the graph can be reconstructed from the
1199 flat map.
1200 """
1201 logging.info('%s(%d)' % (self.__class__.__name__, pid))
1202 super(Strace.Context.Process, self).__init__(root.blacklist, pid, None)
1203 assert isinstance(root, ApiBase.Context)
1204 self._root = weakref.ref(root)
1205 # The dict key is the function name of the pending call, like 'open'
1206 # or 'execve'.
1207 self._pending_calls = {}
1208 self._line_number = 0
1209 # Current directory when the process started.
1210 self.initial_cwd = self.RelativePath(self._root(), None)
1211 self.parentid = None
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001212 self._done = False
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001213
1214 def get_cwd(self):
1215 """Returns the best known value of cwd."""
1216 return self.cwd or self.initial_cwd
1217
1218 def render(self):
1219 """Returns the string value of the RelativePath() object.
1220
1221 Used by RelativePath. Returns the initial directory and not the
1222 current one since the current directory 'cwd' validity is time-limited.
1223
1224 The validity is only guaranteed once all the logs are processed.
1225 """
1226 return self.initial_cwd.render()
1227
1228 def on_line(self, line):
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001229 assert isinstance(line, str)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001230 self._line_number += 1
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001231 if self._done:
1232 raise TracingFailure(
maruel@chromium.orgbcfc9e12013-03-19 18:47:17 +00001233 'Found a trace for a terminated process or corrupted log',
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001234 None, None, None)
1235
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001236 if self.RE_SIGNAL.match(line):
1237 # Ignore signals.
1238 return
1239
1240 try:
1241 match = self.RE_KILLED.match(line)
1242 if match:
1243 # Converts a '+++ killed by Foo +++' trace into an exit_group().
1244 self.handle_exit_group(match.group(1), None)
1245 return
1246
1247 match = self.RE_PROCESS_EXITED.match(line)
1248 if match:
1249 # Converts a '+++ exited with 1 +++' trace into an exit_group()
1250 self.handle_exit_group(match.group(1), None)
1251 return
1252
1253 match = self.RE_UNFINISHED.match(line)
1254 if match:
1255 if match.group(1) in self._pending_calls:
1256 raise TracingFailure(
1257 'Found two unfinished calls for the same function',
1258 None, None, None,
1259 self._pending_calls)
1260 self._pending_calls[match.group(1)] = (
1261 match.group(1) + match.group(2))
1262 return
1263
1264 match = self.RE_UNAVAILABLE.match(line)
1265 if match:
1266 # This usually means a process was killed and a pending call was
1267 # canceled.
1268 # TODO(maruel): Look up the last exit_group() trace just above and
1269 # make sure any self._pending_calls[anything] is properly flushed.
1270 return
1271
1272 match = self.RE_RESUMED.match(line)
1273 if match:
1274 if match.group(1) not in self._pending_calls:
1275 raise TracingFailure(
1276 'Found a resumed call that was not logged as unfinished',
1277 None, None, None,
1278 self._pending_calls)
1279 pending = self._pending_calls.pop(match.group(1))
1280 # Reconstruct the line.
1281 line = pending + match.group(2)
1282
1283 match = self.RE_HEADER.match(line)
1284 if not match:
maruel@chromium.orgbcfc9e12013-03-19 18:47:17 +00001285 # The line is corrupted. It happens occasionally when a process is
1286 # killed forcibly with activity going on. Assume the process died.
1287 # No other line can be processed afterward.
1288 self._done = True
1289 return
1290
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001291 if match.group(1) == self.UNNAMED_FUNCTION:
1292 return
1293
1294 # It's a valid line, handle it.
1295 handler = getattr(self, 'handle_%s' % match.group(1), None)
1296 if not handler:
1297 self._handle_unknown(match.group(1), match.group(2), match.group(3))
1298 return handler(match.group(2), match.group(3))
1299 except TracingFailure, e:
1300 # Hack in the values since the handler could be a static function.
1301 e.pid = self.pid
1302 e.line = line
1303 e.line_number = self._line_number
1304 # Re-raise the modified exception.
1305 raise
1306 except (KeyError, NotImplementedError, ValueError), e:
1307 raise TracingFailure(
1308 'Trace generated a %s exception: %s' % (
1309 e.__class__.__name__, str(e)),
1310 self.pid,
1311 self._line_number,
1312 line,
1313 e)
1314
1315 @parse_args(r'^\"(.+?)\", [FKORWX_|]+$', True)
1316 def handle_access(self, args, _result):
1317 self._handle_file(args[0], True)
1318
1319 @parse_args(r'^\"(.+?)\"$', True)
1320 def handle_chdir(self, args, _result):
1321 """Updates cwd."""
1322 self.cwd = self.RelativePath(self, args[0])
1323 logging.debug('handle_chdir(%d, %s)' % (self.pid, self.cwd))
1324
1325 def handle_clone(self, _args, result):
1326 """Transfers cwd."""
1327 if result.startswith(('?', '-1')):
1328 # The call failed.
1329 return
1330 # Update the other process right away.
1331 childpid = int(result)
1332 child = self._root().get_or_set_proc(childpid)
1333 if child.parentid is not None or childpid in self.children:
1334 raise TracingFailure(
1335 'Found internal inconsitency in process lifetime detection '
1336 'during a clone() call',
1337 None, None, None)
1338
1339 # Copy the cwd object.
1340 child.initial_cwd = self.get_cwd()
1341 child.parentid = self.pid
1342 # It is necessary because the logs are processed out of order.
1343 self.children.append(child)
1344
1345 def handle_close(self, _args, _result):
1346 pass
1347
1348 def handle_chmod(self, _args, _result):
1349 pass
1350
1351 def handle_creat(self, _args, _result):
1352 # Ignore files created, since they didn't need to exist.
1353 pass
1354
1355 @parse_args(r'^\"(.+?)\", \[(.+)\], \[\/\* \d+ vars? \*\/\]$', True)
1356 def handle_execve(self, args, _result):
1357 # Even if in practice execve() doesn't returns when it succeeds, strace
1358 # still prints '0' as the result.
1359 filepath = args[0]
1360 self._handle_file(filepath, False)
1361 self.executable = self.RelativePath(self.get_cwd(), filepath)
1362 self.command = strace_process_quoted_arguments(args[1])
1363
1364 def handle_exit_group(self, _args, _result):
1365 """Removes cwd."""
1366 self.cwd = None
1367
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001368 @parse_args(r'^(\d+|AT_FDCWD), \"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', True)
1369 def handle_faccessat(self, args, _results):
1370 if args[0] == 'AT_FDCWD':
1371 self._handle_file(args[1], True)
1372 else:
1373 raise Exception('Relative faccess not implemented.')
1374
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001375 def handle_fork(self, args, result):
1376 self._handle_unknown('fork', args, result)
1377
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001378 def handle_futex(self, _args, _result):
1379 pass
1380
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001381 def handle_getcwd(self, _args, _result):
1382 pass
1383
1384 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1385 def handle_link(self, args, _result):
1386 self._handle_file(args[0], False)
1387 self._handle_file(args[1], False)
1388
1389 @parse_args(r'\"(.+?)\", \{.+?, \.\.\.\}', True)
1390 def handle_lstat(self, args, _result):
1391 self._handle_file(args[0], True)
1392
1393 def handle_mkdir(self, _args, _result):
1394 pass
1395
1396 @parse_args(r'^\"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', False)
1397 def handle_open(self, args, _result):
1398 if 'O_DIRECTORY' in args[1]:
1399 return
1400 self._handle_file(args[0], False)
1401
1402 @parse_args(r'^(\d+|AT_FDCWD), \"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', False)
1403 def handle_openat(self, args, _result):
1404 if 'O_DIRECTORY' in args[2]:
1405 return
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001406 if args[0] == 'AT_FDCWD':
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001407 self._handle_file(args[1], False)
1408 else:
1409 # TODO(maruel): Implement relative open if necessary instead of the
1410 # AT_FDCWD flag, let's hope not since this means tracking all active
1411 # directory handles.
1412 raise Exception('Relative open via openat not implemented.')
1413
1414 @parse_args(r'^\"(.+?)\", \".+?\"(\.\.\.)?, \d+$', False)
1415 def handle_readlink(self, args, _result):
1416 self._handle_file(args[0], False)
1417
1418 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1419 def handle_rename(self, args, _result):
1420 self._handle_file(args[0], False)
1421 self._handle_file(args[1], False)
1422
1423 def handle_rmdir(self, _args, _result):
1424 pass
1425
1426 def handle_setxattr(self, _args, _result):
1427 pass
1428
1429 @parse_args(r'\"(.+?)\", \{.+?, \.\.\.\}', True)
1430 def handle_stat(self, args, _result):
1431 self._handle_file(args[0], True)
1432
1433 def handle_symlink(self, _args, _result):
1434 pass
1435
csharp@chromium.orgfa672922012-11-02 17:43:31 +00001436 @parse_args(r'^\"(.+?)\", \d+', True)
1437 def handle_truncate(self, args, _result):
1438 self._handle_file(args[0], False)
1439
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001440 def handle_unlink(self, _args, _result):
1441 # In theory, the file had to be created anyway.
1442 pass
1443
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001444 def handle_unlinkat(self, _args, _result):
1445 # In theory, the file had to be created anyway.
1446 pass
1447
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001448 def handle_statfs(self, _args, _result):
1449 pass
1450
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001451 def handle_utimensat(self, _args, _result):
1452 pass
1453
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001454 def handle_vfork(self, args, result):
1455 self._handle_unknown('vfork', args, result)
1456
1457 @staticmethod
1458 def _handle_unknown(function, args, result):
1459 raise TracingFailure(
1460 'Unexpected/unimplemented trace %s(%s)= %s' %
1461 (function, args, result),
1462 None, None, None)
1463
1464 def _handle_file(self, filepath, touch_only):
1465 filepath = self.RelativePath(self.get_cwd(), filepath)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001466 #assert not touch_only, render(filepath)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001467 self.add_file(filepath, touch_only)
1468
1469 def __init__(self, blacklist, initial_cwd):
1470 super(Strace.Context, self).__init__(blacklist)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001471 assert_is_renderable(initial_cwd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001472 self.initial_cwd = initial_cwd
1473
1474 def render(self):
1475 """Returns the string value of the initial cwd of the root process.
1476
1477 Used by RelativePath.
1478 """
1479 return self.initial_cwd
1480
1481 def on_line(self, pid, line):
1482 """Transfers control into the Process.on_line() function."""
1483 self.get_or_set_proc(pid).on_line(line.strip())
1484
1485 def to_results(self):
1486 """Finds back the root process and verify consistency."""
1487 # TODO(maruel): Absolutely unecessary, fix me.
1488 root = [p for p in self._process_lookup.itervalues() if not p.parentid]
1489 if len(root) != 1:
1490 raise TracingFailure(
1491 'Found internal inconsitency in process lifetime detection '
1492 'while finding the root process',
1493 None,
1494 None,
1495 None,
1496 sorted(p.pid for p in root))
1497 self.root_process = root[0]
1498 process = self.root_process.to_results_process()
1499 if sorted(self._process_lookup) != sorted(p.pid for p in process.all):
1500 raise TracingFailure(
1501 'Found internal inconsitency in process lifetime detection '
1502 'while looking for len(tree) == len(list)',
1503 None,
1504 None,
1505 None,
1506 sorted(self._process_lookup),
1507 sorted(p.pid for p in process.all))
1508 return Results(process)
1509
1510 def get_or_set_proc(self, pid):
1511 """Returns the Context.Process instance for this pid or creates a new one.
1512 """
1513 if not pid or not isinstance(pid, int):
1514 raise TracingFailure(
1515 'Unpexpected value for pid: %r' % pid,
1516 pid,
1517 None,
1518 None,
1519 pid)
1520 if pid not in self._process_lookup:
1521 self._process_lookup[pid] = self.Process(self, pid)
1522 return self._process_lookup[pid]
1523
1524 @classmethod
1525 def traces(cls):
1526 """Returns the list of all handled traces to pass this as an argument to
1527 strace.
1528 """
1529 prefix = 'handle_'
1530 return [i[len(prefix):] for i in dir(cls.Process) if i.startswith(prefix)]
1531
1532 class Tracer(ApiBase.Tracer):
1533 MAX_LEN = 256
1534
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001535 def __init__(self, logname, use_sudo):
1536 super(Strace.Tracer, self).__init__(logname)
1537 self.use_sudo = use_sudo
1538
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001539 def trace(self, cmd, cwd, tracename, output):
1540 """Runs strace on an executable."""
1541 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
1542 assert os.path.isabs(cmd[0]), cmd[0]
1543 assert os.path.isabs(cwd), cwd
1544 assert os.path.normpath(cwd) == cwd, cwd
1545 with self._lock:
1546 if not self._initialized:
1547 raise TracingFailure(
1548 'Called Tracer.trace() on an unitialized object',
1549 None, None, None, tracename)
1550 assert tracename not in (i['trace'] for i in self._traces)
1551 stdout = stderr = None
1552 if output:
1553 stdout = subprocess.PIPE
1554 stderr = subprocess.STDOUT
1555 # Ensure all file related APIs are hooked.
1556 traces = ','.join(Strace.Context.traces() + ['file'])
1557 trace_cmd = [
1558 'strace',
1559 '-ff',
1560 '-s', '%d' % self.MAX_LEN,
1561 '-e', 'trace=%s' % traces,
1562 '-o', self._logname + '.' + tracename,
1563 ]
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001564 if self.use_sudo is True:
1565 trace_cmd.insert(0, 'sudo')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001566 child = subprocess.Popen(
1567 trace_cmd + cmd,
1568 cwd=cwd,
1569 stdin=subprocess.PIPE,
1570 stdout=stdout,
1571 stderr=stderr)
1572 out = child.communicate()[0]
1573 # TODO(maruel): Walk the logs and figure out the root process would
1574 # simplify parsing the logs a *lot*.
1575 with self._lock:
1576 assert tracename not in (i['trace'] for i in self._traces)
1577 self._traces.append(
1578 {
1579 'cmd': cmd,
1580 'cwd': cwd,
maruel@chromium.org964ddde2012-10-29 18:13:46 +00001581 'output': out,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001582 # The pid of strace process, not very useful.
1583 'pid': child.pid,
1584 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001585 })
1586 return child.returncode, out
1587
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001588 def __init__(self, use_sudo=None):
1589 super(Strace, self).__init__()
1590 self.use_sudo = use_sudo
1591
1592 def get_tracer(self, logname):
1593 return self.Tracer(logname, self.use_sudo)
1594
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001595 @staticmethod
1596 def clean_trace(logname):
1597 if os.path.isfile(logname):
1598 os.remove(logname)
1599 # Also delete any pid specific file from previous traces.
1600 for i in glob.iglob(logname + '.*'):
1601 if i.rsplit('.', 1)[1].isdigit():
1602 os.remove(i)
1603
1604 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001605 def parse_log(cls, logname, blacklist, trace_name):
1606 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00001607 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001608 data = read_json(logname)
1609 out = []
1610 for item in data['traces']:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001611 if trace_name and item['trace'] != trace_name:
1612 continue
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001613 result = {
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001614 'output': item['output'],
maruel@chromium.org964ddde2012-10-29 18:13:46 +00001615 'trace': item['trace'],
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001616 }
1617 try:
1618 context = cls.Context(blacklist, item['cwd'])
1619 for pidfile in glob.iglob('%s.%s.*' % (logname, item['trace'])):
1620 pid = pidfile.rsplit('.', 1)[1]
1621 if pid.isdigit():
1622 pid = int(pid)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001623 for line in open(pidfile, 'rb'):
1624 context.on_line(pid, line)
1625 result['results'] = context.to_results()
maruel@chromium.org424f5692012-10-15 15:42:38 +00001626 except TracingFailure:
1627 result['exception'] = sys.exc_info()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001628 out.append(result)
1629 return out
1630
1631
1632class Dtrace(ApiBase):
1633 """Uses DTrace framework through dtrace. Requires root access.
1634
1635 Implies Mac OSX.
1636
1637 dtruss can't be used because it has compatibility issues with python.
1638
1639 Also, the pid->cwd handling needs to be done manually since OSX has no way to
1640 get the absolute path of the 'cwd' dtrace variable from the probe.
1641
1642 Also, OSX doesn't populate curpsinfo->pr_psargs properly, see
1643 https://discussions.apple.com/thread/1980539. So resort to handling execve()
1644 manually.
1645
1646 errno is not printed in the log since this implementation currently only cares
1647 about files that were successfully opened.
1648 """
1649 class Context(ApiBase.Context):
1650 # Format: index pid function(args)
1651 RE_HEADER = re.compile(r'^\d+ (\d+) ([a-zA-Z_\-]+)\((.*?)\)$')
1652
1653 # Arguments parsing.
1654 RE_DTRACE_BEGIN = re.compile(r'^\"(.+?)\"$')
1655 RE_CHDIR = re.compile(r'^\"(.+?)\"$')
1656 RE_EXECVE = re.compile(r'^\"(.+?)\", \[(\d+), (.+)\]$')
1657 RE_OPEN = re.compile(r'^\"(.+?)\", (0x[0-9a-z]+), (0x[0-9a-z]+)$')
1658 RE_PROC_START = re.compile(r'^(\d+), \"(.+?)\", (\d+)$')
1659 RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$')
1660
1661 O_DIRECTORY = 0x100000
1662
1663 class Process(ApiBase.Context.Process):
1664 def __init__(self, *args):
1665 super(Dtrace.Context.Process, self).__init__(*args)
1666 self.cwd = self.initial_cwd
1667
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001668 def __init__(self, blacklist, thunk_pid, initial_cwd):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001669 logging.info(
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001670 '%s(%d, %s)' % (self.__class__.__name__, thunk_pid, initial_cwd))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001671 super(Dtrace.Context, self).__init__(blacklist)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001672 assert isinstance(initial_cwd, unicode), initial_cwd
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001673 # Process ID of the temporary script created by create_thunk().
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001674 self._thunk_pid = thunk_pid
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001675 self._initial_cwd = initial_cwd
1676 self._line_number = 0
1677
1678 def on_line(self, line):
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001679 assert isinstance(line, unicode), line
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001680 self._line_number += 1
1681 match = self.RE_HEADER.match(line)
1682 if not match:
1683 raise TracingFailure(
1684 'Found malformed line: %s' % line,
1685 None,
1686 self._line_number,
1687 line)
1688 fn = getattr(
1689 self,
1690 'handle_%s' % match.group(2).replace('-', '_'),
1691 self._handle_ignored)
1692 # It is guaranteed to succeed because of the regexp. Or at least I thought
1693 # it would.
1694 pid = int(match.group(1))
1695 try:
1696 return fn(pid, match.group(3))
1697 except TracingFailure, e:
1698 # Hack in the values since the handler could be a static function.
1699 e.pid = pid
1700 e.line = line
1701 e.line_number = self._line_number
1702 # Re-raise the modified exception.
1703 raise
1704 except (KeyError, NotImplementedError, ValueError), e:
1705 raise TracingFailure(
1706 'Trace generated a %s exception: %s' % (
1707 e.__class__.__name__, str(e)),
1708 pid,
1709 self._line_number,
1710 line,
1711 e)
1712
1713 def to_results(self):
1714 process = self.root_process.to_results_process()
1715 # Internal concistency check.
1716 if sorted(self._process_lookup) != sorted(p.pid for p in process.all):
1717 raise TracingFailure(
1718 'Found internal inconsitency in process lifetime detection '
1719 'while looking for len(tree) == len(list)',
1720 None,
1721 None,
1722 None,
1723 sorted(self._process_lookup),
1724 sorted(p.pid for p in process.all))
1725 return Results(process)
1726
1727 def handle_dtrace_BEGIN(self, _pid, args):
1728 if not self.RE_DTRACE_BEGIN.match(args):
1729 raise TracingFailure(
1730 'Found internal inconsitency in dtrace_BEGIN log line',
1731 None, None, None)
1732
1733 def handle_proc_start(self, pid, args):
1734 """Transfers cwd.
1735
1736 The dtrace script already takes care of only tracing the processes that
1737 are child of the traced processes so there is no need to verify the
1738 process hierarchy.
1739 """
1740 if pid in self._process_lookup:
1741 raise TracingFailure(
1742 'Found internal inconsitency in proc_start: %d started two times' %
1743 pid,
1744 None, None, None)
1745 match = self.RE_PROC_START.match(args)
1746 if not match:
1747 raise TracingFailure(
1748 'Failed to parse arguments: %s' % args,
1749 None, None, None)
1750 ppid = int(match.group(1))
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001751 if ppid == self._thunk_pid and not self.root_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001752 proc = self.root_process = self.Process(
1753 self.blacklist, pid, self._initial_cwd)
1754 elif ppid in self._process_lookup:
1755 proc = self.Process(self.blacklist, pid, self._process_lookup[ppid].cwd)
1756 self._process_lookup[ppid].children.append(proc)
1757 else:
1758 # Another process tree, ignore.
1759 return
1760 self._process_lookup[pid] = proc
1761 logging.debug(
1762 'New child: %s -> %d cwd:%s' %
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001763 (ppid, pid, render(proc.initial_cwd)))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001764
1765 def handle_proc_exit(self, pid, _args):
1766 """Removes cwd."""
1767 if pid in self._process_lookup:
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001768 # self._thunk_pid is not traced itself and other traces run neither.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001769 self._process_lookup[pid].cwd = None
1770
1771 def handle_execve(self, pid, args):
1772 """Sets the process' executable.
1773
1774 TODO(maruel): Read command line arguments. See
1775 https://discussions.apple.com/thread/1980539 for an example.
1776 https://gist.github.com/1242279
1777
1778 Will have to put the answer at http://stackoverflow.com/questions/7556249.
1779 :)
1780 """
1781 if not pid in self._process_lookup:
1782 # Another process tree, ignore.
1783 return
1784 match = self.RE_EXECVE.match(args)
1785 if not match:
1786 raise TracingFailure(
1787 'Failed to parse arguments: %r' % args,
1788 None, None, None)
1789 proc = self._process_lookup[pid]
1790 proc.executable = match.group(1)
1791 proc.command = self.process_escaped_arguments(match.group(3))
1792 if int(match.group(2)) != len(proc.command):
1793 raise TracingFailure(
1794 'Failed to parse execve() arguments: %s' % args,
1795 None, None, None)
1796
1797 def handle_chdir(self, pid, args):
1798 """Updates cwd."""
1799 if pid not in self._process_lookup:
1800 # Another process tree, ignore.
1801 return
1802 cwd = self.RE_CHDIR.match(args).group(1)
1803 if not cwd.startswith('/'):
1804 cwd2 = os.path.join(self._process_lookup[pid].cwd, cwd)
1805 logging.debug('handle_chdir(%d, %s) -> %s' % (pid, cwd, cwd2))
1806 else:
1807 logging.debug('handle_chdir(%d, %s)' % (pid, cwd))
1808 cwd2 = cwd
1809 self._process_lookup[pid].cwd = cwd2
1810
1811 def handle_open_nocancel(self, pid, args):
1812 """Redirects to handle_open()."""
1813 return self.handle_open(pid, args)
1814
1815 def handle_open(self, pid, args):
1816 if pid not in self._process_lookup:
1817 # Another process tree, ignore.
1818 return
1819 match = self.RE_OPEN.match(args)
1820 if not match:
1821 raise TracingFailure(
1822 'Failed to parse arguments: %s' % args,
1823 None, None, None)
1824 flag = int(match.group(2), 16)
1825 if self.O_DIRECTORY & flag == self.O_DIRECTORY:
1826 # Ignore directories.
1827 return
1828 self._handle_file(pid, match.group(1))
1829
1830 def handle_rename(self, pid, args):
1831 if pid not in self._process_lookup:
1832 # Another process tree, ignore.
1833 return
1834 match = self.RE_RENAME.match(args)
1835 if not match:
1836 raise TracingFailure(
1837 'Failed to parse arguments: %s' % args,
1838 None, None, None)
1839 self._handle_file(pid, match.group(1))
1840 self._handle_file(pid, match.group(2))
1841
1842 def _handle_file(self, pid, filepath):
1843 if not filepath.startswith('/'):
1844 filepath = os.path.join(self._process_lookup[pid].cwd, filepath)
1845 # We can get '..' in the path.
1846 filepath = os.path.normpath(filepath)
1847 # Sadly, still need to filter out directories here;
1848 # saw open_nocancel(".", 0, 0) = 0 lines.
1849 if os.path.isdir(filepath):
1850 return
1851 self._process_lookup[pid].add_file(filepath, False)
1852
1853 def handle_ftruncate(self, pid, args):
1854 """Just used as a signal to kill dtrace, ignoring."""
1855 pass
1856
1857 @staticmethod
1858 def _handle_ignored(pid, args):
1859 """Is called for all the event traces that are not handled."""
1860 raise NotImplementedError('Please implement me')
1861
1862 @staticmethod
1863 def process_escaped_arguments(text):
1864 """Extracts escaped arguments on a string and return the arguments as a
1865 list.
1866
1867 Implemented as an automaton.
1868
1869 Example:
1870 With text = '\\001python2.7\\001-c\\001print(\\"hi\\")\\0', the
1871 function will return ['python2.7', '-c', 'print("hi")]
1872 """
1873 if not text.endswith('\\0'):
1874 raise ValueError('String is not null terminated: %r' % text, text)
1875 text = text[:-2]
1876
1877 def unescape(x):
1878 """Replaces '\\' with '\' and '\?' (where ? is anything) with ?."""
1879 out = []
1880 escaped = False
1881 for i in x:
1882 if i == '\\' and not escaped:
1883 escaped = True
1884 continue
1885 escaped = False
1886 out.append(i)
1887 return ''.join(out)
1888
1889 return [unescape(i) for i in text.split('\\001')]
1890
1891 class Tracer(ApiBase.Tracer):
1892 # pylint: disable=C0301
1893 #
1894 # To understand the following code, you'll want to take a look at:
1895 # http://developers.sun.com/solaris/articles/dtrace_quickref/dtrace_quickref.html
1896 # https://wikis.oracle.com/display/DTrace/Variables
1897 # http://docs.oracle.com/cd/E19205-01/820-4221/
1898 #
1899 # 0. Dump all the valid probes into a text file. It is important, you
1900 # want to redirect into a file and you don't want to constantly 'sudo'.
1901 # $ sudo dtrace -l > probes.txt
1902 #
1903 # 1. Count the number of probes:
1904 # $ wc -l probes.txt
1905 # 81823 # On OSX 10.7, including 1 header line.
1906 #
1907 # 2. List providers, intentionally skipping all the 'syspolicy10925' and the
1908 # likes and skipping the header with NR>1:
1909 # $ awk 'NR>1 { print $2 }' probes.txt | sort | uniq | grep -v '[[:digit:]]'
1910 # dtrace
1911 # fbt
1912 # io
1913 # ip
1914 # lockstat
1915 # mach_trap
1916 # proc
1917 # profile
1918 # sched
1919 # syscall
1920 # tcp
1921 # vminfo
1922 #
1923 # 3. List of valid probes:
1924 # $ grep syscall probes.txt | less
1925 # or use dtrace directly:
1926 # $ sudo dtrace -l -P syscall | less
1927 #
1928 # trackedpid is an associative array where its value can be 0, 1 or 2.
1929 # 0 is for untracked processes and is the default value for items not
1930 # in the associative array.
1931 # 1 is for tracked processes.
1932 # 2 is for the script created by create_thunk() only. It is not tracked
1933 # itself but all its decendants are.
1934 #
1935 # The script will kill itself only once waiting_to_die == 1 and
1936 # current_processes == 0, so that both getlogin() was called and that
1937 # all traced processes exited.
1938 #
1939 # TODO(maruel): Use cacheable predicates. See
1940 # https://wikis.oracle.com/display/DTrace/Performance+Considerations
1941 D_CODE = """
1942 dtrace:::BEGIN {
1943 waiting_to_die = 0;
1944 current_processes = 0;
1945 logindex = 0;
1946 printf("%d %d %s_%s(\\"%s\\")\\n",
1947 logindex, PID, probeprov, probename, SCRIPT);
1948 logindex++;
1949 }
1950
1951 proc:::start /trackedpid[ppid]/ {
1952 trackedpid[pid] = 1;
1953 current_processes += 1;
1954 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
1955 logindex, pid, probeprov, probename,
1956 ppid,
1957 execname,
1958 current_processes);
1959 logindex++;
1960 }
1961 /* Should use SCRIPT but there is no access to this variable at that
1962 * point. */
1963 proc:::start /ppid == PID && execname == "Python"/ {
1964 trackedpid[pid] = 2;
1965 current_processes += 1;
1966 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
1967 logindex, pid, probeprov, probename,
1968 ppid,
1969 execname,
1970 current_processes);
1971 logindex++;
1972 }
1973 proc:::exit /trackedpid[pid] &&
1974 current_processes == 1 &&
1975 waiting_to_die == 1/ {
1976 trackedpid[pid] = 0;
1977 current_processes -= 1;
1978 printf("%d %d %s_%s(%d)\\n",
1979 logindex, pid, probeprov, probename,
1980 current_processes);
1981 logindex++;
1982 exit(0);
1983 }
1984 proc:::exit /trackedpid[pid]/ {
1985 trackedpid[pid] = 0;
1986 current_processes -= 1;
1987 printf("%d %d %s_%s(%d)\\n",
1988 logindex, pid, probeprov, probename,
1989 current_processes);
1990 logindex++;
1991 }
1992
1993 /* Use an arcane function to detect when we need to die */
1994 syscall::ftruncate:entry /pid == PID && arg0 == FILE_ID/ {
1995 waiting_to_die = 1;
1996 printf("%d %d %s()\\n", logindex, pid, probefunc);
1997 logindex++;
1998 }
1999 syscall::ftruncate:entry /
2000 pid == PID && arg0 == FILE_ID && current_processes == 0/ {
2001 exit(0);
2002 }
2003
2004 syscall::open*:entry /trackedpid[pid] == 1/ {
2005 self->open_arg0 = arg0;
2006 self->open_arg1 = arg1;
2007 self->open_arg2 = arg2;
2008 }
2009 syscall::open*:return /trackedpid[pid] == 1 && errno == 0/ {
2010 this->open_arg0 = copyinstr(self->open_arg0);
2011 printf("%d %d %s(\\"%s\\", 0x%x, 0x%x)\\n",
2012 logindex, pid, probefunc,
2013 this->open_arg0,
2014 self->open_arg1,
2015 self->open_arg2);
2016 logindex++;
2017 this->open_arg0 = 0;
2018 }
2019 syscall::open*:return /trackedpid[pid] == 1/ {
2020 self->open_arg0 = 0;
2021 self->open_arg1 = 0;
2022 self->open_arg2 = 0;
2023 }
2024
2025 syscall::rename:entry /trackedpid[pid] == 1/ {
2026 self->rename_arg0 = arg0;
2027 self->rename_arg1 = arg1;
2028 }
2029 syscall::rename:return /trackedpid[pid] == 1 && errno == 0/ {
2030 this->rename_arg0 = copyinstr(self->rename_arg0);
2031 this->rename_arg1 = copyinstr(self->rename_arg1);
2032 printf("%d %d %s(\\"%s\\", \\"%s\\")\\n",
2033 logindex, pid, probefunc,
2034 this->rename_arg0,
2035 this->rename_arg1);
2036 logindex++;
2037 this->rename_arg0 = 0;
2038 this->rename_arg1 = 0;
2039 }
2040 syscall::rename:return /trackedpid[pid] == 1/ {
2041 self->rename_arg0 = 0;
2042 self->rename_arg1 = 0;
2043 }
2044
2045 /* Track chdir, it's painful because it is only receiving relative path.
2046 */
2047 syscall::chdir:entry /trackedpid[pid] == 1/ {
2048 self->chdir_arg0 = arg0;
2049 }
2050 syscall::chdir:return /trackedpid[pid] == 1 && errno == 0/ {
2051 this->chdir_arg0 = copyinstr(self->chdir_arg0);
2052 printf("%d %d %s(\\"%s\\")\\n",
2053 logindex, pid, probefunc,
2054 this->chdir_arg0);
2055 logindex++;
2056 this->chdir_arg0 = 0;
2057 }
2058 syscall::chdir:return /trackedpid[pid] == 1/ {
2059 self->chdir_arg0 = 0;
2060 }
2061 """
2062
2063 # execve-specific code, tends to throw a lot of exceptions.
2064 D_CODE_EXECVE = """
2065 /* Finally what we care about! */
2066 syscall::exec*:entry /trackedpid[pid]/ {
2067 self->exec_arg0 = copyinstr(arg0);
2068 /* Incrementally probe for a NULL in the argv parameter of execve() to
2069 * figure out argc. */
2070 /* TODO(maruel): Skip the remaining copyin() when a NULL pointer was
2071 * found. */
2072 self->exec_argc = 0;
2073 /* Probe for argc==1 */
2074 this->exec_argv = (user_addr_t*)copyin(
2075 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2076 self->exec_argc = this->exec_argv[self->exec_argc] ?
2077 (self->exec_argc + 1) : self->exec_argc;
2078
2079 /* Probe for argc==2 */
2080 this->exec_argv = (user_addr_t*)copyin(
2081 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2082 self->exec_argc = this->exec_argv[self->exec_argc] ?
2083 (self->exec_argc + 1) : self->exec_argc;
2084
2085 /* Probe for argc==3 */
2086 this->exec_argv = (user_addr_t*)copyin(
2087 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2088 self->exec_argc = this->exec_argv[self->exec_argc] ?
2089 (self->exec_argc + 1) : self->exec_argc;
2090
2091 /* Probe for argc==4 */
2092 this->exec_argv = (user_addr_t*)copyin(
2093 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2094 self->exec_argc = this->exec_argv[self->exec_argc] ?
2095 (self->exec_argc + 1) : self->exec_argc;
2096
2097 /* Copy the inputs strings since there is no guarantee they'll be
2098 * present after the call completed. */
2099 self->exec_argv0 = (self->exec_argc > 0) ?
2100 copyinstr(this->exec_argv[0]) : "";
2101 self->exec_argv1 = (self->exec_argc > 1) ?
2102 copyinstr(this->exec_argv[1]) : "";
2103 self->exec_argv2 = (self->exec_argc > 2) ?
2104 copyinstr(this->exec_argv[2]) : "";
2105 self->exec_argv3 = (self->exec_argc > 3) ?
2106 copyinstr(this->exec_argv[3]) : "";
2107 this->exec_argv = 0;
2108 }
2109 syscall::exec*:return /trackedpid[pid] && errno == 0/ {
2110 /* We need to join strings here, as using multiple printf() would
2111 * cause tearing when multiple threads/processes are traced.
2112 * Since it is impossible to escape a string and join it to another one,
2113 * like sprintf("%s%S", previous, more), use hackery.
2114 * Each of the elements are split with a \\1. \\0 cannot be used because
2115 * it is simply ignored. This will conflict with any program putting a
2116 * \\1 in their execve() string but this should be "rare enough" */
2117 this->args = "";
2118 /* Process exec_argv[0] */
2119 this->args = strjoin(
2120 this->args, (self->exec_argc > 0) ? self->exec_argv0 : "");
2121
2122 /* Process exec_argv[1] */
2123 this->args = strjoin(
2124 this->args, (self->exec_argc > 1) ? "\\1" : "");
2125 this->args = strjoin(
2126 this->args, (self->exec_argc > 1) ? self->exec_argv1 : "");
2127
2128 /* Process exec_argv[2] */
2129 this->args = strjoin(
2130 this->args, (self->exec_argc > 2) ? "\\1" : "");
2131 this->args = strjoin(
2132 this->args, (self->exec_argc > 2) ? self->exec_argv2 : "");
2133
2134 /* Process exec_argv[3] */
2135 this->args = strjoin(
2136 this->args, (self->exec_argc > 3) ? "\\1" : "");
2137 this->args = strjoin(
2138 this->args, (self->exec_argc > 3) ? self->exec_argv3 : "");
2139
2140 /* Prints self->exec_argc to permits verifying the internal
2141 * consistency since this code is quite fishy. */
2142 printf("%d %d %s(\\"%s\\", [%d, %S])\\n",
2143 logindex, pid, probefunc,
2144 self->exec_arg0,
2145 self->exec_argc,
2146 this->args);
2147 logindex++;
2148 this->args = 0;
2149 }
2150 syscall::exec*:return /trackedpid[pid]/ {
2151 self->exec_arg0 = 0;
2152 self->exec_argc = 0;
2153 self->exec_argv0 = 0;
2154 self->exec_argv1 = 0;
2155 self->exec_argv2 = 0;
2156 self->exec_argv3 = 0;
2157 }
2158 """
2159
2160 # Code currently not used.
2161 D_EXTRANEOUS = """
2162 /* This is a good learning experience, since it traces a lot of things
2163 * related to the process and child processes.
2164 * Warning: it generates a gigantic log. For example, tracing
2165 * "data/trace_inputs/child1.py --child" generates a 2mb log and takes
2166 * several minutes to execute.
2167 */
2168 /*
2169 mach_trap::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2170 printf("%d %d %s_%s() = %d\\n",
2171 logindex, pid, probeprov, probefunc, errno);
2172 logindex++;
2173 }
2174 proc::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2175 printf("%d %d %s_%s() = %d\\n",
2176 logindex, pid, probeprov, probefunc, errno);
2177 logindex++;
2178 }
2179 sched::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2180 printf("%d %d %s_%s() = %d\\n",
2181 logindex, pid, probeprov, probefunc, errno);
2182 logindex++;
2183 }
2184 syscall::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2185 printf("%d %d %s_%s() = %d\\n",
2186 logindex, pid, probeprov, probefunc, errno);
2187 logindex++;
2188 }
2189 vminfo::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2190 printf("%d %d %s_%s() = %d\\n",
2191 logindex, pid, probeprov, probefunc, errno);
2192 logindex++;
2193 }
2194 */
2195 /* TODO(maruel): *stat* functions and friends
2196 syscall::access:return,
2197 syscall::chdir:return,
2198 syscall::chflags:return,
2199 syscall::chown:return,
2200 syscall::chroot:return,
2201 syscall::getattrlist:return,
2202 syscall::getxattr:return,
2203 syscall::lchown:return,
2204 syscall::lstat64:return,
2205 syscall::lstat:return,
2206 syscall::mkdir:return,
2207 syscall::pathconf:return,
2208 syscall::readlink:return,
2209 syscall::removexattr:return,
2210 syscall::setxattr:return,
2211 syscall::stat64:return,
2212 syscall::stat:return,
2213 syscall::truncate:return,
2214 syscall::unlink:return,
2215 syscall::utimes:return,
2216 */
2217 """
2218
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002219 def __init__(self, logname, use_sudo):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002220 """Starts the log collection with dtrace.
2221
2222 Requires root access or chmod 4555 on dtrace. dtrace is asynchronous so
2223 this needs to wait for dtrace to be "warmed up".
2224 """
2225 super(Dtrace.Tracer, self).__init__(logname)
2226 self._script = create_thunk()
2227 # This unique dummy temp file is used to signal the dtrace script that it
2228 # should stop as soon as all the child processes are done. A bit hackish
2229 # but works fine enough.
2230 self._dummy_file_id, self._dummy_file_name = tempfile.mkstemp(
2231 prefix='trace_signal_file')
2232
2233 # Note: do not use the -p flag. It's useless if the initial process quits
2234 # too fast, resulting in missing traces from the grand-children. The D
2235 # code manages the dtrace lifetime itself.
2236 trace_cmd = [
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002237 'dtrace',
2238 # Use a larger buffer if getting 'out of scratch space' errors.
2239 # Ref: https://wikis.oracle.com/display/DTrace/Options+and+Tunables
2240 '-b', '10m',
2241 '-x', 'dynvarsize=10m',
2242 #'-x', 'dtrace_global_maxsize=1m',
2243 '-x', 'evaltime=exec',
2244 '-o', '/dev/stderr',
2245 '-q',
2246 '-n', self._get_dtrace_code(),
2247 ]
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002248 if use_sudo is not False:
2249 trace_cmd.insert(0, 'sudo')
2250
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002251 with open(self._logname + '.log', 'wb') as logfile:
2252 self._dtrace = subprocess.Popen(
2253 trace_cmd, stdout=logfile, stderr=subprocess.STDOUT)
2254 logging.debug('Started dtrace pid: %d' % self._dtrace.pid)
2255
2256 # Reads until one line is printed, which signifies dtrace is up and ready.
2257 with open(self._logname + '.log', 'rb') as logfile:
2258 while 'dtrace_BEGIN' not in logfile.readline():
2259 if self._dtrace.poll() is not None:
2260 # Do a busy wait. :/
2261 break
2262 logging.debug('dtrace started')
2263
2264 def _get_dtrace_code(self):
2265 """Setups the D code to implement child process tracking.
2266
2267 Injects the cookie in the script so it knows when to stop.
2268
2269 The script will detect any instance of the script created with
2270 create_thunk() and will start tracing it.
2271 """
maruel@chromium.orgbe75f282012-11-02 19:34:37 +00002272 out = (
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002273 'inline int PID = %d;\n'
2274 'inline string SCRIPT = "%s";\n'
2275 'inline int FILE_ID = %d;\n'
2276 '\n'
2277 '%s') % (
2278 os.getpid(),
2279 self._script,
2280 self._dummy_file_id,
maruel@chromium.orgbe75f282012-11-02 19:34:37 +00002281 self.D_CODE)
2282 if os.environ.get('TRACE_INPUTS_DTRACE_ENABLE_EXECVE') == '1':
2283 # Do not enable by default since it tends to spew dtrace: error lines
2284 # because the execve() parameters are not in valid memory at the time of
2285 # logging.
2286 # TODO(maruel): Find a way to make this reliable since it's useful but
2287 # only works in limited/trivial uses cases for now.
2288 out += self.D_CODE_EXECVE
2289 return out
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002290
2291 def trace(self, cmd, cwd, tracename, output):
2292 """Runs dtrace on an executable.
2293
2294 This dtruss is broken when it starts the process itself or when tracing
2295 child processes, this code starts a wrapper process
2296 generated with create_thunk() which starts the executable to trace.
2297 """
2298 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
2299 assert os.path.isabs(cmd[0]), cmd[0]
2300 assert os.path.isabs(cwd), cwd
2301 assert os.path.normpath(cwd) == cwd, cwd
2302 with self._lock:
2303 if not self._initialized:
2304 raise TracingFailure(
2305 'Called Tracer.trace() on an unitialized object',
2306 None, None, None, tracename)
2307 assert tracename not in (i['trace'] for i in self._traces)
2308
2309 # Starts the script wrapper to start the child process. This signals the
2310 # dtrace script that this process is to be traced.
2311 stdout = stderr = None
2312 if output:
2313 stdout = subprocess.PIPE
2314 stderr = subprocess.STDOUT
2315 child_cmd = [
2316 sys.executable,
2317 self._script,
2318 tracename,
2319 ]
2320 # Call a dummy function so that dtrace knows I'm about to launch a process
2321 # that needs to be traced.
2322 # Yummy.
2323 child = subprocess.Popen(
2324 child_cmd + fix_python_path(cmd),
2325 stdin=subprocess.PIPE,
2326 stdout=stdout,
2327 stderr=stderr,
2328 cwd=cwd)
2329 logging.debug('Started child pid: %d' % child.pid)
2330
2331 out = child.communicate()[0]
2332 # This doesn't mean tracing is done, one of the grand-child process may
2333 # still be alive. It will be tracked with the dtrace script.
2334
2335 with self._lock:
2336 assert tracename not in (i['trace'] for i in self._traces)
2337 self._traces.append(
2338 {
2339 'cmd': cmd,
2340 'cwd': cwd,
2341 # The pid of strace process, not very useful.
2342 'pid': child.pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002343 'output': out,
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002344 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002345 })
2346 return child.returncode, out
2347
2348 def close(self, timeout=None):
2349 """Terminates dtrace."""
2350 logging.debug('close(%s)' % timeout)
2351 try:
2352 try:
2353 super(Dtrace.Tracer, self).close(timeout)
2354 # Signal dtrace that it should stop now.
maruel@chromium.orgf43e68b2012-10-15 20:23:10 +00002355 # ftruncate doesn't exist on Windows.
2356 os.ftruncate(self._dummy_file_id, 0) # pylint: disable=E1101
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002357 if timeout:
2358 start = time.time()
2359 # Use polling. :/
2360 while (self._dtrace.poll() is None and
2361 (time.time() - start) < timeout):
2362 time.sleep(0.1)
2363 self._dtrace.kill()
2364 self._dtrace.wait()
2365 finally:
2366 # Make sure to kill it in any case.
2367 if self._dtrace.poll() is None:
2368 try:
2369 self._dtrace.kill()
2370 self._dtrace.wait()
2371 except OSError:
2372 pass
2373
2374 if self._dtrace.returncode != 0:
2375 # Warn about any dtrace failure but basically ignore it.
2376 print 'dtrace failure: %s' % self._dtrace.returncode
2377 finally:
2378 os.close(self._dummy_file_id)
2379 os.remove(self._dummy_file_name)
2380 os.remove(self._script)
2381
2382 def post_process_log(self):
2383 """Sorts the log back in order when each call occured.
2384
2385 dtrace doesn't save the buffer in strict order since it keeps one buffer
2386 per CPU.
2387 """
2388 super(Dtrace.Tracer, self).post_process_log()
2389 logname = self._logname + '.log'
2390 with open(logname, 'rb') as logfile:
2391 lines = [l for l in logfile if l.strip()]
2392 errors = [l for l in lines if l.startswith('dtrace:')]
2393 if errors:
2394 raise TracingFailure(
2395 'Found errors in the trace: %s' % '\n'.join(errors),
2396 None, None, None, logname)
2397 try:
2398 lines = sorted(lines, key=lambda l: int(l.split(' ', 1)[0]))
2399 except ValueError:
2400 raise TracingFailure(
2401 'Found errors in the trace: %s' % '\n'.join(
2402 l for l in lines if l.split(' ', 1)[0].isdigit()),
2403 None, None, None, logname)
2404 with open(logname, 'wb') as logfile:
2405 logfile.write(''.join(lines))
2406
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002407 def __init__(self, use_sudo=None):
2408 super(Dtrace, self).__init__()
2409 self.use_sudo = use_sudo
2410
2411 def get_tracer(self, logname):
2412 return self.Tracer(logname, self.use_sudo)
2413
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002414 @staticmethod
2415 def clean_trace(logname):
2416 for ext in ('', '.log'):
2417 if os.path.isfile(logname + ext):
2418 os.remove(logname + ext)
2419
2420 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00002421 def parse_log(cls, logname, blacklist, trace_name):
2422 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00002423 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002424
2425 def blacklist_more(filepath):
2426 # All the HFS metadata is in the form /.vol/...
2427 return blacklist(filepath) or re.match(r'^\/\.vol\/.+$', filepath)
2428
2429 data = read_json(logname)
2430 out = []
2431 for item in data['traces']:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00002432 if trace_name and item['trace'] != trace_name:
2433 continue
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002434 result = {
2435 'output': item['output'],
2436 'trace': item['trace'],
2437 }
2438 try:
2439 context = cls.Context(blacklist_more, item['pid'], item['cwd'])
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002440 # It's fine to assume the file as UTF-8: OSX enforces the file names to
2441 # be valid UTF-8 and we control the log output.
2442 for line in codecs.open(logname + '.log', 'rb', encoding='utf-8'):
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002443 context.on_line(line)
2444 result['results'] = context.to_results()
2445 except TracingFailure:
2446 result['exception'] = sys.exc_info()
2447 out.append(result)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002448 return out
2449
2450
2451class LogmanTrace(ApiBase):
2452 """Uses the native Windows ETW based tracing functionality to trace a child
2453 process.
2454
2455 Caveat: this implementations doesn't track cwd or initial_cwd. It is because
2456 the Windows Kernel doesn't have a concept of 'current working directory' at
2457 all. A Win32 process has a map of current directories, one per drive letter
2458 and it is managed by the user mode kernel32.dll. In kernel, a file is always
2459 opened relative to another file_object or as an absolute path. All the current
2460 working directory logic is done in user mode.
2461 """
2462 class Context(ApiBase.Context):
2463 """Processes a ETW log line and keeps the list of existent and non
2464 existent files accessed.
2465
2466 Ignores directories.
2467 """
2468 # These indexes are for the stripped version in json.
2469 EVENT_NAME = 0
2470 TYPE = 1
2471 PID = 2
2472 TID = 3
2473 PROCESSOR_ID = 4
2474 TIMESTAMP = 5
2475 USER_DATA = 6
2476
2477 class Process(ApiBase.Context.Process):
2478 def __init__(self, *args):
2479 super(LogmanTrace.Context.Process, self).__init__(*args)
2480 # Handle file objects that succeeded.
2481 self.file_objects = {}
2482
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002483 def __init__(self, blacklist, thunk_pid, trace_name, thunk_cmd):
2484 logging.info(
2485 '%s(%d, %s, %s)', self.__class__.__name__, thunk_pid, trace_name,
2486 thunk_cmd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002487 super(LogmanTrace.Context, self).__init__(blacklist)
2488 self._drive_map = DosDriveMap()
2489 # Threads mapping to the corresponding process id.
2490 self._threads_active = {}
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002491 # Process ID of the tracer, e.g. the temporary script created by
2492 # create_thunk(). This is tricky because the process id may have been
2493 # reused.
2494 self._thunk_pid = thunk_pid
2495 self._thunk_cmd = thunk_cmd
2496 self._trace_name = trace_name
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002497 self._line_number = 0
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002498 self._thunk_process = None
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002499
2500 def on_line(self, line):
2501 """Processes a json Event line."""
2502 self._line_number += 1
2503 try:
2504 # By Opcode
2505 handler = getattr(
2506 self,
2507 'handle_%s_%s' % (line[self.EVENT_NAME], line[self.TYPE]),
2508 None)
2509 if not handler:
2510 raise TracingFailure(
2511 'Unexpected event %s_%s' % (
2512 line[self.EVENT_NAME], line[self.TYPE]),
2513 None, None, None)
2514 handler(line)
2515 except TracingFailure, e:
2516 # Hack in the values since the handler could be a static function.
2517 e.pid = line[self.PID]
2518 e.line = line
2519 e.line_number = self._line_number
2520 # Re-raise the modified exception.
2521 raise
2522 except (KeyError, NotImplementedError, ValueError), e:
2523 raise TracingFailure(
2524 'Trace generated a %s exception: %s' % (
2525 e.__class__.__name__, str(e)),
2526 line[self.PID],
2527 self._line_number,
2528 line,
2529 e)
2530
2531 def to_results(self):
2532 if not self.root_process:
2533 raise TracingFailure(
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002534 'Failed to detect the initial process %d' % self._thunk_pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002535 None, None, None)
2536 process = self.root_process.to_results_process()
2537 return Results(process)
2538
2539 def _thread_to_process(self, tid):
2540 """Finds the process from the thread id."""
2541 tid = int(tid, 16)
2542 pid = self._threads_active.get(tid)
2543 if not pid or not self._process_lookup.get(pid):
2544 return
2545 return self._process_lookup[pid]
2546
2547 @classmethod
2548 def handle_EventTrace_Header(cls, line):
2549 """Verifies no event was dropped, e.g. no buffer overrun occured."""
2550 BUFFER_SIZE = cls.USER_DATA
2551 #VERSION = cls.USER_DATA + 1
2552 #PROVIDER_VERSION = cls.USER_DATA + 2
2553 #NUMBER_OF_PROCESSORS = cls.USER_DATA + 3
2554 #END_TIME = cls.USER_DATA + 4
2555 #TIMER_RESOLUTION = cls.USER_DATA + 5
2556 #MAX_FILE_SIZE = cls.USER_DATA + 6
2557 #LOG_FILE_MODE = cls.USER_DATA + 7
2558 #BUFFERS_WRITTEN = cls.USER_DATA + 8
2559 #START_BUFFERS = cls.USER_DATA + 9
2560 #POINTER_SIZE = cls.USER_DATA + 10
2561 EVENTS_LOST = cls.USER_DATA + 11
2562 #CPU_SPEED = cls.USER_DATA + 12
2563 #LOGGER_NAME = cls.USER_DATA + 13
2564 #LOG_FILE_NAME = cls.USER_DATA + 14
2565 #BOOT_TIME = cls.USER_DATA + 15
2566 #PERF_FREQ = cls.USER_DATA + 16
2567 #START_TIME = cls.USER_DATA + 17
2568 #RESERVED_FLAGS = cls.USER_DATA + 18
2569 #BUFFERS_LOST = cls.USER_DATA + 19
2570 #SESSION_NAME_STRING = cls.USER_DATA + 20
2571 #LOG_FILE_NAME_STRING = cls.USER_DATA + 21
2572 if line[EVENTS_LOST] != '0':
2573 raise TracingFailure(
2574 ( '%s events were lost during trace, please increase the buffer '
2575 'size from %s') % (line[EVENTS_LOST], line[BUFFER_SIZE]),
2576 None, None, None)
2577
2578 def handle_FileIo_Cleanup(self, line):
2579 """General wisdom: if a file is closed, it's because it was opened.
2580
2581 Note that FileIo_Close is not used since if a file was opened properly but
2582 not closed before the process exits, only Cleanup will be logged.
2583 """
2584 #IRP = self.USER_DATA
2585 TTID = self.USER_DATA + 1 # Thread ID, that's what we want.
2586 FILE_OBJECT = self.USER_DATA + 2
2587 #FILE_KEY = self.USER_DATA + 3
2588 proc = self._thread_to_process(line[TTID])
2589 if not proc:
2590 # Not a process we care about.
2591 return
2592 file_object = line[FILE_OBJECT]
2593 if file_object in proc.file_objects:
2594 proc.add_file(proc.file_objects.pop(file_object), False)
2595
2596 def handle_FileIo_Create(self, line):
2597 """Handles a file open.
2598
2599 All FileIo events are described at
2600 http://msdn.microsoft.com/library/windows/desktop/aa363884.aspx
2601 for some value of 'description'.
2602
2603 " (..) process and thread id values of the IO events (..) are not valid "
2604 http://msdn.microsoft.com/magazine/ee358703.aspx
2605
2606 The FileIo.Create event doesn't return if the CreateFile() call
2607 succeeded, so keep track of the file_object and check that it is
2608 eventually closed with FileIo_Cleanup.
2609 """
2610 #IRP = self.USER_DATA
2611 TTID = self.USER_DATA + 1 # Thread ID, that's what we want.
2612 FILE_OBJECT = self.USER_DATA + 2
2613 #CREATE_OPTIONS = self.USER_DATA + 3
2614 #FILE_ATTRIBUTES = self.USER_DATA + 4
2615 #self.USER_DATA + SHARE_ACCESS = 5
2616 OPEN_PATH = self.USER_DATA + 6
2617
2618 proc = self._thread_to_process(line[TTID])
2619 if not proc:
2620 # Not a process we care about.
2621 return
2622
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002623 raw_path = line[OPEN_PATH]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002624 # Ignore directories and bare drive right away.
2625 if raw_path.endswith(os.path.sep):
2626 return
2627 filepath = self._drive_map.to_win32(raw_path)
2628 # Ignore bare drive right away. Some may still fall through with format
2629 # like '\\?\X:'
2630 if len(filepath) == 2:
2631 return
2632 file_object = line[FILE_OBJECT]
2633 if os.path.isdir(filepath):
2634 # There is no O_DIRECTORY equivalent on Windows. The closed is
2635 # FILE_FLAG_BACKUP_SEMANTICS but it's not exactly right either. So
2636 # simply discard directories are they are found.
2637 return
2638 # Override any stale file object
2639 proc.file_objects[file_object] = filepath
2640
2641 def handle_FileIo_Rename(self, line):
2642 # TODO(maruel): Handle?
2643 pass
2644
2645 def handle_Process_End(self, line):
2646 pid = line[self.PID]
2647 if self._process_lookup.get(pid):
2648 logging.info('Terminated: %d' % pid)
2649 self._process_lookup[pid] = None
2650 else:
2651 logging.debug('Terminated: %d' % pid)
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002652 if self._thunk_process and self._thunk_process.pid == pid:
2653 self._thunk_process = None
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002654
2655 def handle_Process_Start(self, line):
2656 """Handles a new child process started by PID."""
2657 #UNIQUE_PROCESS_KEY = self.USER_DATA
2658 PROCESS_ID = self.USER_DATA + 1
2659 #PARENT_PID = self.USER_DATA + 2
2660 #SESSION_ID = self.USER_DATA + 3
2661 #EXIT_STATUS = self.USER_DATA + 4
2662 #DIRECTORY_TABLE_BASE = self.USER_DATA + 5
2663 #USER_SID = self.USER_DATA + 6
2664 IMAGE_FILE_NAME = self.USER_DATA + 7
2665 COMMAND_LINE = self.USER_DATA + 8
2666
2667 ppid = line[self.PID]
2668 pid = int(line[PROCESS_ID], 16)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002669 command_line = CommandLineToArgvW(line[COMMAND_LINE])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002670 logging.debug(
2671 'New process %d->%d (%s) %s' %
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002672 (ppid, pid, line[IMAGE_FILE_NAME], command_line))
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002673
2674 if pid == self._thunk_pid:
2675 # Need to ignore processes we don't know about because the log is
2676 # system-wide. self._thunk_pid shall start only one process.
2677 # This is tricky though because Windows *loves* to reuse process id and
2678 # it happens often that the process ID of the thunk script created by
2679 # create_thunk() is reused. So just detecting the pid here is not
2680 # sufficient, we must confirm the command line.
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002681 if command_line[:len(self._thunk_cmd)] != self._thunk_cmd:
2682 logging.info(
2683 'Ignoring duplicate pid %d for %s: %s while searching for %s',
2684 pid, self._trace_name, command_line, self._thunk_cmd)
2685 return
2686
2687 # TODO(maruel): The check is quite weak. Add the thunk path.
2688 if self._thunk_process:
2689 raise TracingFailure(
2690 ( 'Parent process is _thunk_pid(%d) but thunk_process(%d) is '
2691 'already set') % (self._thunk_pid, self._thunk_process.pid),
2692 None, None, None)
2693 proc = self.Process(self.blacklist, pid, None)
2694 self._thunk_process = proc
2695 return
2696 elif ppid == self._thunk_pid and self._thunk_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002697 proc = self.Process(self.blacklist, pid, None)
2698 self.root_process = proc
2699 ppid = None
2700 elif self._process_lookup.get(ppid):
2701 proc = self.Process(self.blacklist, pid, None)
2702 self._process_lookup[ppid].children.append(proc)
2703 else:
2704 # Ignore
2705 return
2706 self._process_lookup[pid] = proc
2707
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002708 proc.command = command_line
2709 proc.executable = line[IMAGE_FILE_NAME]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002710 # proc.command[0] may be the absolute path of 'executable' but it may be
2711 # anything else too. If it happens that command[0] ends with executable,
2712 # use it, otherwise defaults to the base name.
2713 cmd0 = proc.command[0].lower()
2714 if not cmd0.endswith('.exe'):
2715 # TODO(maruel): That's not strictly true either.
2716 cmd0 += '.exe'
2717 if cmd0.endswith(proc.executable) and os.path.isfile(cmd0):
2718 # Fix the path.
2719 cmd0 = cmd0.replace('/', os.path.sep)
2720 cmd0 = os.path.normpath(cmd0)
2721 proc.executable = get_native_path_case(cmd0)
2722 logging.info(
2723 'New child: %s -> %d %s' % (ppid, pid, proc.executable))
2724
2725 def handle_Thread_End(self, line):
2726 """Has the same parameters as Thread_Start."""
2727 tid = int(line[self.TID], 16)
2728 self._threads_active.pop(tid, None)
2729
2730 def handle_Thread_Start(self, line):
2731 """Handles a new thread created.
2732
2733 Do not use self.PID here since a process' initial thread is created by
2734 the parent process.
2735 """
2736 PROCESS_ID = self.USER_DATA
2737 TTHREAD_ID = self.USER_DATA + 1
2738 #STACK_BASE = self.USER_DATA + 2
2739 #STACK_LIMIT = self.USER_DATA + 3
2740 #USER_STACK_BASE = self.USER_DATA + 4
2741 #USER_STACK_LIMIT = self.USER_DATA + 5
2742 #AFFINITY = self.USER_DATA + 6
2743 #WIN32_START_ADDR = self.USER_DATA + 7
2744 #TEB_BASE = self.USER_DATA + 8
2745 #SUB_PROCESS_TAG = self.USER_DATA + 9
2746 #BASE_PRIORITY = self.USER_DATA + 10
2747 #PAGE_PRIORITY = self.USER_DATA + 11
2748 #IO_PRIORITY = self.USER_DATA + 12
2749 #THREAD_FLAGS = self.USER_DATA + 13
2750 # Do not use self.PID here since a process' initial thread is created by
2751 # the parent process.
2752 pid = int(line[PROCESS_ID], 16)
2753 tid = int(line[TTHREAD_ID], 16)
2754 logging.debug('New thread pid:%d, tid:%d' % (pid, tid))
2755 self._threads_active[tid] = pid
2756
2757 @classmethod
2758 def supported_events(cls):
2759 """Returns all the procesed events."""
2760 out = []
2761 for member in dir(cls):
2762 match = re.match(r'^handle_([A-Za-z]+)_([A-Za-z]+)$', member)
2763 if match:
2764 out.append(match.groups())
2765 return out
2766
2767 class Tracer(ApiBase.Tracer):
2768 # The basic headers.
2769 EXPECTED_HEADER = [
2770 u'Event Name',
2771 u'Type',
2772 u'Event ID',
2773 u'Version',
2774 u'Channel',
2775 u'Level', # 5
2776 u'Opcode',
2777 u'Task',
2778 u'Keyword',
2779 u'PID',
2780 u'TID', # 10
2781 u'Processor Number',
2782 u'Instance ID',
2783 u'Parent Instance ID',
2784 u'Activity ID',
2785 u'Related Activity ID', # 15
2786 u'Clock-Time',
2787 u'Kernel(ms)', # Both have a resolution of ~15ms which makes them
2788 u'User(ms)', # pretty much useless.
2789 u'User Data', # Extra arguments that are event-specific.
2790 ]
2791 # Only the useful headers common to all entries are listed there. Any column
2792 # at 19 or higher is dependent on the specific event.
2793 EVENT_NAME = 0
2794 TYPE = 1
2795 PID = 9
2796 TID = 10
2797 PROCESSOR_ID = 11
2798 TIMESTAMP = 16
2799 NULL_GUID = '{00000000-0000-0000-0000-000000000000}'
2800 USER_DATA = 19
2801
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002802 class CsvReader(object):
2803 """CSV reader that reads files generated by tracerpt.exe.
2804
2805 csv.reader() fails to read them properly, it mangles file names quoted
2806 with "" with a comma in it.
2807 """
2808 # 0. Had a ',' or one of the following ' ' after a comma, next should
2809 # be ' ', '"' or string or ',' for an empty field.
2810 ( HAD_DELIMITER,
2811 # 1. Processing an unquoted field up to ','.
2812 IN_STR,
2813 # 2. Processing a new field starting with '"'.
2814 STARTING_STR_QUOTED,
2815 # 3. Second quote in a row at the start of a field. It could be either
2816 # '""foo""' or '""'. Who the hell thought it was a great idea to use
2817 # the same character for delimiting and escaping?
2818 STARTING_SECOND_QUOTE,
2819 # 4. A quote inside a quoted string where the previous character was
2820 # not a quote, so the string is not empty. Can be either: end of a
2821 # quoted string (a delimiter) or a quote escape. The next char must be
2822 # either '"' or ','.
2823 HAD_QUOTE_IN_QUOTED,
2824 # 5. Second quote inside a quoted string.
2825 HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED,
2826 # 6. Processing a field that started with '"'.
2827 IN_STR_QUOTED) = range(7)
2828
2829 def __init__(self, f):
2830 self.f = f
2831
2832 def __iter__(self):
2833 return self
2834
2835 def next(self):
2836 """Splits the line in fields."""
2837 line = self.f.readline()
2838 if not line:
2839 raise StopIteration()
2840 line = line.strip()
2841 fields = []
2842 state = self.HAD_DELIMITER
2843 for i, c in enumerate(line):
2844 if state == self.HAD_DELIMITER:
2845 if c == ',':
2846 # Empty field.
2847 fields.append('')
2848 elif c == ' ':
2849 # Ignore initial whitespaces
2850 pass
2851 elif c == '"':
2852 state = self.STARTING_STR_QUOTED
2853 fields.append('')
2854 else:
2855 # Start of a new field.
2856 state = self.IN_STR
2857 fields.append(c)
2858
2859 elif state == self.IN_STR:
2860 # Do not accept quote inside unquoted field.
2861 assert c != '"', (i, c, line, fields)
2862 if c == ',':
2863 fields[-1] = fields[-1].strip()
2864 state = self.HAD_DELIMITER
2865 else:
2866 fields[-1] = fields[-1] + c
2867
2868 elif state == self.STARTING_STR_QUOTED:
2869 if c == '"':
2870 # Do not store the character yet.
2871 state = self.STARTING_SECOND_QUOTE
2872 else:
2873 state = self.IN_STR_QUOTED
2874 fields[-1] = fields[-1] + c
2875
2876 elif state == self.STARTING_SECOND_QUOTE:
2877 if c == ',':
2878 # It was an empty field. '""' == ''.
2879 state = self.HAD_DELIMITER
2880 else:
2881 fields[-1] = fields[-1] + '"' + c
2882 state = self.IN_STR_QUOTED
2883
2884 elif state == self.HAD_QUOTE_IN_QUOTED:
2885 if c == ',':
2886 # End of the string.
2887 state = self.HAD_DELIMITER
2888 elif c == '"':
2889 state = self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED
2890 else:
2891 # The previous double-quote was just an unescaped quote.
2892 fields[-1] = fields[-1] + '"' + c
2893 state = self.IN_STR_QUOTED
2894
2895 elif state == self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED:
2896 if c == ',':
2897 # End of the string.
2898 state = self.HAD_DELIMITER
2899 fields[-1] = fields[-1] + '"'
2900 else:
maruel@chromium.org261d1352013-02-25 19:23:20 +00002901 # That's just how the logger rolls. Revert back to appending the
2902 # char and "guess" it was a quote in a double-quoted string.
2903 state = self.IN_STR_QUOTED
2904 fields[-1] = fields[-1] + '"' + c
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002905
2906 elif state == self.IN_STR_QUOTED:
2907 if c == '"':
2908 # Could be a delimiter or an escape.
2909 state = self.HAD_QUOTE_IN_QUOTED
2910 else:
2911 fields[-1] = fields[-1] + c
2912
2913 if state == self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED:
2914 fields[-1] = fields[-1] + '"'
2915 else:
2916 assert state in (
2917 # Terminated with a normal field.
2918 self.IN_STR,
2919 # Terminated with an empty field.
2920 self.STARTING_SECOND_QUOTE,
2921 # Terminated with a normal quoted field.
2922 self.HAD_QUOTE_IN_QUOTED), (
2923 line, state, fields)
2924 return fields
2925
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002926 def __init__(self, logname):
2927 """Starts the log collection.
2928
2929 Requires administrative access. logman.exe is synchronous so no need for a
2930 "warmup" call. 'Windows Kernel Trace' is *localized* so use its GUID
2931 instead. The GUID constant name is SystemTraceControlGuid. Lovely.
2932
2933 One can get the list of potentially interesting providers with:
2934 "logman query providers | findstr /i file"
2935 """
2936 super(LogmanTrace.Tracer, self).__init__(logname)
2937 self._script = create_thunk()
2938 cmd_start = [
2939 'logman.exe',
2940 'start',
2941 'NT Kernel Logger',
2942 '-p', '{9e814aad-3204-11d2-9a82-006008a86939}',
2943 # splitio,fileiocompletion,syscall,file,cswitch,img
2944 '(process,fileio,thread)',
2945 '-o', self._logname + '.etl',
2946 '-ets', # Send directly to kernel
2947 # Values extracted out of thin air.
2948 # Event Trace Session buffer size in kb.
2949 '-bs', '10240',
2950 # Number of Event Trace Session buffers.
2951 '-nb', '16', '256',
2952 ]
2953 logging.debug('Running: %s' % cmd_start)
2954 try:
2955 subprocess.check_call(
2956 cmd_start,
2957 stdin=subprocess.PIPE,
2958 stdout=subprocess.PIPE,
2959 stderr=subprocess.STDOUT)
2960 except subprocess.CalledProcessError, e:
2961 if e.returncode == -2147024891:
2962 print >> sys.stderr, 'Please restart with an elevated admin prompt'
2963 elif e.returncode == -2144337737:
2964 print >> sys.stderr, (
2965 'A kernel trace was already running, stop it and try again')
2966 raise
2967
2968 def trace(self, cmd, cwd, tracename, output):
2969 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
2970 assert os.path.isabs(cmd[0]), cmd[0]
2971 assert os.path.isabs(cwd), cwd
2972 assert os.path.normpath(cwd) == cwd, cwd
2973 with self._lock:
2974 if not self._initialized:
2975 raise TracingFailure(
2976 'Called Tracer.trace() on an unitialized object',
2977 None, None, None, tracename)
2978 assert tracename not in (i['trace'] for i in self._traces)
2979
2980 # Use "logman -?" for help.
2981
2982 stdout = stderr = None
2983 if output:
2984 stdout = subprocess.PIPE
2985 stderr = subprocess.STDOUT
2986
2987 # Run the child process.
2988 logging.debug('Running: %s' % cmd)
2989 # Use the temporary script generated with create_thunk() so we have a
2990 # clear pid owner. Since trace_inputs.py can be used as a library and
2991 # could trace multiple processes simultaneously, it makes it more complex
2992 # if the executable to be traced is executed directly here. It also solves
2993 # issues related to logman.exe that needs to be executed to control the
2994 # kernel trace.
2995 child_cmd = [
2996 sys.executable,
2997 self._script,
2998 tracename,
2999 ]
3000 child = subprocess.Popen(
3001 child_cmd + fix_python_path(cmd),
3002 cwd=cwd,
3003 stdin=subprocess.PIPE,
3004 stdout=stdout,
3005 stderr=stderr)
3006 logging.debug('Started child pid: %d' % child.pid)
3007 out = child.communicate()[0]
3008 # This doesn't mean all the grand-children are done. Sadly, we don't have
3009 # a good way to determine that.
3010
3011 with self._lock:
3012 assert tracename not in (i['trace'] for i in self._traces)
3013 self._traces.append({
maruel@chromium.orgf43e68b2012-10-15 20:23:10 +00003014 'cmd': cmd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003015 'cwd': cwd,
maruel@chromium.org964ddde2012-10-29 18:13:46 +00003016 'output': out,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003017 'pid': child.pid,
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00003018 # Used to figure out the real process when process ids are reused.
3019 'thunk_cmd': child_cmd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003020 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003021 })
3022
3023 return child.returncode, out
3024
3025 def close(self, _timeout=None):
3026 """Stops the kernel log collection and converts the traces to text
3027 representation.
3028 """
3029 with self._lock:
3030 if not self._initialized:
3031 raise TracingFailure(
3032 'Called Tracer.close() on an unitialized object',
3033 None, None, None)
3034 os.remove(self._script)
3035 # Save metadata, add 'format' key..
3036 data = {
3037 'format': 'csv',
3038 'traces': self._traces,
3039 }
3040 write_json(self._logname, data, False)
3041
3042 cmd_stop = [
3043 'logman.exe',
3044 'stop',
3045 'NT Kernel Logger',
3046 '-ets', # Sends the command directly to the kernel.
3047 ]
3048 logging.debug('Running: %s' % cmd_stop)
3049 subprocess.check_call(
3050 cmd_stop,
3051 stdin=subprocess.PIPE,
3052 stdout=subprocess.PIPE,
3053 stderr=subprocess.STDOUT)
3054 self._initialized = False
3055
3056 def post_process_log(self):
3057 """Converts the .etl file into .csv then into .json."""
3058 super(LogmanTrace.Tracer, self).post_process_log()
3059 logformat = 'csv'
3060 self._convert_log(logformat)
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003061 self._trim_log(logformat)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003062
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003063 def _trim_log(self, logformat):
3064 """Reduces the amount of data in original log by generating a 'reduced'
3065 log.
3066 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003067 if logformat == 'csv_utf16':
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003068 file_handle = codecs.open(
3069 self._logname + '.' + logformat, 'r', encoding='utf-16')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003070
3071 elif logformat == 'csv':
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003072 assert sys.getfilesystemencoding() == 'mbcs'
3073 file_handle = codecs.open(
3074 self._logname + '.' + logformat, 'r',
3075 encoding=get_current_encoding())
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003076
3077 supported_events = LogmanTrace.Context.supported_events()
3078
3079 def trim(generator):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003080 """Loads items from the generator and returns the interesting data.
3081
3082 It filters out any uninteresting line and reduce the amount of data in
3083 the trace.
3084 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003085 for index, line in enumerate(generator):
3086 if not index:
3087 if line != self.EXPECTED_HEADER:
3088 raise TracingFailure(
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003089 'Found malformed header: %s' % line,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003090 None, None, None)
3091 continue
3092 # As you can see, the CSV is full of useful non-redundant information:
3093 if (line[2] != '0' or # Event ID
3094 line[3] not in ('2', '3') or # Version
3095 line[4] != '0' or # Channel
3096 line[5] != '0' or # Level
3097 line[7] != '0' or # Task
3098 line[8] != '0x0000000000000000' or # Keyword
3099 line[12] != '' or # Instance ID
3100 line[13] != '' or # Parent Instance ID
3101 line[14] != self.NULL_GUID or # Activity ID
3102 line[15] != ''): # Related Activity ID
3103 raise TracingFailure(
3104 'Found unexpected values in line: %s' % ' '.join(line),
3105 None, None, None)
3106
3107 if (line[self.EVENT_NAME], line[self.TYPE]) not in supported_events:
3108 continue
3109
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003110 yield [
3111 line[self.EVENT_NAME],
3112 line[self.TYPE],
3113 line[self.PID],
3114 line[self.TID],
3115 line[self.PROCESSOR_ID],
3116 line[self.TIMESTAMP],
3117 ] + line[self.USER_DATA:]
3118
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003119 # must not convert the trim() call into a list, since it will use too much
3120 # memory for large trace. use a csv file as a workaround since the json
3121 # parser requires a complete in-memory file.
3122 with open('%s.preprocessed' % self._logname, 'wb') as f:
3123 # $ and * can't be used in file name on windows, reducing the likelihood
3124 # of having to escape a string.
3125 out = csv.writer(
3126 f, delimiter='$', quotechar='*', quoting=csv.QUOTE_MINIMAL)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003127 for line in trim(self.CsvReader(file_handle)):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003128 out.writerow([s.encode('utf-8') for s in line])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003129
3130 def _convert_log(self, logformat):
3131 """Converts the ETL trace to text representation.
3132
3133 Normally, 'csv' is sufficient. If complex scripts are used (like eastern
3134 languages), use 'csv_utf16'. If localization gets in the way, use 'xml'.
3135
3136 Arguments:
3137 - logformat: Text format to be generated, csv, csv_utf16 or xml.
3138
3139 Use "tracerpt -?" for help.
3140 """
3141 LOCALE_INVARIANT = 0x7F
3142 windll.kernel32.SetThreadLocale(LOCALE_INVARIANT)
3143 cmd_convert = [
3144 'tracerpt.exe',
3145 '-l', self._logname + '.etl',
3146 '-o', self._logname + '.' + logformat,
3147 '-gmt', # Use UTC
3148 '-y', # No prompt
3149 # Use -of XML to get the header of each items after column 19, e.g. all
3150 # the actual headers of 'User Data'.
3151 ]
3152
3153 if logformat == 'csv':
3154 # tracerpt localizes the 'Type' column, for major brainfuck
3155 # entertainment. I can't imagine any sane reason to do that.
3156 cmd_convert.extend(['-of', 'CSV'])
3157 elif logformat == 'csv_utf16':
3158 # This causes it to use UTF-16, which doubles the log size but ensures
3159 # the log is readable for non-ASCII characters.
3160 cmd_convert.extend(['-of', 'CSV', '-en', 'Unicode'])
3161 elif logformat == 'xml':
3162 cmd_convert.extend(['-of', 'XML'])
3163 else:
3164 raise ValueError('Unexpected log format \'%s\'' % logformat)
3165 logging.debug('Running: %s' % cmd_convert)
3166 # This can takes tens of minutes for large logs.
3167 # Redirects all output to stderr.
3168 subprocess.check_call(
3169 cmd_convert,
3170 stdin=subprocess.PIPE,
3171 stdout=sys.stderr,
3172 stderr=sys.stderr)
3173
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003174 def __init__(self, use_sudo=False): # pylint: disable=W0613
3175 super(LogmanTrace, self).__init__()
3176 # Ignore use_sudo. It's irrelevant on Windows but kept to simplify the API.
3177
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003178 @staticmethod
3179 def clean_trace(logname):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003180 for ext in ('', '.csv', '.etl', '.json', '.xml', '.preprocessed'):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003181 if os.path.isfile(logname + ext):
3182 os.remove(logname + ext)
3183
3184 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003185 def parse_log(cls, logname, blacklist, trace_name):
3186 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00003187 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003188
3189 def blacklist_more(filepath):
3190 # All the NTFS metadata is in the form x:\$EXTEND or stuff like that.
3191 return blacklist(filepath) or re.match(r'[A-Z]\:\\\$EXTEND', filepath)
3192
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003193 # Create a list of (Context, result_dict) tuples. This is necessary because
3194 # the csv file may be larger than the amount of available memory.
3195 contexes = [
3196 (
3197 cls.Context(
3198 blacklist_more, item['pid'], item['trace'], item['thunk_cmd']),
3199 {
3200 'output': item['output'],
3201 'trace': item['trace'],
3202 },
3203 )
3204 for item in read_json(logname)['traces']
3205 if not trace_name or item['trace'] == trace_name
3206 ]
3207
3208 # The log may be too large to fit in memory and it is not efficient to read
3209 # it multiple times, so multiplex the contexes instead, which is slightly
3210 # more awkward.
3211 with open('%s.preprocessed' % logname, 'rb') as f:
3212 lines = csv.reader(
3213 f, delimiter='$', quotechar='*', quoting=csv.QUOTE_MINIMAL)
3214 for encoded in lines:
3215 line = [s.decode('utf-8') for s in encoded]
3216 # Convert the PID in-place from hex.
3217 line[cls.Context.PID] = int(line[cls.Context.PID], 16)
3218 for context in contexes:
3219 if 'exception' in context[1]:
3220 continue
3221 try:
3222 context[0].on_line(line)
3223 except TracingFailure:
3224 context[1]['exception'] = sys.exc_info()
3225
3226 for context in contexes:
3227 if 'exception' in context[1]:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003228 continue
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003229 context[1]['results'] = context[0].to_results()
3230
3231 return [context[1] for context in contexes]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003232
3233
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003234def get_api(**kwargs):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003235 """Returns the correct implementation for the current OS."""
3236 if sys.platform == 'cygwin':
3237 raise NotImplementedError(
3238 'Not implemented for cygwin, start the script from Win32 python')
3239 flavors = {
3240 'win32': LogmanTrace,
3241 'darwin': Dtrace,
3242 'sunos5': Dtrace,
3243 'freebsd7': Dtrace,
3244 'freebsd8': Dtrace,
3245 }
3246 # Defaults to strace.
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003247 return flavors.get(sys.platform, Strace)(**kwargs)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003248
3249
3250def extract_directories(root_dir, files, blacklist):
3251 """Detects if all the files in a directory are in |files| and if so, replace
3252 the individual files by a Results.Directory instance.
3253
3254 Takes a list of Results.File instances and returns a shorter list of
3255 Results.File and Results.Directory instances.
3256
3257 Arguments:
3258 - root_dir: Optional base directory that shouldn't be search further.
3259 - files: list of Results.File instances.
3260 - blacklist: regexp of files to ignore, for example r'.+\.pyc'.
3261 """
3262 logging.info(
3263 'extract_directories(%s, %d files, ...)' % (root_dir, len(files)))
3264 assert not (root_dir or '').endswith(os.path.sep), root_dir
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003265 # It is important for root_dir to not be a symlinked path, make sure to call
3266 # os.path.realpath() as needed.
3267 assert not root_dir or (
3268 os.path.realpath(get_native_path_case(root_dir)) == root_dir)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003269 assert not any(isinstance(f, Results.Directory) for f in files)
3270 # Remove non existent files.
3271 files = [f for f in files if f.existent]
3272 if not files:
3273 return files
3274 # All files must share the same root, which can be None.
3275 assert len(set(f.root for f in files)) == 1, set(f.root for f in files)
3276
3277 # Creates a {directory: {filename: File}} mapping, up to root.
3278 buckets = {}
3279 if root_dir:
3280 buckets[root_dir] = {}
3281 for fileobj in files:
3282 path = fileobj.full_path
3283 directory = os.path.dirname(path)
3284 assert directory
3285 # Do not use os.path.basename() so trailing os.path.sep is kept.
3286 basename = path[len(directory)+1:]
3287 files_in_directory = buckets.setdefault(directory, {})
3288 files_in_directory[basename] = fileobj
3289 # Add all the directories recursively up to root.
3290 while True:
3291 old_d = directory
3292 directory = os.path.dirname(directory)
3293 if directory + os.path.sep == root_dir or directory == old_d:
3294 break
3295 buckets.setdefault(directory, {})
3296
3297 root_prefix = len(root_dir) + 1 if root_dir else 0
3298 for directory in sorted(buckets, reverse=True):
maruel@chromium.orgb1678e62013-03-19 15:55:17 +00003299 if not os.path.isdir(directory):
3300 logging.debug(
3301 '%s was a directory but doesn\'t exist anymore; ignoring', directory)
3302 continue
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003303 actual = set(f for f in os.listdir(directory) if not blacklist(f))
3304 expected = set(buckets[directory])
3305 if not (actual - expected):
3306 parent = os.path.dirname(directory)
3307 buckets[parent][os.path.basename(directory)] = Results.Directory(
3308 root_dir,
3309 directory[root_prefix:],
3310 False,
3311 sum(f.size for f in buckets[directory].itervalues()),
3312 sum(f.nb_files for f in buckets[directory].itervalues()))
3313 # Remove the whole bucket.
3314 del buckets[directory]
3315
3316 # Reverse the mapping with what remains. The original instances are returned,
3317 # so the cached meta data is kept.
3318 files = sum((x.values() for x in buckets.itervalues()), [])
3319 return sorted(files, key=lambda x: x.path)
3320
3321
3322def trace(logfile, cmd, cwd, api, output):
3323 """Traces an executable. Returns (returncode, output) from api.
3324
3325 Arguments:
3326 - logfile: file to write to.
3327 - cmd: command to run.
3328 - cwd: current directory to start the process in.
3329 - api: a tracing api instance.
3330 - output: if True, returns output, otherwise prints it at the console.
3331 """
3332 cmd = fix_python_path(cmd)
3333 api.clean_trace(logfile)
3334 with api.get_tracer(logfile) as tracer:
3335 return tracer.trace(cmd, cwd, 'default', output)
3336
3337
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003338def CMDclean(args):
3339 """Cleans up traces."""
3340 parser = OptionParserTraceInputs(command='clean')
3341 options, args = parser.parse_args(args)
3342 api = get_api()
3343 api.clean_trace(options.log)
3344 return 0
3345
3346
3347def CMDtrace(args):
3348 """Traces an executable."""
3349 parser = OptionParserTraceInputs(command='trace')
3350 parser.allow_interspersed_args = False
3351 parser.add_option(
3352 '-q', '--quiet', action='store_true',
3353 help='Redirects traced executable output to /dev/null')
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003354 parser.add_option(
3355 '-s', '--sudo', action='store_true',
3356 help='Use sudo when shelling out the tracer tool (ignored on Windows)')
3357 parser.add_option(
3358 '-n', '--no-sudo', action='store_false',
3359 help='Don\'t use sudo')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003360 options, args = parser.parse_args(args)
3361
3362 if not args:
3363 parser.error('Please provide a command to run')
3364
3365 if not os.path.isabs(args[0]) and os.access(args[0], os.X_OK):
3366 args[0] = os.path.abspath(args[0])
3367
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003368 # options.sudo default value is None, which is to do whatever tracer defaults
3369 # do.
3370 api = get_api(use_sudo=options.sudo)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003371 return trace(options.log, args, os.getcwd(), api, options.quiet)[0]
3372
3373
3374def CMDread(args):
3375 """Reads the logs and prints the result."""
3376 parser = OptionParserTraceInputs(command='read')
3377 parser.add_option(
3378 '-V', '--variable',
3379 nargs=2,
3380 action='append',
3381 dest='variables',
3382 metavar='VAR_NAME directory',
3383 default=[],
3384 help=('Variables to replace relative directories against. Example: '
3385 '"-v \'$HOME\' \'/home/%s\'" will replace all occurence of your '
3386 'home dir with $HOME') % getpass.getuser())
3387 parser.add_option(
3388 '--root-dir',
3389 help='Root directory to base everything off it. Anything outside of this '
3390 'this directory will not be reported')
3391 parser.add_option(
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003392 '--trace-name',
3393 help='Only reads one of the trace. Defaults to reading all traces')
3394 parser.add_option(
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003395 '-j', '--json', action='store_true',
3396 help='Outputs raw result data as json')
3397 parser.add_option(
3398 '-b', '--blacklist', action='append', default=[],
3399 help='List of regexp to use as blacklist filter')
3400 options, args = parser.parse_args(args)
3401
3402 if options.root_dir:
maruel@chromium.org7dc17b32013-04-02 22:27:05 +00003403 options.root_dir = get_native_path_case(
3404 unicode(os.path.abspath(options.root_dir)))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003405
3406 variables = dict(options.variables)
3407 api = get_api()
3408 def blacklist(f):
3409 return any(re.match(b, f) for b in options.blacklist)
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003410 data = api.parse_log(options.log, blacklist, options.trace_name)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003411 # Process each trace.
3412 output_as_json = []
3413 for item in data:
3414 if 'exception' in item:
maruel@chromium.org424f5692012-10-15 15:42:38 +00003415 # Do not abort the other traces.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003416 print >> sys.stderr, (
maruel@chromium.org424f5692012-10-15 15:42:38 +00003417 'Trace %s: Got an exception: %s' % (
3418 item['trace'], item['exception'][1]))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003419 continue
3420 results = item['results']
3421 if options.root_dir:
3422 results = results.strip_root(options.root_dir)
3423
3424 if options.json:
3425 output_as_json.append(results.flatten())
3426 else:
3427 simplified = extract_directories(
3428 options.root_dir, results.files, blacklist)
3429 simplified = [f.replace_variables(variables) for f in simplified]
3430 if len(data) > 1:
3431 print('Trace: %s' % item['trace'])
3432 print('Total: %d' % len(results.files))
3433 print('Non existent: %d' % len(results.non_existent))
3434 for f in results.non_existent:
3435 print(' %s' % f.path)
3436 print(
3437 'Interesting: %d reduced to %d' % (
3438 len(results.existent), len(simplified)))
3439 for f in simplified:
3440 print(' %s' % f.path)
3441
3442 if options.json:
3443 write_json(sys.stdout, output_as_json, False)
3444 return 0
3445
3446
3447class OptionParserWithLogging(optparse.OptionParser):
3448 """Adds --verbose option."""
3449 def __init__(self, verbose=0, **kwargs):
3450 optparse.OptionParser.__init__(self, **kwargs)
3451 self.add_option(
3452 '-v', '--verbose',
3453 action='count',
3454 default=verbose,
3455 help='Use multiple times to increase verbosity')
3456
3457 def parse_args(self, *args, **kwargs):
3458 options, args = optparse.OptionParser.parse_args(self, *args, **kwargs)
3459 levels = [logging.ERROR, logging.INFO, logging.DEBUG]
3460 logging.basicConfig(
3461 level=levels[min(len(levels)-1, options.verbose)],
3462 format='%(levelname)5s %(module)15s(%(lineno)3d): %(message)s')
3463 return options, args
3464
3465
3466class OptionParserWithNiceDescription(OptionParserWithLogging):
3467 """Generates the description with the command's docstring."""
3468 def __init__(self, **kwargs):
3469 """Sets 'description' and 'usage' if not already specified."""
3470 command = kwargs.pop('command', 'help')
3471 kwargs.setdefault(
3472 'description',
3473 re.sub('[\r\n ]{2,}', ' ', get_command_handler(command).__doc__))
3474 kwargs.setdefault('usage', '%%prog %s [options]' % command)
3475 OptionParserWithLogging.__init__(self, **kwargs)
3476
3477
3478class OptionParserTraceInputs(OptionParserWithNiceDescription):
3479 """Adds automatic --log handling."""
3480 def __init__(self, **kwargs):
3481 OptionParserWithNiceDescription.__init__(self, **kwargs)
3482 self.add_option(
3483 '-l', '--log', help='Log file to generate or read, required')
3484
3485 def parse_args(self, *args, **kwargs):
3486 """Makes sure the paths make sense.
3487
3488 On Windows, / and \ are often mixed together in a path.
3489 """
3490 options, args = OptionParserWithNiceDescription.parse_args(
3491 self, *args, **kwargs)
3492 if not options.log:
3493 self.error('Must supply a log file with -l')
3494 options.log = os.path.abspath(options.log)
3495 return options, args
3496
3497
3498def extract_documentation():
3499 """Returns a dict {command: description} for each of documented command."""
3500 commands = (
3501 fn[3:]
3502 for fn in dir(sys.modules['__main__'])
3503 if fn.startswith('CMD') and get_command_handler(fn[3:]).__doc__)
3504 return dict((fn, get_command_handler(fn).__doc__) for fn in commands)
3505
3506
3507def CMDhelp(args):
3508 """Prints list of commands or help for a specific command."""
3509 doc = extract_documentation()
3510 # Calculates the optimal offset.
3511 offset = max(len(cmd) for cmd in doc)
3512 format_str = ' %-' + str(offset + 2) + 's %s'
3513 # Generate a one-liner documentation of each commands.
3514 commands_description = '\n'.join(
3515 format_str % (cmd, doc[cmd].split('\n')[0]) for cmd in sorted(doc))
3516
3517 parser = OptionParserWithNiceDescription(
3518 usage='%prog <command> [options]',
3519 description='Commands are:\n%s\n' % commands_description)
3520 parser.format_description = lambda _: parser.description
3521
3522 # Strip out any -h or --help argument.
3523 _, args = parser.parse_args([i for i in args if not i in ('-h', '--help')])
3524 if len(args) == 1:
3525 if not get_command_handler(args[0]):
3526 parser.error('Unknown command %s' % args[0])
3527 # The command was "%prog help command", replaces ourself with
3528 # "%prog command --help" so help is correctly printed out.
3529 return main(args + ['--help'])
3530 elif args:
3531 parser.error('Unknown argument "%s"' % ' '.join(args))
3532 parser.print_help()
3533 return 0
3534
3535
3536def get_command_handler(name):
3537 """Returns the command handler or CMDhelp if it doesn't exist."""
3538 return getattr(sys.modules['__main__'], 'CMD%s' % name, None)
3539
3540
3541def main_impl(argv):
3542 command = get_command_handler(argv[0] if argv else 'help')
3543 if not command:
3544 return CMDhelp(argv)
3545 return command(argv[1:])
3546
3547def main(argv):
maruel@chromium.org46e61cc2013-03-25 19:55:34 +00003548 run_isolated.disable_buffering()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003549 try:
3550 main_impl(argv)
3551 except TracingFailure, e:
3552 sys.stderr.write('\nError: ')
3553 sys.stderr.write(str(e))
3554 sys.stderr.write('\n')
3555 return 1
3556
3557
3558if __name__ == '__main__':
3559 sys.exit(main(sys.argv[1:]))