blob: 64a9d4f2451239777ad5c2d1dcb44c68dbbd5c44 [file] [log] [blame]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001#!/usr/bin/env python
2# coding=utf-8
3# Copyright (c) 2012 The Chromium Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""Traces an executable and its child processes and extract the files accessed
8by them.
9
10The implementation uses OS-specific API. The native Kernel logger and the ETL
11interface is used on Windows. Dtrace is used on OSX. Strace is used otherwise.
12The OS-specific implementation is hidden in an 'API' interface.
13
14The results are embedded in a Results instance. The tracing is done in two
15phases, the first is to do the actual trace and generate an
16implementation-specific log file. Then the log file is parsed to extract the
17information, including the individual child processes and the files accessed
18from the log.
19"""
20
21import codecs
22import csv
23import getpass
24import glob
25import json
26import logging
maruel@chromium.org306e0e72012-11-02 18:22:03 +000027import unicodedata
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000028import optparse
29import os
30import re
31import subprocess
32import sys
33import tempfile
34import threading
35import time
36import weakref
37
38## OS-specific imports
39
40if sys.platform == 'win32':
41 from ctypes.wintypes import byref, create_unicode_buffer, c_int, c_wchar_p
42 from ctypes.wintypes import windll, FormatError # pylint: disable=E0611
43 from ctypes.wintypes import GetLastError # pylint: disable=E0611
44elif sys.platform == 'darwin':
45 import Carbon.File # pylint: disable=F0401
46 import MacOS # pylint: disable=F0401
47
maruel@chromium.org46e61cc2013-03-25 19:55:34 +000048import run_isolated
49
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000050
51BASE_DIR = os.path.dirname(os.path.abspath(__file__))
52ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR))
53
54
55class TracingFailure(Exception):
56 """An exception occured during tracing."""
57 def __init__(self, description, pid, line_number, line, *args):
58 super(TracingFailure, self).__init__(
59 description, pid, line_number, line, *args)
60 self.description = description
61 self.pid = pid
62 self.line_number = line_number
63 self.line = line
64 self.extra = args
65
66 def __str__(self):
67 out = self.description
68 if self.pid:
69 out += '\npid: %d' % self.pid
70 if self.line_number:
71 out += '\nline: %d' % self.line_number
72 if self.line:
73 out += '\n%s' % self.line
74 if self.extra:
75 out += '\n' + ', '.join(map(str, filter(None, self.extra)))
76 return out
77
78
79## OS-specific functions
80
81if sys.platform == 'win32':
82 def QueryDosDevice(drive_letter):
83 """Returns the Windows 'native' path for a DOS drive letter."""
84 assert re.match(r'^[a-zA-Z]:$', drive_letter), drive_letter
maruel@chromium.org306e0e72012-11-02 18:22:03 +000085 assert isinstance(drive_letter, unicode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000086 # Guesswork. QueryDosDeviceW never returns the required number of bytes.
87 chars = 1024
maruel@chromium.org306e0e72012-11-02 18:22:03 +000088 drive_letter = drive_letter
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000089 p = create_unicode_buffer(chars)
90 if 0 == windll.kernel32.QueryDosDeviceW(drive_letter, p, chars):
91 err = GetLastError()
92 if err:
93 # pylint: disable=E0602
maruel@chromium.orgf37c2472012-11-02 20:06:13 +000094 msg = u'QueryDosDevice(%s): %s (%d)' % (
95 drive_letter, FormatError(err), err)
96 raise WindowsError(err, msg.encode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000097 return p.value
98
99
100 def GetShortPathName(long_path):
101 """Returns the Windows short path equivalent for a 'long' path."""
maruel@chromium.org68bb1482013-02-15 22:36:24 +0000102 assert isinstance(long_path, unicode), repr(long_path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000103 # Adds '\\\\?\\' when given an absolute path so the MAX_PATH (260) limit is
104 # not enforced.
105 if os.path.isabs(long_path) and not long_path.startswith('\\\\?\\'):
106 long_path = '\\\\?\\' + long_path
107 chars = windll.kernel32.GetShortPathNameW(long_path, None, 0)
108 if chars:
109 p = create_unicode_buffer(chars)
110 if windll.kernel32.GetShortPathNameW(long_path, p, chars):
111 return p.value
112
113 err = GetLastError()
114 if err:
115 # pylint: disable=E0602
maruel@chromium.orgf37c2472012-11-02 20:06:13 +0000116 msg = u'GetShortPathName(%s): %s (%d)' % (
117 long_path, FormatError(err), err)
118 raise WindowsError(err, msg.encode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000119
120
121 def GetLongPathName(short_path):
122 """Returns the Windows long path equivalent for a 'short' path."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000123 assert isinstance(short_path, unicode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000124 # Adds '\\\\?\\' when given an absolute path so the MAX_PATH (260) limit is
125 # not enforced.
126 if os.path.isabs(short_path) and not short_path.startswith('\\\\?\\'):
127 short_path = '\\\\?\\' + short_path
128 chars = windll.kernel32.GetLongPathNameW(short_path, None, 0)
129 if chars:
130 p = create_unicode_buffer(chars)
131 if windll.kernel32.GetLongPathNameW(short_path, p, chars):
132 return p.value
133
134 err = GetLastError()
135 if err:
136 # pylint: disable=E0602
maruel@chromium.orgf37c2472012-11-02 20:06:13 +0000137 msg = u'GetLongPathName(%s): %s (%d)' % (
138 short_path, FormatError(err), err)
139 raise WindowsError(err, msg.encode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000140
141
142 def get_current_encoding():
143 """Returns the 'ANSI' code page associated to the process."""
144 return 'cp%d' % int(windll.kernel32.GetACP())
145
146
147 class DosDriveMap(object):
148 """Maps \Device\HarddiskVolumeN to N: on Windows."""
149 # Keep one global cache.
150 _MAPPING = {}
151
152 def __init__(self):
153 """Lazy loads the cache."""
154 if not self._MAPPING:
155 # This is related to UNC resolver on windows. Ignore that.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000156 self._MAPPING[u'\\Device\\Mup'] = None
157 self._MAPPING[u'\\SystemRoot'] = os.environ[u'SystemRoot']
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000158
159 for letter in (chr(l) for l in xrange(ord('C'), ord('Z')+1)):
160 try:
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000161 letter = u'%s:' % letter
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000162 mapped = QueryDosDevice(letter)
163 if mapped in self._MAPPING:
164 logging.warn(
165 ('Two drives: \'%s\' and \'%s\', are mapped to the same disk'
166 '. Drive letters are a user-mode concept and the kernel '
167 'traces only have NT path, so all accesses will be '
168 'associated with the first drive letter, independent of the '
169 'actual letter used by the code') % (
170 self._MAPPING[mapped], letter))
171 else:
172 self._MAPPING[mapped] = letter
173 except WindowsError: # pylint: disable=E0602
174 pass
175
176 def to_win32(self, path):
177 """Converts a native NT path to Win32/DOS compatible path."""
178 match = re.match(r'(^\\Device\\[a-zA-Z0-9]+)(\\.*)?$', path)
179 if not match:
180 raise ValueError(
181 'Can\'t convert %s into a Win32 compatible path' % path,
182 path)
183 if not match.group(1) in self._MAPPING:
184 # Unmapped partitions may be accessed by windows for the
185 # fun of it while the test is running. Discard these.
186 return None
187 drive = self._MAPPING[match.group(1)]
188 if not drive or not match.group(2):
189 return drive
190 return drive + match.group(2)
191
192
193 def isabs(path):
194 """Accepts X: as an absolute path, unlike python's os.path.isabs()."""
195 return os.path.isabs(path) or len(path) == 2 and path[1] == ':'
196
197
csharp@chromium.orgf2eacff2013-04-04 14:20:20 +0000198 def find_item_native_case(root, item):
199 """Gets the native path case of a single item based at root_path."""
200 if item == '..':
201 return item
202
203 root = get_native_path_case(root)
204 return os.path.basename(get_native_path_case(os.path.join(root, item)))
205
206
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000207 def get_native_path_case(p):
208 """Returns the native path case for an existing file.
209
210 On Windows, removes any leading '\\?\'.
211 """
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000212 assert isinstance(p, unicode), repr(p)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000213 if not isabs(p):
214 raise ValueError(
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +0000215 'get_native_path_case(%r): Require an absolute path' % p, p)
216
maruel@chromium.org037758d2012-12-10 17:59:46 +0000217 # Make sure it is normalized to os.path.sep. Do not do it here to keep the
218 # function fast
219 assert '/' not in p, p
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +0000220 suffix = ''
221 count = p.count(':')
222 if count > 1:
223 # This means it has an alternate-data stream. There could be 3 ':', since
224 # it could be the $DATA datastream of an ADS. Split the whole ADS suffix
225 # off and add it back afterward. There is no way to know the native path
226 # case of an alternate data stream.
227 items = p.split(':')
228 p = ':'.join(items[0:2])
229 suffix = ''.join(':' + i for i in items[2:])
230
maruel@chromium.org42b091d2013-04-02 22:27:02 +0000231 # TODO(maruel): Use os.path.normpath?
232 if p.endswith('.\\'):
233 p = p[:-2]
234
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000235 # Windows used to have an option to turn on case sensitivity on non Win32
236 # subsystem but that's out of scope here and isn't supported anymore.
237 # Go figure why GetShortPathName() is needed.
238 try:
239 out = GetLongPathName(GetShortPathName(p))
240 except OSError, e:
241 if e.args[0] in (2, 3, 5):
242 # The path does not exist. Try to recurse and reconstruct the path.
243 base = os.path.dirname(p)
244 rest = os.path.basename(p)
245 return os.path.join(get_native_path_case(base), rest)
246 raise
247 if out.startswith('\\\\?\\'):
248 out = out[4:]
249 # Always upper case the first letter since GetLongPathName() will return the
250 # drive letter in the case it was given.
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +0000251 return out[0].upper() + out[1:] + suffix
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000252
253
254 def CommandLineToArgvW(command_line):
255 """Splits a commandline into argv using CommandLineToArgvW()."""
256 # http://msdn.microsoft.com/library/windows/desktop/bb776391.aspx
257 size = c_int()
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000258 assert isinstance(command_line, unicode)
259 ptr = windll.shell32.CommandLineToArgvW(command_line, byref(size))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000260 try:
261 return [arg for arg in (c_wchar_p * size.value).from_address(ptr)]
262 finally:
263 windll.kernel32.LocalFree(ptr)
264
265
266elif sys.platform == 'darwin':
267
268
269 # On non-windows, keep the stdlib behavior.
270 isabs = os.path.isabs
271
272
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000273 def _native_case(p):
274 """Gets the native path case. Warning: this function resolves symlinks."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000275 try:
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000276 rel_ref, _ = Carbon.File.FSPathMakeRef(p.encode('utf-8'))
277 # The OSX underlying code uses NFD but python strings are in NFC. This
278 # will cause issues with os.listdir() for example. Since the dtrace log
279 # *is* in NFC, normalize it here.
csharp@chromium.org5a2c3732013-03-07 14:44:56 +0000280 out = unicodedata.normalize(
281 'NFC', rel_ref.FSRefMakePath().decode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000282 if p.endswith(os.path.sep) and not out.endswith(os.path.sep):
283 return out + os.path.sep
284 return out
285 except MacOS.Error, e:
286 if e.args[0] in (-43, -120):
287 # The path does not exist. Try to recurse and reconstruct the path.
288 # -43 means file not found.
289 # -120 means directory not found.
290 base = os.path.dirname(p)
291 rest = os.path.basename(p)
csharp@chromium.org5a2c3732013-03-07 14:44:56 +0000292 return os.path.join(_native_case(base), rest)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000293 raise OSError(
294 e.args[0], 'Failed to get native path for %s' % p, p, e.args[1])
295
296
297 def _split_at_symlink_native(base_path, rest):
298 """Returns the native path for a symlink."""
299 base, symlink, rest = split_at_symlink(base_path, rest)
300 if symlink:
301 if not base_path:
302 base_path = base
303 else:
304 base_path = safe_join(base_path, base)
csharp@chromium.orgf2eacff2013-04-04 14:20:20 +0000305 symlink = find_item_native_case(base_path, symlink)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000306 return base, symlink, rest
307
308
csharp@chromium.orgf2eacff2013-04-04 14:20:20 +0000309 def find_item_native_case(root_path, item):
310 """Gets the native path case of a single item based at root_path.
311
312 There is no API to get the native path case of symlinks on OSX. So it
313 needs to be done the slow way.
314 """
315 if item == '..':
316 return item
317
318 item = item.lower()
319 for element in os.listdir(root_path):
320 if element.lower() == item:
321 return element
322
323
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000324 def get_native_path_case(path):
325 """Returns the native path case for an existing file.
326
327 Technically, it's only HFS+ on OSX that is case preserving and
328 insensitive. It's the default setting on HFS+ but can be changed.
329 """
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000330 assert isinstance(path, unicode), repr(path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000331 if not isabs(path):
332 raise ValueError(
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000333 'get_native_path_case(%r): Require an absolute path' % path, path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000334 if path.startswith('/dev'):
335 # /dev is not visible from Carbon, causing an exception.
336 return path
337
338 # Starts assuming there is no symlink along the path.
339 resolved = _native_case(path)
csharp@chromium.orgf2eacff2013-04-04 14:20:20 +0000340 if path.lower() in (resolved.lower(), resolved.lower() + './'):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000341 # This code path is incredibly faster.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000342 logging.debug('get_native_path_case(%s) = %s' % (path, resolved))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000343 return resolved
344
345 # There was a symlink, process it.
346 base, symlink, rest = _split_at_symlink_native(None, path)
347 assert symlink, (path, base, symlink, rest, resolved)
348 prev = base
349 base = safe_join(_native_case(base), symlink)
350 assert len(base) > len(prev)
351 while rest:
352 prev = base
353 relbase, symlink, rest = _split_at_symlink_native(base, rest)
354 base = safe_join(base, relbase)
355 assert len(base) > len(prev), (prev, base, symlink)
356 if symlink:
357 base = safe_join(base, symlink)
358 assert len(base) > len(prev), (prev, base, symlink)
359 # Make sure no symlink was resolved.
360 assert base.lower() == path.lower(), (base, path)
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000361 logging.debug('get_native_path_case(%s) = %s' % (path, base))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000362 return base
363
364
365else: # OSes other than Windows and OSX.
366
367
368 # On non-windows, keep the stdlib behavior.
369 isabs = os.path.isabs
370
371
csharp@chromium.orgf2eacff2013-04-04 14:20:20 +0000372 def find_item_native_case(root, item):
373 """Gets the native path case of a single item based at root_path."""
374 if item == '..':
375 return item
376
377 root = get_native_path_case(root)
378 return os.path.basename(get_native_path_case(os.path.join(root, item)))
379
380
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000381 def get_native_path_case(path):
382 """Returns the native path case for an existing file.
383
384 On OSes other than OSX and Windows, assume the file system is
385 case-sensitive.
386
387 TODO(maruel): This is not strictly true. Implement if necessary.
388 """
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000389 assert isinstance(path, unicode), repr(path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000390 if not isabs(path):
391 raise ValueError(
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000392 'get_native_path_case(%r): Require an absolute path' % path, path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000393 # Give up on cygwin, as GetLongPathName() can't be called.
394 # Linux traces tends to not be normalized so use this occasion to normalize
395 # it. This function implementation already normalizes the path on the other
396 # OS so this needs to be done here to be coherent between OSes.
397 out = os.path.normpath(path)
398 if path.endswith(os.path.sep) and not out.endswith(os.path.sep):
399 return out + os.path.sep
400 return out
401
402
403if sys.platform != 'win32': # All non-Windows OSes.
404
405
406 def safe_join(*args):
407 """Joins path elements like os.path.join() but doesn't abort on absolute
408 path.
409
410 os.path.join('foo', '/bar') == '/bar'
411 but safe_join('foo', '/bar') == 'foo/bar'.
412 """
413 out = ''
414 for element in args:
415 if element.startswith(os.path.sep):
416 if out.endswith(os.path.sep):
417 out += element[1:]
418 else:
419 out += element
420 else:
421 if out.endswith(os.path.sep):
422 out += element
423 else:
424 out += os.path.sep + element
425 return out
426
427
428 def split_at_symlink(base_dir, relfile):
429 """Scans each component of relfile and cut the string at the symlink if
430 there is any.
431
432 Returns a tuple (base_path, symlink, rest), with symlink == rest == None if
433 not symlink was found.
434 """
435 if base_dir:
436 assert relfile
437 assert os.path.isabs(base_dir)
438 index = 0
439 else:
440 assert os.path.isabs(relfile)
441 index = 1
442
443 def at_root(rest):
444 if base_dir:
445 return safe_join(base_dir, rest)
446 return rest
447
448 while True:
449 try:
450 index = relfile.index(os.path.sep, index)
451 except ValueError:
452 index = len(relfile)
453 full = at_root(relfile[:index])
454 if os.path.islink(full):
455 # A symlink!
456 base = os.path.dirname(relfile[:index])
457 symlink = os.path.basename(relfile[:index])
458 rest = relfile[index:]
459 logging.debug(
460 'split_at_symlink(%s, %s) -> (%s, %s, %s)' %
461 (base_dir, relfile, base, symlink, rest))
462 return base, symlink, rest
463 if index == len(relfile):
464 break
465 index += 1
466 return relfile, None, None
467
468
469def fix_python_path(cmd):
470 """Returns the fixed command line to call the right python executable."""
471 out = cmd[:]
472 if out[0] == 'python':
473 out[0] = sys.executable
474 elif out[0].endswith('.py'):
475 out.insert(0, sys.executable)
476 return out
477
478
479def create_thunk():
480 handle, name = tempfile.mkstemp(prefix='trace_inputs_thunk', suffix='.py')
481 os.write(
482 handle,
483 (
484 'import subprocess\n'
485 'import sys\n'
486 'sys.exit(subprocess.call(sys.argv[2:]))\n'
487 ))
488 os.close(handle)
489 return name
490
491
492def strace_process_quoted_arguments(text):
493 """Extracts quoted arguments on a string and return the arguments as a list.
494
495 Implemented as an automaton. Supports incomplete strings in the form
496 '"foo"...'.
497
498 Example:
499 With text = '"foo", "bar"', the function will return ['foo', 'bar']
500
501 TODO(maruel): Implement escaping.
502 """
503 # All the possible states of the DFA.
504 ( NEED_QUOTE, # Begining of a new arguments.
505 INSIDE_STRING, # Inside an argument.
506 ESCAPED, # Found a '\' inside a quote. Treat the next char as-is.
507 NEED_COMMA_OR_DOT, # Right after the closing quote of an argument. Could be
508 # a serie of 3 dots or a comma.
509 NEED_SPACE, # Right after a comma
510 NEED_DOT_2, # Found a dot, need a second one.
511 NEED_DOT_3, # Found second dot, need a third one.
512 NEED_COMMA, # Found third dot, need a comma.
513 ) = range(8)
514
515 state = NEED_QUOTE
516 out = []
517 for index, char in enumerate(text):
518 if char == '"':
519 if state == NEED_QUOTE:
520 state = INSIDE_STRING
521 # A new argument was found.
522 out.append('')
523 elif state == INSIDE_STRING:
524 # The argument is now closed.
525 state = NEED_COMMA_OR_DOT
526 elif state == ESCAPED:
527 out[-1] += char
528 state = INSIDE_STRING
529 else:
530 raise ValueError(
531 'Can\'t process char at column %d for: %r' % (index, text),
532 index,
533 text)
534 elif char == ',':
535 if state in (NEED_COMMA_OR_DOT, NEED_COMMA):
536 state = NEED_SPACE
537 elif state == INSIDE_STRING:
538 out[-1] += char
539 elif state == ESCAPED:
540 out[-1] += char
541 state = INSIDE_STRING
542 else:
543 raise ValueError(
544 'Can\'t process char at column %d for: %r' % (index, text),
545 index,
546 text)
547 elif char == ' ':
548 if state == NEED_SPACE:
549 state = NEED_QUOTE
550 elif state == INSIDE_STRING:
551 out[-1] += char
552 elif state == ESCAPED:
553 out[-1] += char
554 state = INSIDE_STRING
555 else:
556 raise ValueError(
557 'Can\'t process char at column %d for: %r' % (index, text),
558 index,
559 text)
560 elif char == '.':
561 if state == NEED_COMMA_OR_DOT:
562 # The string is incomplete, this mean the strace -s flag should be
563 # increased.
564 state = NEED_DOT_2
565 elif state == NEED_DOT_2:
566 state = NEED_DOT_3
567 elif state == NEED_DOT_3:
568 state = NEED_COMMA
569 elif state == INSIDE_STRING:
570 out[-1] += char
571 elif state == ESCAPED:
572 out[-1] += char
573 state = INSIDE_STRING
574 else:
575 raise ValueError(
576 'Can\'t process char at column %d for: %r' % (index, text),
577 index,
578 text)
579 elif char == '\\':
580 if state == ESCAPED:
581 out[-1] += char
582 state = INSIDE_STRING
583 elif state == INSIDE_STRING:
584 state = ESCAPED
585 else:
586 raise ValueError(
587 'Can\'t process char at column %d for: %r' % (index, text),
588 index,
589 text)
590 else:
591 if state == INSIDE_STRING:
592 out[-1] += char
593 else:
594 raise ValueError(
595 'Can\'t process char at column %d for: %r' % (index, text),
596 index,
597 text)
598 if state not in (NEED_COMMA, NEED_COMMA_OR_DOT):
599 raise ValueError(
600 'String is incorrectly terminated: %r' % text,
601 text)
602 return out
603
604
605def read_json(filepath):
606 with open(filepath, 'r') as f:
607 return json.load(f)
608
609
610def write_json(filepath_or_handle, data, dense):
611 """Writes data into filepath or file handle encoded as json.
612
613 If dense is True, the json is packed. Otherwise, it is human readable.
614 """
615 if hasattr(filepath_or_handle, 'write'):
616 if dense:
617 filepath_or_handle.write(
618 json.dumps(data, sort_keys=True, separators=(',',':')))
619 else:
620 filepath_or_handle.write(json.dumps(data, sort_keys=True, indent=2))
621 else:
622 with open(filepath_or_handle, 'wb') as f:
623 if dense:
624 json.dump(data, f, sort_keys=True, separators=(',',':'))
625 else:
626 json.dump(data, f, sort_keys=True, indent=2)
627
628
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000629def assert_is_renderable(pseudo_string):
630 """Asserts the input is a valid object to be processed by render()."""
631 assert (
632 isinstance(pseudo_string, (None.__class__, unicode)) or
633 hasattr(pseudo_string, 'render')), repr(pseudo_string)
634
635
636def render(pseudo_string):
637 """Converts the pseudo-string to an unicode string."""
638 assert_is_renderable(pseudo_string)
639 if isinstance(pseudo_string, (None.__class__, unicode)):
640 return pseudo_string
641 return pseudo_string.render()
642
643
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000644class Results(object):
645 """Results of a trace session."""
646
647 class _TouchedObject(object):
648 """Something, a file or a directory, that was accessed."""
649 def __init__(self, root, path, tainted, size, nb_files):
650 logging.debug(
651 '%s(%s, %s, %s, %s, %s)' %
652 (self.__class__.__name__, root, path, tainted, size, nb_files))
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000653 assert_is_renderable(root)
654 assert_is_renderable(path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000655 self.root = root
656 self.path = path
657 self.tainted = tainted
658 self.nb_files = nb_files
659 # Can be used as a cache or a default value, depending on context.
660 self._size = size
661 # These are cache only.
662 self._real_path = None
663
664 # Check internal consistency.
665 assert path, path
666 assert tainted or bool(root) != bool(isabs(path)), (root, path)
667 assert tainted or (
668 not os.path.exists(self.full_path) or
669 (self.full_path == get_native_path_case(self.full_path))), (
670 tainted, self.full_path, get_native_path_case(self.full_path))
671
672 @property
673 def existent(self):
674 return self.size != -1
675
676 @property
677 def full_path(self):
678 if self.root:
679 return os.path.join(self.root, self.path)
680 return self.path
681
682 @property
683 def real_path(self):
684 """Returns the path with symlinks resolved."""
685 if not self._real_path:
686 self._real_path = os.path.realpath(self.full_path)
687 return self._real_path
688
689 @property
690 def size(self):
691 """File's size. -1 is not existent."""
692 if self._size is None and not self.tainted:
693 try:
694 self._size = os.stat(self.full_path).st_size
695 except OSError:
696 self._size = -1
697 return self._size
698
699 def flatten(self):
700 """Returns a dict representing this object.
701
702 A 'size' of 0 means the file was only touched and not read.
703 """
704 return {
705 'path': self.path,
706 'size': self.size,
707 }
708
709 def replace_variables(self, variables):
710 """Replaces the root of this File with one of the variables if it matches.
711
712 If a variable replacement occurs, the cloned object becomes tainted.
713 """
714 for variable, root_path in variables.iteritems():
715 if self.path.startswith(root_path):
716 return self._clone(
717 self.root, variable + self.path[len(root_path):], True)
718 # No need to clone, returns ourself.
719 return self
720
721 def strip_root(self, root):
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000722 """Returns a clone of itself with 'root' stripped off.
723
724 Note that the file is kept if it is either accessible from a symlinked
725 path that was used to access the file or through the real path.
726 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000727 # Check internal consistency.
728 assert self.tainted or (isabs(root) and root.endswith(os.path.sep)), root
729 if not self.full_path.startswith(root):
730 # Now try to resolve the symlinks to see if it can be reached this way.
731 # Only try *after* trying without resolving symlink.
732 if not self.real_path.startswith(root):
733 return None
734 path = self.real_path
735 else:
736 path = self.full_path
737 return self._clone(root, path[len(root):], self.tainted)
738
739 def _clone(self, new_root, new_path, tainted):
740 raise NotImplementedError(self.__class__.__name__)
741
742 class File(_TouchedObject):
743 """A file that was accessed. May not be present anymore.
744
745 If tainted is true, it means it is not a real path anymore as a variable
746 replacement occured.
747
maruel@chromium.orge5c17132012-11-21 18:18:46 +0000748 If only_touched is True, this means the file was probed for existence, and
749 it is existent, but was never _opened_. If only_touched is True, the file
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000750 must have existed.
751 """
752 def __init__(self, root, path, tainted, size):
753 super(Results.File, self).__init__(root, path, tainted, size, 1)
754
755 def _clone(self, new_root, new_path, tainted):
756 """Clones itself keeping meta-data."""
757 # Keep the self.size and self._real_path caches for performance reason. It
758 # is also important when the file becomes tainted (with a variable instead
759 # of the real path) since self.path is not an on-disk path anymore so
760 # out._size cannot be updated.
761 out = self.__class__(new_root, new_path, tainted, self.size)
762 out._real_path = self._real_path
763 return out
764
765 class Directory(_TouchedObject):
766 """A directory of files. Must exist."""
767 def __init__(self, root, path, tainted, size, nb_files):
768 """path='.' is a valid value and must be handled appropriately."""
769 assert not path.endswith(os.path.sep), path
770 super(Results.Directory, self).__init__(
771 root, path + os.path.sep, tainted, size, nb_files)
772 # For a Directory instance, self.size is not a cache, it's an actual value
773 # that is never modified and represents the total size of the files
774 # contained in this directory. It is possible that the directory is empty
775 # so that size == 0; this happens if there's only an invalid symlink in
776 # it.
777
778 def flatten(self):
779 out = super(Results.Directory, self).flatten()
780 out['nb_files'] = self.nb_files
781 return out
782
783 def _clone(self, new_root, new_path, tainted):
784 """Clones itself keeping meta-data."""
785 out = self.__class__(
786 new_root,
787 new_path.rstrip(os.path.sep),
788 tainted,
789 self.size,
790 self.nb_files)
791 out._real_path = self._real_path
792 return out
793
794 class Process(object):
795 """A process that was traced.
796
797 Contains references to the files accessed by this process and its children.
798 """
799 def __init__(self, pid, files, executable, command, initial_cwd, children):
800 logging.debug('Process(%s, %d, ...)' % (pid, len(files)))
801 self.pid = pid
802 self.files = sorted(files, key=lambda x: x.path)
803 self.children = children
804 self.executable = executable
805 self.command = command
806 self.initial_cwd = initial_cwd
807
808 # Check internal consistency.
809 assert len(set(f.path for f in self.files)) == len(self.files), sorted(
810 f.path for f in self.files)
811 assert isinstance(self.children, list)
812 assert isinstance(self.files, list)
813
814 @property
815 def all(self):
816 for child in self.children:
817 for i in child.all:
818 yield i
819 yield self
820
821 def flatten(self):
822 return {
823 'children': [c.flatten() for c in self.children],
824 'command': self.command,
825 'executable': self.executable,
826 'files': [f.flatten() for f in self.files],
827 'initial_cwd': self.initial_cwd,
828 'pid': self.pid,
829 }
830
831 def strip_root(self, root):
832 assert isabs(root) and root.endswith(os.path.sep), root
833 # Loads the files after since they are constructed as objects.
834 out = self.__class__(
835 self.pid,
836 filter(None, (f.strip_root(root) for f in self.files)),
837 self.executable,
838 self.command,
839 self.initial_cwd,
840 [c.strip_root(root) for c in self.children])
841 logging.debug(
842 'strip_root(%s) %d -> %d' % (root, len(self.files), len(out.files)))
843 return out
844
845 def __init__(self, process):
846 self.process = process
847 # Cache.
848 self._files = None
849
850 def flatten(self):
851 return {
852 'root': self.process.flatten(),
853 }
854
855 @property
856 def files(self):
857 if self._files is None:
858 self._files = sorted(
859 sum((p.files for p in self.process.all), []),
860 key=lambda x: x.path)
861 return self._files
862
863 @property
864 def existent(self):
865 return [f for f in self.files if f.existent]
866
867 @property
868 def non_existent(self):
869 return [f for f in self.files if not f.existent]
870
871 def strip_root(self, root):
872 """Returns a clone with all the files outside the directory |root| removed
873 and converts all the path to be relative paths.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000874
875 It keeps files accessible through the |root| directory or that have been
876 accessed through any symlink which points to the same directory.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000877 """
878 # Resolve any symlink
879 root = os.path.realpath(root)
880 root = get_native_path_case(root).rstrip(os.path.sep) + os.path.sep
881 logging.debug('strip_root(%s)' % root)
882 return Results(self.process.strip_root(root))
883
884
885class ApiBase(object):
886 """OS-agnostic API to trace a process and its children."""
887 class Context(object):
888 """Processes one log line at a time and keeps the list of traced processes.
889
890 The parsing is complicated by the fact that logs are traced out of order for
891 strace but in-order for dtrace and logman. In addition, on Windows it is
892 very frequent that processids are reused so a flat list cannot be used. But
893 at the same time, it is impossible to faithfully construct a graph when the
894 logs are processed out of order. So both a tree and a flat mapping are used,
895 the tree is the real process tree, while the flat mapping stores the last
896 valid process for the corresponding processid. For the strace case, the
897 tree's head is guessed at the last moment.
898 """
899 class Process(object):
900 """Keeps context for one traced child process.
901
902 Logs all the files this process touched. Ignores directories.
903 """
904 def __init__(self, blacklist, pid, initial_cwd):
905 # Check internal consistency.
906 assert isinstance(pid, int), repr(pid)
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000907 assert_is_renderable(initial_cwd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000908 self.pid = pid
909 # children are Process instances.
910 self.children = []
911 self.initial_cwd = initial_cwd
912 self.cwd = None
913 self.files = set()
914 self.only_touched = set()
915 self.executable = None
916 self.command = None
917 self._blacklist = blacklist
918
919 def to_results_process(self):
920 """Resolves file case sensitivity and or late-bound strings."""
921 # When resolving files, it's normal to get dupe because a file could be
922 # opened multiple times with different case. Resolve the deduplication
923 # here.
924 def fix_path(x):
925 """Returns the native file path case.
926
927 Converts late-bound strings.
928 """
929 if not x:
930 # Do not convert None instance to 'None'.
931 return x
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000932 x = render(x)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000933 if os.path.isabs(x):
934 # If the path is not absolute, which tends to happen occasionally on
935 # Windows, it is not possible to get the native path case so ignore
936 # that trace. It mostly happens for 'executable' value.
937 x = get_native_path_case(x)
938 return x
939
940 def fix_and_blacklist_path(x):
941 x = fix_path(x)
942 if not x:
943 return
944 # The blacklist needs to be reapplied, since path casing could
945 # influence blacklisting.
946 if self._blacklist(x):
947 return
948 return x
949
950 # Filters out directories. Some may have passed through.
951 files = set(f for f in map(fix_and_blacklist_path, self.files) if f)
952 only_touched = set(
953 f for f in map(fix_and_blacklist_path, self.only_touched) if f)
954 only_touched -= files
955
956 files = [
957 Results.File(None, f, False, None) for f in files
958 if not os.path.isdir(f)
959 ]
960 # Using 0 as size means the file's content is ignored since the file was
961 # never opened for I/O.
962 files.extend(
963 Results.File(None, f, False, 0) for f in only_touched
964 if not os.path.isdir(f)
965 )
966 return Results.Process(
967 self.pid,
968 files,
969 fix_path(self.executable),
970 self.command,
971 fix_path(self.initial_cwd),
972 [c.to_results_process() for c in self.children])
973
974 def add_file(self, filepath, touch_only):
975 """Adds a file if it passes the blacklist."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000976 if self._blacklist(render(filepath)):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000977 return
978 logging.debug('add_file(%d, %s, %s)' % (self.pid, filepath, touch_only))
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000979 # Note that filepath and not render(filepath) is added. It is
980 # because filepath could be something else than a string, like a
981 # RelativePath instance for dtrace logs.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000982 if touch_only:
983 self.only_touched.add(filepath)
984 else:
985 self.files.add(filepath)
986
987 def __init__(self, blacklist):
988 self.blacklist = blacklist
989 # Initial process.
990 self.root_process = None
991 # dict to accelerate process lookup, to not have to lookup the whole graph
992 # each time.
993 self._process_lookup = {}
994
995 class Tracer(object):
996 """During it's lifetime, the tracing subsystem is enabled."""
997 def __init__(self, logname):
998 self._logname = logname
999 self._lock = threading.Lock()
1000 self._traces = []
1001 self._initialized = True
1002
1003 def trace(self, cmd, cwd, tracename, output):
1004 """Runs the OS-specific trace program on an executable.
1005
1006 Arguments:
1007 - cmd: The command (a list) to run.
1008 - cwd: Current directory to start the child process in.
1009 - tracename: Name of the trace in the logname file.
1010 - output: If False, redirects output to PIPEs.
1011
1012 Returns a tuple (resultcode, output) and updates the internal trace
1013 entries.
1014 """
1015 # The implementation adds an item to self._traces.
1016 raise NotImplementedError(self.__class__.__name__)
1017
1018 def close(self, _timeout=None):
1019 """Saves the meta-data in the logname file.
1020
1021 For kernel-based tracing, stops the tracing subsystem.
1022
1023 Must not be used manually when using 'with' construct.
1024 """
1025 with self._lock:
1026 assert self._initialized
1027 try:
1028 data = {
1029 'traces': self._traces,
1030 }
1031 write_json(self._logname, data, False)
1032 finally:
1033 self._initialized = False
1034
1035 def post_process_log(self):
1036 """Post-processes the log so it becomes faster to load afterward.
1037
1038 Must not be used manually when using 'with' construct.
1039 """
1040 assert not self._initialized, 'Must stop tracing first.'
1041
1042 def __enter__(self):
1043 """Enables 'with' statement."""
1044 return self
1045
1046 def __exit__(self, exc_type, exc_value, traceback):
1047 """Enables 'with' statement."""
1048 self.close()
1049 # If an exception was thrown, do not process logs.
1050 if not exc_type:
1051 self.post_process_log()
1052
1053 def get_tracer(self, logname):
1054 """Returns an ApiBase.Tracer instance.
1055
1056 Initializes the tracing subsystem, which is a requirement for kernel-based
1057 tracers. Only one tracer instance should be live at a time!
1058
1059 logname is the filepath to the json file that will contain the meta-data
1060 about the logs.
1061 """
1062 return self.Tracer(logname)
1063
1064 @staticmethod
1065 def clean_trace(logname):
1066 """Deletes an old log."""
1067 raise NotImplementedError()
1068
1069 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001070 def parse_log(cls, logname, blacklist, trace_name):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001071 """Processes trace logs and returns the files opened and the files that do
1072 not exist.
1073
1074 It does not track directories.
1075
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001076 Arguments:
1077 - logname: must be an absolute path.
1078 - blacklist: must be a lambda.
1079 - trace_name: optional trace to read, defaults to reading all traces.
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00001080
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001081 Most of the time, files that do not exist are temporary test files that
1082 should be put in /tmp instead. See http://crbug.com/116251.
1083
1084 Returns a list of dict with keys:
1085 - results: A Results instance.
1086 - trace: The corresponding tracename parameter provided to
1087 get_tracer().trace().
1088 - output: Output gathered during execution, if get_tracer().trace(...,
1089 output=False) was used.
1090 """
1091 raise NotImplementedError(cls.__class__.__name__)
1092
1093
1094class Strace(ApiBase):
1095 """strace implies linux."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001096 @staticmethod
1097 def load_filename(filename):
1098 """Parses a filename in a log."""
1099 assert isinstance(filename, str)
1100 out = ''
1101 i = 0
1102 while i < len(filename):
1103 c = filename[i]
1104 if c == '\\':
1105 out += chr(int(filename[i+1:i+4], 8))
1106 i += 4
1107 else:
1108 out += c
1109 i += 1
1110 # TODO(maruel): That's not necessarily true that the current code page is
1111 # utf-8.
1112 return out.decode('utf-8')
1113
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001114 class Context(ApiBase.Context):
1115 """Processes a strace log line and keeps the list of existent and non
1116 existent files accessed.
1117
1118 Ignores directories.
1119
1120 Uses late-binding to processes the cwd of each process. The problem is that
1121 strace generates one log file per process it traced but doesn't give any
1122 information about which process was started when and by who. So we don't
1123 even know which process is the initial one. So process the logs out of
1124 order and use late binding with RelativePath to be able to deduce the
1125 initial directory of each process once all the logs are parsed.
1126 """
1127 class Process(ApiBase.Context.Process):
1128 """Represents the state of a process.
1129
1130 Contains all the information retrieved from the pid-specific log.
1131 """
1132 # Function names are using ([a-z_0-9]+)
1133 # This is the most common format. function(args) = result
1134 RE_HEADER = re.compile(r'^([a-z_0-9]+)\((.+?)\)\s+= (.+)$')
1135 # An interrupted function call, only grab the minimal header.
1136 RE_UNFINISHED = re.compile(r'^([^\(]+)(.*) \<unfinished \.\.\.\>$')
1137 # A resumed function call.
1138 RE_RESUMED = re.compile(r'^<\.\.\. ([^ ]+) resumed> (.+)$')
1139 # A process received a signal.
1140 RE_SIGNAL = re.compile(r'^--- SIG[A-Z]+ .+ ---')
1141 # A process didn't handle a signal. Ignore any junk appearing before,
1142 # because the process was forcibly killed so it won't open any new file.
1143 RE_KILLED = re.compile(
1144 r'^.*\+\+\+ killed by ([A-Z]+)( \(core dumped\))? \+\+\+$')
1145 # The process has exited.
1146 RE_PROCESS_EXITED = re.compile(r'^\+\+\+ exited with (\d+) \+\+\+')
1147 # A call was canceled. Ignore any prefix.
1148 RE_UNAVAILABLE = re.compile(r'^.*\)\s*= \? <unavailable>$')
1149 # Happens when strace fails to even get the function name.
1150 UNNAMED_FUNCTION = '????'
1151
1152 # Corner-case in python, a class member function decorator must not be
1153 # @staticmethod.
1154 def parse_args(regexp, expect_zero): # pylint: disable=E0213
1155 """Automatically convert the str 'args' into a list of processed
1156 arguments.
1157
1158 Arguments:
1159 - regexp is used to parse args.
1160 - expect_zero: one of True, False or None.
1161 - True: will check for result.startswith('0') first and will ignore
1162 the trace line completely otherwise. This is important because for
1163 many functions, the regexp will not process if the call failed.
1164 - False: will check for not result.startswith(('?', '-1')) for the
1165 same reason than with True.
1166 - None: ignore result.
1167 """
1168 def meta_hook(function):
1169 assert function.__name__.startswith('handle_')
1170 def hook(self, args, result):
1171 if expect_zero is True and not result.startswith('0'):
1172 return
1173 if expect_zero is False and result.startswith(('?', '-1')):
1174 return
1175 match = re.match(regexp, args)
1176 if not match:
1177 raise TracingFailure(
1178 'Failed to parse %s(%s) = %s' %
1179 (function.__name__[len('handle_'):], args, result),
1180 None, None, None)
1181 return function(self, match.groups(), result)
1182 return hook
1183 return meta_hook
1184
1185 class RelativePath(object):
1186 """A late-bound relative path."""
1187 def __init__(self, parent, value):
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001188 assert_is_renderable(parent)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001189 self.parent = parent
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001190 assert isinstance(value, (None.__class__, str)), repr(value)
1191 self.value = Strace.load_filename(value) if value else value
1192 if self.value:
1193 assert '\\' not in self.value, value
1194 assert '\\' not in self.value, (repr(value), repr(self.value))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001195
1196 def render(self):
1197 """Returns the current directory this instance is representing.
1198
1199 This function is used to return the late-bound value.
1200 """
1201 if self.value and self.value.startswith(u'/'):
1202 # An absolute path.
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001203 # TODO(maruel): This is wrong, we can't assert it is utf-8.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001204 return self.value
1205 parent = self.parent.render() if self.parent else u'<None>'
1206 if self.value:
1207 return os.path.normpath(os.path.join(parent, self.value))
1208 return parent
1209
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001210 def __init__(self, root, pid):
1211 """Keeps enough information to be able to guess the original process
1212 root.
1213
1214 strace doesn't store which process was the initial process. So more
1215 information needs to be kept so the graph can be reconstructed from the
1216 flat map.
1217 """
1218 logging.info('%s(%d)' % (self.__class__.__name__, pid))
1219 super(Strace.Context.Process, self).__init__(root.blacklist, pid, None)
1220 assert isinstance(root, ApiBase.Context)
1221 self._root = weakref.ref(root)
1222 # The dict key is the function name of the pending call, like 'open'
1223 # or 'execve'.
1224 self._pending_calls = {}
1225 self._line_number = 0
1226 # Current directory when the process started.
1227 self.initial_cwd = self.RelativePath(self._root(), None)
1228 self.parentid = None
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001229 self._done = False
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001230
1231 def get_cwd(self):
1232 """Returns the best known value of cwd."""
1233 return self.cwd or self.initial_cwd
1234
1235 def render(self):
1236 """Returns the string value of the RelativePath() object.
1237
1238 Used by RelativePath. Returns the initial directory and not the
1239 current one since the current directory 'cwd' validity is time-limited.
1240
1241 The validity is only guaranteed once all the logs are processed.
1242 """
1243 return self.initial_cwd.render()
1244
1245 def on_line(self, line):
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001246 assert isinstance(line, str)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001247 self._line_number += 1
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001248 if self._done:
1249 raise TracingFailure(
maruel@chromium.orgbcfc9e12013-03-19 18:47:17 +00001250 'Found a trace for a terminated process or corrupted log',
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001251 None, None, None)
1252
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001253 if self.RE_SIGNAL.match(line):
1254 # Ignore signals.
1255 return
1256
1257 try:
1258 match = self.RE_KILLED.match(line)
1259 if match:
1260 # Converts a '+++ killed by Foo +++' trace into an exit_group().
1261 self.handle_exit_group(match.group(1), None)
1262 return
1263
1264 match = self.RE_PROCESS_EXITED.match(line)
1265 if match:
1266 # Converts a '+++ exited with 1 +++' trace into an exit_group()
1267 self.handle_exit_group(match.group(1), None)
1268 return
1269
1270 match = self.RE_UNFINISHED.match(line)
1271 if match:
1272 if match.group(1) in self._pending_calls:
1273 raise TracingFailure(
1274 'Found two unfinished calls for the same function',
1275 None, None, None,
1276 self._pending_calls)
1277 self._pending_calls[match.group(1)] = (
1278 match.group(1) + match.group(2))
1279 return
1280
1281 match = self.RE_UNAVAILABLE.match(line)
1282 if match:
1283 # This usually means a process was killed and a pending call was
1284 # canceled.
1285 # TODO(maruel): Look up the last exit_group() trace just above and
1286 # make sure any self._pending_calls[anything] is properly flushed.
1287 return
1288
1289 match = self.RE_RESUMED.match(line)
1290 if match:
1291 if match.group(1) not in self._pending_calls:
1292 raise TracingFailure(
1293 'Found a resumed call that was not logged as unfinished',
1294 None, None, None,
1295 self._pending_calls)
1296 pending = self._pending_calls.pop(match.group(1))
1297 # Reconstruct the line.
1298 line = pending + match.group(2)
1299
1300 match = self.RE_HEADER.match(line)
1301 if not match:
maruel@chromium.orgbcfc9e12013-03-19 18:47:17 +00001302 # The line is corrupted. It happens occasionally when a process is
1303 # killed forcibly with activity going on. Assume the process died.
1304 # No other line can be processed afterward.
1305 self._done = True
1306 return
1307
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001308 if match.group(1) == self.UNNAMED_FUNCTION:
1309 return
1310
1311 # It's a valid line, handle it.
1312 handler = getattr(self, 'handle_%s' % match.group(1), None)
1313 if not handler:
1314 self._handle_unknown(match.group(1), match.group(2), match.group(3))
1315 return handler(match.group(2), match.group(3))
1316 except TracingFailure, e:
1317 # Hack in the values since the handler could be a static function.
1318 e.pid = self.pid
1319 e.line = line
1320 e.line_number = self._line_number
1321 # Re-raise the modified exception.
1322 raise
1323 except (KeyError, NotImplementedError, ValueError), e:
1324 raise TracingFailure(
1325 'Trace generated a %s exception: %s' % (
1326 e.__class__.__name__, str(e)),
1327 self.pid,
1328 self._line_number,
1329 line,
1330 e)
1331
1332 @parse_args(r'^\"(.+?)\", [FKORWX_|]+$', True)
1333 def handle_access(self, args, _result):
1334 self._handle_file(args[0], True)
1335
1336 @parse_args(r'^\"(.+?)\"$', True)
1337 def handle_chdir(self, args, _result):
1338 """Updates cwd."""
1339 self.cwd = self.RelativePath(self, args[0])
1340 logging.debug('handle_chdir(%d, %s)' % (self.pid, self.cwd))
1341
1342 def handle_clone(self, _args, result):
1343 """Transfers cwd."""
1344 if result.startswith(('?', '-1')):
1345 # The call failed.
1346 return
1347 # Update the other process right away.
1348 childpid = int(result)
1349 child = self._root().get_or_set_proc(childpid)
1350 if child.parentid is not None or childpid in self.children:
1351 raise TracingFailure(
1352 'Found internal inconsitency in process lifetime detection '
1353 'during a clone() call',
1354 None, None, None)
1355
1356 # Copy the cwd object.
1357 child.initial_cwd = self.get_cwd()
1358 child.parentid = self.pid
1359 # It is necessary because the logs are processed out of order.
1360 self.children.append(child)
1361
1362 def handle_close(self, _args, _result):
1363 pass
1364
1365 def handle_chmod(self, _args, _result):
1366 pass
1367
1368 def handle_creat(self, _args, _result):
1369 # Ignore files created, since they didn't need to exist.
1370 pass
1371
1372 @parse_args(r'^\"(.+?)\", \[(.+)\], \[\/\* \d+ vars? \*\/\]$', True)
1373 def handle_execve(self, args, _result):
1374 # Even if in practice execve() doesn't returns when it succeeds, strace
1375 # still prints '0' as the result.
1376 filepath = args[0]
1377 self._handle_file(filepath, False)
1378 self.executable = self.RelativePath(self.get_cwd(), filepath)
1379 self.command = strace_process_quoted_arguments(args[1])
1380
1381 def handle_exit_group(self, _args, _result):
1382 """Removes cwd."""
1383 self.cwd = None
1384
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001385 @parse_args(r'^(\d+|AT_FDCWD), \"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', True)
1386 def handle_faccessat(self, args, _results):
1387 if args[0] == 'AT_FDCWD':
1388 self._handle_file(args[1], True)
1389 else:
1390 raise Exception('Relative faccess not implemented.')
1391
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001392 def handle_fork(self, args, result):
1393 self._handle_unknown('fork', args, result)
1394
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001395 def handle_futex(self, _args, _result):
1396 pass
1397
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001398 def handle_getcwd(self, _args, _result):
1399 pass
1400
1401 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1402 def handle_link(self, args, _result):
1403 self._handle_file(args[0], False)
1404 self._handle_file(args[1], False)
1405
1406 @parse_args(r'\"(.+?)\", \{.+?, \.\.\.\}', True)
1407 def handle_lstat(self, args, _result):
1408 self._handle_file(args[0], True)
1409
1410 def handle_mkdir(self, _args, _result):
1411 pass
1412
1413 @parse_args(r'^\"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', False)
1414 def handle_open(self, args, _result):
1415 if 'O_DIRECTORY' in args[1]:
1416 return
1417 self._handle_file(args[0], False)
1418
1419 @parse_args(r'^(\d+|AT_FDCWD), \"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', False)
1420 def handle_openat(self, args, _result):
1421 if 'O_DIRECTORY' in args[2]:
1422 return
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001423 if args[0] == 'AT_FDCWD':
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001424 self._handle_file(args[1], False)
1425 else:
1426 # TODO(maruel): Implement relative open if necessary instead of the
1427 # AT_FDCWD flag, let's hope not since this means tracking all active
1428 # directory handles.
1429 raise Exception('Relative open via openat not implemented.')
1430
1431 @parse_args(r'^\"(.+?)\", \".+?\"(\.\.\.)?, \d+$', False)
1432 def handle_readlink(self, args, _result):
1433 self._handle_file(args[0], False)
1434
1435 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1436 def handle_rename(self, args, _result):
1437 self._handle_file(args[0], False)
1438 self._handle_file(args[1], False)
1439
1440 def handle_rmdir(self, _args, _result):
1441 pass
1442
1443 def handle_setxattr(self, _args, _result):
1444 pass
1445
1446 @parse_args(r'\"(.+?)\", \{.+?, \.\.\.\}', True)
1447 def handle_stat(self, args, _result):
1448 self._handle_file(args[0], True)
1449
1450 def handle_symlink(self, _args, _result):
1451 pass
1452
csharp@chromium.orgfa672922012-11-02 17:43:31 +00001453 @parse_args(r'^\"(.+?)\", \d+', True)
1454 def handle_truncate(self, args, _result):
1455 self._handle_file(args[0], False)
1456
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001457 def handle_unlink(self, _args, _result):
1458 # In theory, the file had to be created anyway.
1459 pass
1460
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001461 def handle_unlinkat(self, _args, _result):
1462 # In theory, the file had to be created anyway.
1463 pass
1464
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001465 def handle_statfs(self, _args, _result):
1466 pass
1467
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001468 def handle_utimensat(self, _args, _result):
1469 pass
1470
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001471 def handle_vfork(self, args, result):
1472 self._handle_unknown('vfork', args, result)
1473
1474 @staticmethod
1475 def _handle_unknown(function, args, result):
1476 raise TracingFailure(
1477 'Unexpected/unimplemented trace %s(%s)= %s' %
1478 (function, args, result),
1479 None, None, None)
1480
1481 def _handle_file(self, filepath, touch_only):
1482 filepath = self.RelativePath(self.get_cwd(), filepath)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001483 #assert not touch_only, render(filepath)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001484 self.add_file(filepath, touch_only)
1485
1486 def __init__(self, blacklist, initial_cwd):
1487 super(Strace.Context, self).__init__(blacklist)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001488 assert_is_renderable(initial_cwd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001489 self.initial_cwd = initial_cwd
1490
1491 def render(self):
1492 """Returns the string value of the initial cwd of the root process.
1493
1494 Used by RelativePath.
1495 """
1496 return self.initial_cwd
1497
1498 def on_line(self, pid, line):
1499 """Transfers control into the Process.on_line() function."""
1500 self.get_or_set_proc(pid).on_line(line.strip())
1501
1502 def to_results(self):
1503 """Finds back the root process and verify consistency."""
1504 # TODO(maruel): Absolutely unecessary, fix me.
1505 root = [p for p in self._process_lookup.itervalues() if not p.parentid]
1506 if len(root) != 1:
1507 raise TracingFailure(
1508 'Found internal inconsitency in process lifetime detection '
1509 'while finding the root process',
1510 None,
1511 None,
1512 None,
1513 sorted(p.pid for p in root))
1514 self.root_process = root[0]
1515 process = self.root_process.to_results_process()
1516 if sorted(self._process_lookup) != sorted(p.pid for p in process.all):
1517 raise TracingFailure(
1518 'Found internal inconsitency in process lifetime detection '
1519 'while looking for len(tree) == len(list)',
1520 None,
1521 None,
1522 None,
1523 sorted(self._process_lookup),
1524 sorted(p.pid for p in process.all))
1525 return Results(process)
1526
1527 def get_or_set_proc(self, pid):
1528 """Returns the Context.Process instance for this pid or creates a new one.
1529 """
1530 if not pid or not isinstance(pid, int):
1531 raise TracingFailure(
1532 'Unpexpected value for pid: %r' % pid,
1533 pid,
1534 None,
1535 None,
1536 pid)
1537 if pid not in self._process_lookup:
1538 self._process_lookup[pid] = self.Process(self, pid)
1539 return self._process_lookup[pid]
1540
1541 @classmethod
1542 def traces(cls):
1543 """Returns the list of all handled traces to pass this as an argument to
1544 strace.
1545 """
1546 prefix = 'handle_'
1547 return [i[len(prefix):] for i in dir(cls.Process) if i.startswith(prefix)]
1548
1549 class Tracer(ApiBase.Tracer):
1550 MAX_LEN = 256
1551
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001552 def __init__(self, logname, use_sudo):
1553 super(Strace.Tracer, self).__init__(logname)
1554 self.use_sudo = use_sudo
1555
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001556 def trace(self, cmd, cwd, tracename, output):
1557 """Runs strace on an executable."""
1558 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
1559 assert os.path.isabs(cmd[0]), cmd[0]
1560 assert os.path.isabs(cwd), cwd
1561 assert os.path.normpath(cwd) == cwd, cwd
1562 with self._lock:
1563 if not self._initialized:
1564 raise TracingFailure(
1565 'Called Tracer.trace() on an unitialized object',
1566 None, None, None, tracename)
1567 assert tracename not in (i['trace'] for i in self._traces)
1568 stdout = stderr = None
1569 if output:
1570 stdout = subprocess.PIPE
1571 stderr = subprocess.STDOUT
1572 # Ensure all file related APIs are hooked.
1573 traces = ','.join(Strace.Context.traces() + ['file'])
1574 trace_cmd = [
1575 'strace',
1576 '-ff',
1577 '-s', '%d' % self.MAX_LEN,
1578 '-e', 'trace=%s' % traces,
1579 '-o', self._logname + '.' + tracename,
1580 ]
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001581 if self.use_sudo is True:
1582 trace_cmd.insert(0, 'sudo')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001583 child = subprocess.Popen(
1584 trace_cmd + cmd,
1585 cwd=cwd,
1586 stdin=subprocess.PIPE,
1587 stdout=stdout,
1588 stderr=stderr)
1589 out = child.communicate()[0]
1590 # TODO(maruel): Walk the logs and figure out the root process would
1591 # simplify parsing the logs a *lot*.
1592 with self._lock:
1593 assert tracename not in (i['trace'] for i in self._traces)
1594 self._traces.append(
1595 {
1596 'cmd': cmd,
1597 'cwd': cwd,
maruel@chromium.org964ddde2012-10-29 18:13:46 +00001598 'output': out,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001599 # The pid of strace process, not very useful.
1600 'pid': child.pid,
1601 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001602 })
1603 return child.returncode, out
1604
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001605 def __init__(self, use_sudo=None):
1606 super(Strace, self).__init__()
1607 self.use_sudo = use_sudo
1608
1609 def get_tracer(self, logname):
1610 return self.Tracer(logname, self.use_sudo)
1611
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001612 @staticmethod
1613 def clean_trace(logname):
1614 if os.path.isfile(logname):
1615 os.remove(logname)
1616 # Also delete any pid specific file from previous traces.
1617 for i in glob.iglob(logname + '.*'):
1618 if i.rsplit('.', 1)[1].isdigit():
1619 os.remove(i)
1620
1621 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001622 def parse_log(cls, logname, blacklist, trace_name):
1623 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00001624 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001625 data = read_json(logname)
1626 out = []
1627 for item in data['traces']:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001628 if trace_name and item['trace'] != trace_name:
1629 continue
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001630 result = {
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001631 'output': item['output'],
maruel@chromium.org964ddde2012-10-29 18:13:46 +00001632 'trace': item['trace'],
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001633 }
1634 try:
1635 context = cls.Context(blacklist, item['cwd'])
1636 for pidfile in glob.iglob('%s.%s.*' % (logname, item['trace'])):
1637 pid = pidfile.rsplit('.', 1)[1]
1638 if pid.isdigit():
1639 pid = int(pid)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001640 for line in open(pidfile, 'rb'):
1641 context.on_line(pid, line)
1642 result['results'] = context.to_results()
maruel@chromium.org424f5692012-10-15 15:42:38 +00001643 except TracingFailure:
1644 result['exception'] = sys.exc_info()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001645 out.append(result)
1646 return out
1647
1648
1649class Dtrace(ApiBase):
1650 """Uses DTrace framework through dtrace. Requires root access.
1651
1652 Implies Mac OSX.
1653
1654 dtruss can't be used because it has compatibility issues with python.
1655
1656 Also, the pid->cwd handling needs to be done manually since OSX has no way to
1657 get the absolute path of the 'cwd' dtrace variable from the probe.
1658
1659 Also, OSX doesn't populate curpsinfo->pr_psargs properly, see
1660 https://discussions.apple.com/thread/1980539. So resort to handling execve()
1661 manually.
1662
1663 errno is not printed in the log since this implementation currently only cares
1664 about files that were successfully opened.
1665 """
1666 class Context(ApiBase.Context):
1667 # Format: index pid function(args)
1668 RE_HEADER = re.compile(r'^\d+ (\d+) ([a-zA-Z_\-]+)\((.*?)\)$')
1669
1670 # Arguments parsing.
1671 RE_DTRACE_BEGIN = re.compile(r'^\"(.+?)\"$')
1672 RE_CHDIR = re.compile(r'^\"(.+?)\"$')
1673 RE_EXECVE = re.compile(r'^\"(.+?)\", \[(\d+), (.+)\]$')
1674 RE_OPEN = re.compile(r'^\"(.+?)\", (0x[0-9a-z]+), (0x[0-9a-z]+)$')
1675 RE_PROC_START = re.compile(r'^(\d+), \"(.+?)\", (\d+)$')
1676 RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$')
1677
1678 O_DIRECTORY = 0x100000
1679
1680 class Process(ApiBase.Context.Process):
1681 def __init__(self, *args):
1682 super(Dtrace.Context.Process, self).__init__(*args)
1683 self.cwd = self.initial_cwd
1684
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001685 def __init__(self, blacklist, thunk_pid, initial_cwd):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001686 logging.info(
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001687 '%s(%d, %s)' % (self.__class__.__name__, thunk_pid, initial_cwd))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001688 super(Dtrace.Context, self).__init__(blacklist)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001689 assert isinstance(initial_cwd, unicode), initial_cwd
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001690 # Process ID of the temporary script created by create_thunk().
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001691 self._thunk_pid = thunk_pid
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001692 self._initial_cwd = initial_cwd
1693 self._line_number = 0
1694
1695 def on_line(self, line):
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001696 assert isinstance(line, unicode), line
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001697 self._line_number += 1
1698 match = self.RE_HEADER.match(line)
1699 if not match:
1700 raise TracingFailure(
1701 'Found malformed line: %s' % line,
1702 None,
1703 self._line_number,
1704 line)
1705 fn = getattr(
1706 self,
1707 'handle_%s' % match.group(2).replace('-', '_'),
1708 self._handle_ignored)
1709 # It is guaranteed to succeed because of the regexp. Or at least I thought
1710 # it would.
1711 pid = int(match.group(1))
1712 try:
1713 return fn(pid, match.group(3))
1714 except TracingFailure, e:
1715 # Hack in the values since the handler could be a static function.
1716 e.pid = pid
1717 e.line = line
1718 e.line_number = self._line_number
1719 # Re-raise the modified exception.
1720 raise
1721 except (KeyError, NotImplementedError, ValueError), e:
1722 raise TracingFailure(
1723 'Trace generated a %s exception: %s' % (
1724 e.__class__.__name__, str(e)),
1725 pid,
1726 self._line_number,
1727 line,
1728 e)
1729
1730 def to_results(self):
1731 process = self.root_process.to_results_process()
1732 # Internal concistency check.
1733 if sorted(self._process_lookup) != sorted(p.pid for p in process.all):
1734 raise TracingFailure(
1735 'Found internal inconsitency in process lifetime detection '
1736 'while looking for len(tree) == len(list)',
1737 None,
1738 None,
1739 None,
1740 sorted(self._process_lookup),
1741 sorted(p.pid for p in process.all))
1742 return Results(process)
1743
1744 def handle_dtrace_BEGIN(self, _pid, args):
1745 if not self.RE_DTRACE_BEGIN.match(args):
1746 raise TracingFailure(
1747 'Found internal inconsitency in dtrace_BEGIN log line',
1748 None, None, None)
1749
1750 def handle_proc_start(self, pid, args):
1751 """Transfers cwd.
1752
1753 The dtrace script already takes care of only tracing the processes that
1754 are child of the traced processes so there is no need to verify the
1755 process hierarchy.
1756 """
1757 if pid in self._process_lookup:
1758 raise TracingFailure(
1759 'Found internal inconsitency in proc_start: %d started two times' %
1760 pid,
1761 None, None, None)
1762 match = self.RE_PROC_START.match(args)
1763 if not match:
1764 raise TracingFailure(
1765 'Failed to parse arguments: %s' % args,
1766 None, None, None)
1767 ppid = int(match.group(1))
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001768 if ppid == self._thunk_pid and not self.root_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001769 proc = self.root_process = self.Process(
1770 self.blacklist, pid, self._initial_cwd)
1771 elif ppid in self._process_lookup:
1772 proc = self.Process(self.blacklist, pid, self._process_lookup[ppid].cwd)
1773 self._process_lookup[ppid].children.append(proc)
1774 else:
1775 # Another process tree, ignore.
1776 return
1777 self._process_lookup[pid] = proc
1778 logging.debug(
1779 'New child: %s -> %d cwd:%s' %
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001780 (ppid, pid, render(proc.initial_cwd)))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001781
1782 def handle_proc_exit(self, pid, _args):
1783 """Removes cwd."""
1784 if pid in self._process_lookup:
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001785 # self._thunk_pid is not traced itself and other traces run neither.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001786 self._process_lookup[pid].cwd = None
1787
1788 def handle_execve(self, pid, args):
1789 """Sets the process' executable.
1790
1791 TODO(maruel): Read command line arguments. See
1792 https://discussions.apple.com/thread/1980539 for an example.
1793 https://gist.github.com/1242279
1794
1795 Will have to put the answer at http://stackoverflow.com/questions/7556249.
1796 :)
1797 """
1798 if not pid in self._process_lookup:
1799 # Another process tree, ignore.
1800 return
1801 match = self.RE_EXECVE.match(args)
1802 if not match:
1803 raise TracingFailure(
1804 'Failed to parse arguments: %r' % args,
1805 None, None, None)
1806 proc = self._process_lookup[pid]
1807 proc.executable = match.group(1)
1808 proc.command = self.process_escaped_arguments(match.group(3))
1809 if int(match.group(2)) != len(proc.command):
1810 raise TracingFailure(
1811 'Failed to parse execve() arguments: %s' % args,
1812 None, None, None)
1813
1814 def handle_chdir(self, pid, args):
1815 """Updates cwd."""
1816 if pid not in self._process_lookup:
1817 # Another process tree, ignore.
1818 return
1819 cwd = self.RE_CHDIR.match(args).group(1)
1820 if not cwd.startswith('/'):
1821 cwd2 = os.path.join(self._process_lookup[pid].cwd, cwd)
1822 logging.debug('handle_chdir(%d, %s) -> %s' % (pid, cwd, cwd2))
1823 else:
1824 logging.debug('handle_chdir(%d, %s)' % (pid, cwd))
1825 cwd2 = cwd
1826 self._process_lookup[pid].cwd = cwd2
1827
1828 def handle_open_nocancel(self, pid, args):
1829 """Redirects to handle_open()."""
1830 return self.handle_open(pid, args)
1831
1832 def handle_open(self, pid, args):
1833 if pid not in self._process_lookup:
1834 # Another process tree, ignore.
1835 return
1836 match = self.RE_OPEN.match(args)
1837 if not match:
1838 raise TracingFailure(
1839 'Failed to parse arguments: %s' % args,
1840 None, None, None)
1841 flag = int(match.group(2), 16)
1842 if self.O_DIRECTORY & flag == self.O_DIRECTORY:
1843 # Ignore directories.
1844 return
1845 self._handle_file(pid, match.group(1))
1846
1847 def handle_rename(self, pid, args):
1848 if pid not in self._process_lookup:
1849 # Another process tree, ignore.
1850 return
1851 match = self.RE_RENAME.match(args)
1852 if not match:
1853 raise TracingFailure(
1854 'Failed to parse arguments: %s' % args,
1855 None, None, None)
1856 self._handle_file(pid, match.group(1))
1857 self._handle_file(pid, match.group(2))
1858
1859 def _handle_file(self, pid, filepath):
1860 if not filepath.startswith('/'):
1861 filepath = os.path.join(self._process_lookup[pid].cwd, filepath)
1862 # We can get '..' in the path.
1863 filepath = os.path.normpath(filepath)
1864 # Sadly, still need to filter out directories here;
1865 # saw open_nocancel(".", 0, 0) = 0 lines.
1866 if os.path.isdir(filepath):
1867 return
1868 self._process_lookup[pid].add_file(filepath, False)
1869
1870 def handle_ftruncate(self, pid, args):
1871 """Just used as a signal to kill dtrace, ignoring."""
1872 pass
1873
1874 @staticmethod
1875 def _handle_ignored(pid, args):
1876 """Is called for all the event traces that are not handled."""
1877 raise NotImplementedError('Please implement me')
1878
1879 @staticmethod
1880 def process_escaped_arguments(text):
1881 """Extracts escaped arguments on a string and return the arguments as a
1882 list.
1883
1884 Implemented as an automaton.
1885
1886 Example:
1887 With text = '\\001python2.7\\001-c\\001print(\\"hi\\")\\0', the
1888 function will return ['python2.7', '-c', 'print("hi")]
1889 """
1890 if not text.endswith('\\0'):
1891 raise ValueError('String is not null terminated: %r' % text, text)
1892 text = text[:-2]
1893
1894 def unescape(x):
1895 """Replaces '\\' with '\' and '\?' (where ? is anything) with ?."""
1896 out = []
1897 escaped = False
1898 for i in x:
1899 if i == '\\' and not escaped:
1900 escaped = True
1901 continue
1902 escaped = False
1903 out.append(i)
1904 return ''.join(out)
1905
1906 return [unescape(i) for i in text.split('\\001')]
1907
1908 class Tracer(ApiBase.Tracer):
1909 # pylint: disable=C0301
1910 #
1911 # To understand the following code, you'll want to take a look at:
1912 # http://developers.sun.com/solaris/articles/dtrace_quickref/dtrace_quickref.html
1913 # https://wikis.oracle.com/display/DTrace/Variables
1914 # http://docs.oracle.com/cd/E19205-01/820-4221/
1915 #
1916 # 0. Dump all the valid probes into a text file. It is important, you
1917 # want to redirect into a file and you don't want to constantly 'sudo'.
1918 # $ sudo dtrace -l > probes.txt
1919 #
1920 # 1. Count the number of probes:
1921 # $ wc -l probes.txt
1922 # 81823 # On OSX 10.7, including 1 header line.
1923 #
1924 # 2. List providers, intentionally skipping all the 'syspolicy10925' and the
1925 # likes and skipping the header with NR>1:
1926 # $ awk 'NR>1 { print $2 }' probes.txt | sort | uniq | grep -v '[[:digit:]]'
1927 # dtrace
1928 # fbt
1929 # io
1930 # ip
1931 # lockstat
1932 # mach_trap
1933 # proc
1934 # profile
1935 # sched
1936 # syscall
1937 # tcp
1938 # vminfo
1939 #
1940 # 3. List of valid probes:
1941 # $ grep syscall probes.txt | less
1942 # or use dtrace directly:
1943 # $ sudo dtrace -l -P syscall | less
1944 #
1945 # trackedpid is an associative array where its value can be 0, 1 or 2.
1946 # 0 is for untracked processes and is the default value for items not
1947 # in the associative array.
1948 # 1 is for tracked processes.
1949 # 2 is for the script created by create_thunk() only. It is not tracked
1950 # itself but all its decendants are.
1951 #
1952 # The script will kill itself only once waiting_to_die == 1 and
1953 # current_processes == 0, so that both getlogin() was called and that
1954 # all traced processes exited.
1955 #
1956 # TODO(maruel): Use cacheable predicates. See
1957 # https://wikis.oracle.com/display/DTrace/Performance+Considerations
1958 D_CODE = """
1959 dtrace:::BEGIN {
1960 waiting_to_die = 0;
1961 current_processes = 0;
1962 logindex = 0;
1963 printf("%d %d %s_%s(\\"%s\\")\\n",
1964 logindex, PID, probeprov, probename, SCRIPT);
1965 logindex++;
1966 }
1967
1968 proc:::start /trackedpid[ppid]/ {
1969 trackedpid[pid] = 1;
1970 current_processes += 1;
1971 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
1972 logindex, pid, probeprov, probename,
1973 ppid,
1974 execname,
1975 current_processes);
1976 logindex++;
1977 }
1978 /* Should use SCRIPT but there is no access to this variable at that
1979 * point. */
1980 proc:::start /ppid == PID && execname == "Python"/ {
1981 trackedpid[pid] = 2;
1982 current_processes += 1;
1983 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
1984 logindex, pid, probeprov, probename,
1985 ppid,
1986 execname,
1987 current_processes);
1988 logindex++;
1989 }
1990 proc:::exit /trackedpid[pid] &&
1991 current_processes == 1 &&
1992 waiting_to_die == 1/ {
1993 trackedpid[pid] = 0;
1994 current_processes -= 1;
1995 printf("%d %d %s_%s(%d)\\n",
1996 logindex, pid, probeprov, probename,
1997 current_processes);
1998 logindex++;
1999 exit(0);
2000 }
2001 proc:::exit /trackedpid[pid]/ {
2002 trackedpid[pid] = 0;
2003 current_processes -= 1;
2004 printf("%d %d %s_%s(%d)\\n",
2005 logindex, pid, probeprov, probename,
2006 current_processes);
2007 logindex++;
2008 }
2009
2010 /* Use an arcane function to detect when we need to die */
2011 syscall::ftruncate:entry /pid == PID && arg0 == FILE_ID/ {
2012 waiting_to_die = 1;
2013 printf("%d %d %s()\\n", logindex, pid, probefunc);
2014 logindex++;
2015 }
2016 syscall::ftruncate:entry /
2017 pid == PID && arg0 == FILE_ID && current_processes == 0/ {
2018 exit(0);
2019 }
2020
2021 syscall::open*:entry /trackedpid[pid] == 1/ {
2022 self->open_arg0 = arg0;
2023 self->open_arg1 = arg1;
2024 self->open_arg2 = arg2;
2025 }
2026 syscall::open*:return /trackedpid[pid] == 1 && errno == 0/ {
2027 this->open_arg0 = copyinstr(self->open_arg0);
2028 printf("%d %d %s(\\"%s\\", 0x%x, 0x%x)\\n",
2029 logindex, pid, probefunc,
2030 this->open_arg0,
2031 self->open_arg1,
2032 self->open_arg2);
2033 logindex++;
2034 this->open_arg0 = 0;
2035 }
2036 syscall::open*:return /trackedpid[pid] == 1/ {
2037 self->open_arg0 = 0;
2038 self->open_arg1 = 0;
2039 self->open_arg2 = 0;
2040 }
2041
2042 syscall::rename:entry /trackedpid[pid] == 1/ {
2043 self->rename_arg0 = arg0;
2044 self->rename_arg1 = arg1;
2045 }
2046 syscall::rename:return /trackedpid[pid] == 1 && errno == 0/ {
2047 this->rename_arg0 = copyinstr(self->rename_arg0);
2048 this->rename_arg1 = copyinstr(self->rename_arg1);
2049 printf("%d %d %s(\\"%s\\", \\"%s\\")\\n",
2050 logindex, pid, probefunc,
2051 this->rename_arg0,
2052 this->rename_arg1);
2053 logindex++;
2054 this->rename_arg0 = 0;
2055 this->rename_arg1 = 0;
2056 }
2057 syscall::rename:return /trackedpid[pid] == 1/ {
2058 self->rename_arg0 = 0;
2059 self->rename_arg1 = 0;
2060 }
2061
2062 /* Track chdir, it's painful because it is only receiving relative path.
2063 */
2064 syscall::chdir:entry /trackedpid[pid] == 1/ {
2065 self->chdir_arg0 = arg0;
2066 }
2067 syscall::chdir:return /trackedpid[pid] == 1 && errno == 0/ {
2068 this->chdir_arg0 = copyinstr(self->chdir_arg0);
2069 printf("%d %d %s(\\"%s\\")\\n",
2070 logindex, pid, probefunc,
2071 this->chdir_arg0);
2072 logindex++;
2073 this->chdir_arg0 = 0;
2074 }
2075 syscall::chdir:return /trackedpid[pid] == 1/ {
2076 self->chdir_arg0 = 0;
2077 }
2078 """
2079
2080 # execve-specific code, tends to throw a lot of exceptions.
2081 D_CODE_EXECVE = """
2082 /* Finally what we care about! */
2083 syscall::exec*:entry /trackedpid[pid]/ {
2084 self->exec_arg0 = copyinstr(arg0);
2085 /* Incrementally probe for a NULL in the argv parameter of execve() to
2086 * figure out argc. */
2087 /* TODO(maruel): Skip the remaining copyin() when a NULL pointer was
2088 * found. */
2089 self->exec_argc = 0;
2090 /* Probe for argc==1 */
2091 this->exec_argv = (user_addr_t*)copyin(
2092 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2093 self->exec_argc = this->exec_argv[self->exec_argc] ?
2094 (self->exec_argc + 1) : self->exec_argc;
2095
2096 /* Probe for argc==2 */
2097 this->exec_argv = (user_addr_t*)copyin(
2098 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2099 self->exec_argc = this->exec_argv[self->exec_argc] ?
2100 (self->exec_argc + 1) : self->exec_argc;
2101
2102 /* Probe for argc==3 */
2103 this->exec_argv = (user_addr_t*)copyin(
2104 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2105 self->exec_argc = this->exec_argv[self->exec_argc] ?
2106 (self->exec_argc + 1) : self->exec_argc;
2107
2108 /* Probe for argc==4 */
2109 this->exec_argv = (user_addr_t*)copyin(
2110 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2111 self->exec_argc = this->exec_argv[self->exec_argc] ?
2112 (self->exec_argc + 1) : self->exec_argc;
2113
2114 /* Copy the inputs strings since there is no guarantee they'll be
2115 * present after the call completed. */
2116 self->exec_argv0 = (self->exec_argc > 0) ?
2117 copyinstr(this->exec_argv[0]) : "";
2118 self->exec_argv1 = (self->exec_argc > 1) ?
2119 copyinstr(this->exec_argv[1]) : "";
2120 self->exec_argv2 = (self->exec_argc > 2) ?
2121 copyinstr(this->exec_argv[2]) : "";
2122 self->exec_argv3 = (self->exec_argc > 3) ?
2123 copyinstr(this->exec_argv[3]) : "";
2124 this->exec_argv = 0;
2125 }
2126 syscall::exec*:return /trackedpid[pid] && errno == 0/ {
2127 /* We need to join strings here, as using multiple printf() would
2128 * cause tearing when multiple threads/processes are traced.
2129 * Since it is impossible to escape a string and join it to another one,
2130 * like sprintf("%s%S", previous, more), use hackery.
2131 * Each of the elements are split with a \\1. \\0 cannot be used because
2132 * it is simply ignored. This will conflict with any program putting a
2133 * \\1 in their execve() string but this should be "rare enough" */
2134 this->args = "";
2135 /* Process exec_argv[0] */
2136 this->args = strjoin(
2137 this->args, (self->exec_argc > 0) ? self->exec_argv0 : "");
2138
2139 /* Process exec_argv[1] */
2140 this->args = strjoin(
2141 this->args, (self->exec_argc > 1) ? "\\1" : "");
2142 this->args = strjoin(
2143 this->args, (self->exec_argc > 1) ? self->exec_argv1 : "");
2144
2145 /* Process exec_argv[2] */
2146 this->args = strjoin(
2147 this->args, (self->exec_argc > 2) ? "\\1" : "");
2148 this->args = strjoin(
2149 this->args, (self->exec_argc > 2) ? self->exec_argv2 : "");
2150
2151 /* Process exec_argv[3] */
2152 this->args = strjoin(
2153 this->args, (self->exec_argc > 3) ? "\\1" : "");
2154 this->args = strjoin(
2155 this->args, (self->exec_argc > 3) ? self->exec_argv3 : "");
2156
2157 /* Prints self->exec_argc to permits verifying the internal
2158 * consistency since this code is quite fishy. */
2159 printf("%d %d %s(\\"%s\\", [%d, %S])\\n",
2160 logindex, pid, probefunc,
2161 self->exec_arg0,
2162 self->exec_argc,
2163 this->args);
2164 logindex++;
2165 this->args = 0;
2166 }
2167 syscall::exec*:return /trackedpid[pid]/ {
2168 self->exec_arg0 = 0;
2169 self->exec_argc = 0;
2170 self->exec_argv0 = 0;
2171 self->exec_argv1 = 0;
2172 self->exec_argv2 = 0;
2173 self->exec_argv3 = 0;
2174 }
2175 """
2176
2177 # Code currently not used.
2178 D_EXTRANEOUS = """
2179 /* This is a good learning experience, since it traces a lot of things
2180 * related to the process and child processes.
2181 * Warning: it generates a gigantic log. For example, tracing
2182 * "data/trace_inputs/child1.py --child" generates a 2mb log and takes
2183 * several minutes to execute.
2184 */
2185 /*
2186 mach_trap::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2187 printf("%d %d %s_%s() = %d\\n",
2188 logindex, pid, probeprov, probefunc, errno);
2189 logindex++;
2190 }
2191 proc::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2192 printf("%d %d %s_%s() = %d\\n",
2193 logindex, pid, probeprov, probefunc, errno);
2194 logindex++;
2195 }
2196 sched::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2197 printf("%d %d %s_%s() = %d\\n",
2198 logindex, pid, probeprov, probefunc, errno);
2199 logindex++;
2200 }
2201 syscall::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2202 printf("%d %d %s_%s() = %d\\n",
2203 logindex, pid, probeprov, probefunc, errno);
2204 logindex++;
2205 }
2206 vminfo::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2207 printf("%d %d %s_%s() = %d\\n",
2208 logindex, pid, probeprov, probefunc, errno);
2209 logindex++;
2210 }
2211 */
2212 /* TODO(maruel): *stat* functions and friends
2213 syscall::access:return,
2214 syscall::chdir:return,
2215 syscall::chflags:return,
2216 syscall::chown:return,
2217 syscall::chroot:return,
2218 syscall::getattrlist:return,
2219 syscall::getxattr:return,
2220 syscall::lchown:return,
2221 syscall::lstat64:return,
2222 syscall::lstat:return,
2223 syscall::mkdir:return,
2224 syscall::pathconf:return,
2225 syscall::readlink:return,
2226 syscall::removexattr:return,
2227 syscall::setxattr:return,
2228 syscall::stat64:return,
2229 syscall::stat:return,
2230 syscall::truncate:return,
2231 syscall::unlink:return,
2232 syscall::utimes:return,
2233 */
2234 """
2235
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002236 def __init__(self, logname, use_sudo):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002237 """Starts the log collection with dtrace.
2238
2239 Requires root access or chmod 4555 on dtrace. dtrace is asynchronous so
2240 this needs to wait for dtrace to be "warmed up".
2241 """
2242 super(Dtrace.Tracer, self).__init__(logname)
2243 self._script = create_thunk()
2244 # This unique dummy temp file is used to signal the dtrace script that it
2245 # should stop as soon as all the child processes are done. A bit hackish
2246 # but works fine enough.
2247 self._dummy_file_id, self._dummy_file_name = tempfile.mkstemp(
2248 prefix='trace_signal_file')
2249
2250 # Note: do not use the -p flag. It's useless if the initial process quits
2251 # too fast, resulting in missing traces from the grand-children. The D
2252 # code manages the dtrace lifetime itself.
2253 trace_cmd = [
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002254 'dtrace',
2255 # Use a larger buffer if getting 'out of scratch space' errors.
2256 # Ref: https://wikis.oracle.com/display/DTrace/Options+and+Tunables
2257 '-b', '10m',
2258 '-x', 'dynvarsize=10m',
2259 #'-x', 'dtrace_global_maxsize=1m',
2260 '-x', 'evaltime=exec',
2261 '-o', '/dev/stderr',
2262 '-q',
2263 '-n', self._get_dtrace_code(),
2264 ]
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002265 if use_sudo is not False:
2266 trace_cmd.insert(0, 'sudo')
2267
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002268 with open(self._logname + '.log', 'wb') as logfile:
2269 self._dtrace = subprocess.Popen(
2270 trace_cmd, stdout=logfile, stderr=subprocess.STDOUT)
2271 logging.debug('Started dtrace pid: %d' % self._dtrace.pid)
2272
2273 # Reads until one line is printed, which signifies dtrace is up and ready.
2274 with open(self._logname + '.log', 'rb') as logfile:
2275 while 'dtrace_BEGIN' not in logfile.readline():
2276 if self._dtrace.poll() is not None:
2277 # Do a busy wait. :/
2278 break
2279 logging.debug('dtrace started')
2280
2281 def _get_dtrace_code(self):
2282 """Setups the D code to implement child process tracking.
2283
2284 Injects the cookie in the script so it knows when to stop.
2285
2286 The script will detect any instance of the script created with
2287 create_thunk() and will start tracing it.
2288 """
maruel@chromium.orgbe75f282012-11-02 19:34:37 +00002289 out = (
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002290 'inline int PID = %d;\n'
2291 'inline string SCRIPT = "%s";\n'
2292 'inline int FILE_ID = %d;\n'
2293 '\n'
2294 '%s') % (
2295 os.getpid(),
2296 self._script,
2297 self._dummy_file_id,
maruel@chromium.orgbe75f282012-11-02 19:34:37 +00002298 self.D_CODE)
2299 if os.environ.get('TRACE_INPUTS_DTRACE_ENABLE_EXECVE') == '1':
2300 # Do not enable by default since it tends to spew dtrace: error lines
2301 # because the execve() parameters are not in valid memory at the time of
2302 # logging.
2303 # TODO(maruel): Find a way to make this reliable since it's useful but
2304 # only works in limited/trivial uses cases for now.
2305 out += self.D_CODE_EXECVE
2306 return out
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002307
2308 def trace(self, cmd, cwd, tracename, output):
2309 """Runs dtrace on an executable.
2310
2311 This dtruss is broken when it starts the process itself or when tracing
2312 child processes, this code starts a wrapper process
2313 generated with create_thunk() which starts the executable to trace.
2314 """
2315 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
2316 assert os.path.isabs(cmd[0]), cmd[0]
2317 assert os.path.isabs(cwd), cwd
2318 assert os.path.normpath(cwd) == cwd, cwd
2319 with self._lock:
2320 if not self._initialized:
2321 raise TracingFailure(
2322 'Called Tracer.trace() on an unitialized object',
2323 None, None, None, tracename)
2324 assert tracename not in (i['trace'] for i in self._traces)
2325
2326 # Starts the script wrapper to start the child process. This signals the
2327 # dtrace script that this process is to be traced.
2328 stdout = stderr = None
2329 if output:
2330 stdout = subprocess.PIPE
2331 stderr = subprocess.STDOUT
2332 child_cmd = [
2333 sys.executable,
2334 self._script,
2335 tracename,
2336 ]
2337 # Call a dummy function so that dtrace knows I'm about to launch a process
2338 # that needs to be traced.
2339 # Yummy.
2340 child = subprocess.Popen(
2341 child_cmd + fix_python_path(cmd),
2342 stdin=subprocess.PIPE,
2343 stdout=stdout,
2344 stderr=stderr,
2345 cwd=cwd)
2346 logging.debug('Started child pid: %d' % child.pid)
2347
2348 out = child.communicate()[0]
2349 # This doesn't mean tracing is done, one of the grand-child process may
2350 # still be alive. It will be tracked with the dtrace script.
2351
2352 with self._lock:
2353 assert tracename not in (i['trace'] for i in self._traces)
2354 self._traces.append(
2355 {
2356 'cmd': cmd,
2357 'cwd': cwd,
2358 # The pid of strace process, not very useful.
2359 'pid': child.pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002360 'output': out,
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002361 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002362 })
2363 return child.returncode, out
2364
2365 def close(self, timeout=None):
2366 """Terminates dtrace."""
2367 logging.debug('close(%s)' % timeout)
2368 try:
2369 try:
2370 super(Dtrace.Tracer, self).close(timeout)
2371 # Signal dtrace that it should stop now.
maruel@chromium.orgf43e68b2012-10-15 20:23:10 +00002372 # ftruncate doesn't exist on Windows.
2373 os.ftruncate(self._dummy_file_id, 0) # pylint: disable=E1101
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002374 if timeout:
2375 start = time.time()
2376 # Use polling. :/
2377 while (self._dtrace.poll() is None and
2378 (time.time() - start) < timeout):
2379 time.sleep(0.1)
2380 self._dtrace.kill()
2381 self._dtrace.wait()
2382 finally:
2383 # Make sure to kill it in any case.
2384 if self._dtrace.poll() is None:
2385 try:
2386 self._dtrace.kill()
2387 self._dtrace.wait()
2388 except OSError:
2389 pass
2390
2391 if self._dtrace.returncode != 0:
2392 # Warn about any dtrace failure but basically ignore it.
2393 print 'dtrace failure: %s' % self._dtrace.returncode
2394 finally:
2395 os.close(self._dummy_file_id)
2396 os.remove(self._dummy_file_name)
2397 os.remove(self._script)
2398
2399 def post_process_log(self):
2400 """Sorts the log back in order when each call occured.
2401
2402 dtrace doesn't save the buffer in strict order since it keeps one buffer
2403 per CPU.
2404 """
2405 super(Dtrace.Tracer, self).post_process_log()
2406 logname = self._logname + '.log'
2407 with open(logname, 'rb') as logfile:
2408 lines = [l for l in logfile if l.strip()]
2409 errors = [l for l in lines if l.startswith('dtrace:')]
2410 if errors:
2411 raise TracingFailure(
2412 'Found errors in the trace: %s' % '\n'.join(errors),
2413 None, None, None, logname)
2414 try:
2415 lines = sorted(lines, key=lambda l: int(l.split(' ', 1)[0]))
2416 except ValueError:
2417 raise TracingFailure(
2418 'Found errors in the trace: %s' % '\n'.join(
2419 l for l in lines if l.split(' ', 1)[0].isdigit()),
2420 None, None, None, logname)
2421 with open(logname, 'wb') as logfile:
2422 logfile.write(''.join(lines))
2423
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002424 def __init__(self, use_sudo=None):
2425 super(Dtrace, self).__init__()
2426 self.use_sudo = use_sudo
2427
2428 def get_tracer(self, logname):
2429 return self.Tracer(logname, self.use_sudo)
2430
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002431 @staticmethod
2432 def clean_trace(logname):
2433 for ext in ('', '.log'):
2434 if os.path.isfile(logname + ext):
2435 os.remove(logname + ext)
2436
2437 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00002438 def parse_log(cls, logname, blacklist, trace_name):
2439 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00002440 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002441
2442 def blacklist_more(filepath):
2443 # All the HFS metadata is in the form /.vol/...
2444 return blacklist(filepath) or re.match(r'^\/\.vol\/.+$', filepath)
2445
2446 data = read_json(logname)
2447 out = []
2448 for item in data['traces']:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00002449 if trace_name and item['trace'] != trace_name:
2450 continue
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002451 result = {
2452 'output': item['output'],
2453 'trace': item['trace'],
2454 }
2455 try:
2456 context = cls.Context(blacklist_more, item['pid'], item['cwd'])
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002457 # It's fine to assume the file as UTF-8: OSX enforces the file names to
2458 # be valid UTF-8 and we control the log output.
2459 for line in codecs.open(logname + '.log', 'rb', encoding='utf-8'):
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002460 context.on_line(line)
2461 result['results'] = context.to_results()
2462 except TracingFailure:
2463 result['exception'] = sys.exc_info()
2464 out.append(result)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002465 return out
2466
2467
2468class LogmanTrace(ApiBase):
2469 """Uses the native Windows ETW based tracing functionality to trace a child
2470 process.
2471
2472 Caveat: this implementations doesn't track cwd or initial_cwd. It is because
2473 the Windows Kernel doesn't have a concept of 'current working directory' at
2474 all. A Win32 process has a map of current directories, one per drive letter
2475 and it is managed by the user mode kernel32.dll. In kernel, a file is always
2476 opened relative to another file_object or as an absolute path. All the current
2477 working directory logic is done in user mode.
2478 """
2479 class Context(ApiBase.Context):
2480 """Processes a ETW log line and keeps the list of existent and non
2481 existent files accessed.
2482
2483 Ignores directories.
2484 """
2485 # These indexes are for the stripped version in json.
2486 EVENT_NAME = 0
2487 TYPE = 1
2488 PID = 2
2489 TID = 3
2490 PROCESSOR_ID = 4
2491 TIMESTAMP = 5
2492 USER_DATA = 6
2493
2494 class Process(ApiBase.Context.Process):
2495 def __init__(self, *args):
2496 super(LogmanTrace.Context.Process, self).__init__(*args)
2497 # Handle file objects that succeeded.
2498 self.file_objects = {}
2499
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002500 def __init__(self, blacklist, thunk_pid, trace_name, thunk_cmd):
2501 logging.info(
2502 '%s(%d, %s, %s)', self.__class__.__name__, thunk_pid, trace_name,
2503 thunk_cmd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002504 super(LogmanTrace.Context, self).__init__(blacklist)
2505 self._drive_map = DosDriveMap()
2506 # Threads mapping to the corresponding process id.
2507 self._threads_active = {}
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002508 # Process ID of the tracer, e.g. the temporary script created by
2509 # create_thunk(). This is tricky because the process id may have been
2510 # reused.
2511 self._thunk_pid = thunk_pid
2512 self._thunk_cmd = thunk_cmd
2513 self._trace_name = trace_name
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002514 self._line_number = 0
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002515 self._thunk_process = None
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002516
2517 def on_line(self, line):
2518 """Processes a json Event line."""
2519 self._line_number += 1
2520 try:
2521 # By Opcode
2522 handler = getattr(
2523 self,
2524 'handle_%s_%s' % (line[self.EVENT_NAME], line[self.TYPE]),
2525 None)
2526 if not handler:
2527 raise TracingFailure(
2528 'Unexpected event %s_%s' % (
2529 line[self.EVENT_NAME], line[self.TYPE]),
2530 None, None, None)
2531 handler(line)
2532 except TracingFailure, e:
2533 # Hack in the values since the handler could be a static function.
2534 e.pid = line[self.PID]
2535 e.line = line
2536 e.line_number = self._line_number
2537 # Re-raise the modified exception.
2538 raise
2539 except (KeyError, NotImplementedError, ValueError), e:
2540 raise TracingFailure(
2541 'Trace generated a %s exception: %s' % (
2542 e.__class__.__name__, str(e)),
2543 line[self.PID],
2544 self._line_number,
2545 line,
2546 e)
2547
2548 def to_results(self):
2549 if not self.root_process:
2550 raise TracingFailure(
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002551 'Failed to detect the initial process %d' % self._thunk_pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002552 None, None, None)
2553 process = self.root_process.to_results_process()
2554 return Results(process)
2555
2556 def _thread_to_process(self, tid):
2557 """Finds the process from the thread id."""
2558 tid = int(tid, 16)
2559 pid = self._threads_active.get(tid)
2560 if not pid or not self._process_lookup.get(pid):
2561 return
2562 return self._process_lookup[pid]
2563
2564 @classmethod
2565 def handle_EventTrace_Header(cls, line):
2566 """Verifies no event was dropped, e.g. no buffer overrun occured."""
2567 BUFFER_SIZE = cls.USER_DATA
2568 #VERSION = cls.USER_DATA + 1
2569 #PROVIDER_VERSION = cls.USER_DATA + 2
2570 #NUMBER_OF_PROCESSORS = cls.USER_DATA + 3
2571 #END_TIME = cls.USER_DATA + 4
2572 #TIMER_RESOLUTION = cls.USER_DATA + 5
2573 #MAX_FILE_SIZE = cls.USER_DATA + 6
2574 #LOG_FILE_MODE = cls.USER_DATA + 7
2575 #BUFFERS_WRITTEN = cls.USER_DATA + 8
2576 #START_BUFFERS = cls.USER_DATA + 9
2577 #POINTER_SIZE = cls.USER_DATA + 10
2578 EVENTS_LOST = cls.USER_DATA + 11
2579 #CPU_SPEED = cls.USER_DATA + 12
2580 #LOGGER_NAME = cls.USER_DATA + 13
2581 #LOG_FILE_NAME = cls.USER_DATA + 14
2582 #BOOT_TIME = cls.USER_DATA + 15
2583 #PERF_FREQ = cls.USER_DATA + 16
2584 #START_TIME = cls.USER_DATA + 17
2585 #RESERVED_FLAGS = cls.USER_DATA + 18
2586 #BUFFERS_LOST = cls.USER_DATA + 19
2587 #SESSION_NAME_STRING = cls.USER_DATA + 20
2588 #LOG_FILE_NAME_STRING = cls.USER_DATA + 21
2589 if line[EVENTS_LOST] != '0':
2590 raise TracingFailure(
2591 ( '%s events were lost during trace, please increase the buffer '
2592 'size from %s') % (line[EVENTS_LOST], line[BUFFER_SIZE]),
2593 None, None, None)
2594
2595 def handle_FileIo_Cleanup(self, line):
2596 """General wisdom: if a file is closed, it's because it was opened.
2597
2598 Note that FileIo_Close is not used since if a file was opened properly but
2599 not closed before the process exits, only Cleanup will be logged.
2600 """
2601 #IRP = self.USER_DATA
2602 TTID = self.USER_DATA + 1 # Thread ID, that's what we want.
2603 FILE_OBJECT = self.USER_DATA + 2
2604 #FILE_KEY = self.USER_DATA + 3
2605 proc = self._thread_to_process(line[TTID])
2606 if not proc:
2607 # Not a process we care about.
2608 return
2609 file_object = line[FILE_OBJECT]
2610 if file_object in proc.file_objects:
2611 proc.add_file(proc.file_objects.pop(file_object), False)
2612
2613 def handle_FileIo_Create(self, line):
2614 """Handles a file open.
2615
2616 All FileIo events are described at
2617 http://msdn.microsoft.com/library/windows/desktop/aa363884.aspx
2618 for some value of 'description'.
2619
2620 " (..) process and thread id values of the IO events (..) are not valid "
2621 http://msdn.microsoft.com/magazine/ee358703.aspx
2622
2623 The FileIo.Create event doesn't return if the CreateFile() call
2624 succeeded, so keep track of the file_object and check that it is
2625 eventually closed with FileIo_Cleanup.
2626 """
2627 #IRP = self.USER_DATA
2628 TTID = self.USER_DATA + 1 # Thread ID, that's what we want.
2629 FILE_OBJECT = self.USER_DATA + 2
2630 #CREATE_OPTIONS = self.USER_DATA + 3
2631 #FILE_ATTRIBUTES = self.USER_DATA + 4
2632 #self.USER_DATA + SHARE_ACCESS = 5
2633 OPEN_PATH = self.USER_DATA + 6
2634
2635 proc = self._thread_to_process(line[TTID])
2636 if not proc:
2637 # Not a process we care about.
2638 return
2639
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002640 raw_path = line[OPEN_PATH]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002641 # Ignore directories and bare drive right away.
2642 if raw_path.endswith(os.path.sep):
2643 return
2644 filepath = self._drive_map.to_win32(raw_path)
2645 # Ignore bare drive right away. Some may still fall through with format
2646 # like '\\?\X:'
2647 if len(filepath) == 2:
2648 return
2649 file_object = line[FILE_OBJECT]
2650 if os.path.isdir(filepath):
2651 # There is no O_DIRECTORY equivalent on Windows. The closed is
2652 # FILE_FLAG_BACKUP_SEMANTICS but it's not exactly right either. So
2653 # simply discard directories are they are found.
2654 return
2655 # Override any stale file object
2656 proc.file_objects[file_object] = filepath
2657
2658 def handle_FileIo_Rename(self, line):
2659 # TODO(maruel): Handle?
2660 pass
2661
2662 def handle_Process_End(self, line):
2663 pid = line[self.PID]
2664 if self._process_lookup.get(pid):
2665 logging.info('Terminated: %d' % pid)
2666 self._process_lookup[pid] = None
2667 else:
2668 logging.debug('Terminated: %d' % pid)
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002669 if self._thunk_process and self._thunk_process.pid == pid:
2670 self._thunk_process = None
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002671
2672 def handle_Process_Start(self, line):
2673 """Handles a new child process started by PID."""
2674 #UNIQUE_PROCESS_KEY = self.USER_DATA
2675 PROCESS_ID = self.USER_DATA + 1
2676 #PARENT_PID = self.USER_DATA + 2
2677 #SESSION_ID = self.USER_DATA + 3
2678 #EXIT_STATUS = self.USER_DATA + 4
2679 #DIRECTORY_TABLE_BASE = self.USER_DATA + 5
2680 #USER_SID = self.USER_DATA + 6
2681 IMAGE_FILE_NAME = self.USER_DATA + 7
2682 COMMAND_LINE = self.USER_DATA + 8
2683
2684 ppid = line[self.PID]
2685 pid = int(line[PROCESS_ID], 16)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002686 command_line = CommandLineToArgvW(line[COMMAND_LINE])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002687 logging.debug(
2688 'New process %d->%d (%s) %s' %
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002689 (ppid, pid, line[IMAGE_FILE_NAME], command_line))
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002690
2691 if pid == self._thunk_pid:
2692 # Need to ignore processes we don't know about because the log is
2693 # system-wide. self._thunk_pid shall start only one process.
2694 # This is tricky though because Windows *loves* to reuse process id and
2695 # it happens often that the process ID of the thunk script created by
2696 # create_thunk() is reused. So just detecting the pid here is not
2697 # sufficient, we must confirm the command line.
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002698 if command_line[:len(self._thunk_cmd)] != self._thunk_cmd:
2699 logging.info(
2700 'Ignoring duplicate pid %d for %s: %s while searching for %s',
2701 pid, self._trace_name, command_line, self._thunk_cmd)
2702 return
2703
2704 # TODO(maruel): The check is quite weak. Add the thunk path.
2705 if self._thunk_process:
2706 raise TracingFailure(
2707 ( 'Parent process is _thunk_pid(%d) but thunk_process(%d) is '
2708 'already set') % (self._thunk_pid, self._thunk_process.pid),
2709 None, None, None)
2710 proc = self.Process(self.blacklist, pid, None)
2711 self._thunk_process = proc
2712 return
2713 elif ppid == self._thunk_pid and self._thunk_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002714 proc = self.Process(self.blacklist, pid, None)
2715 self.root_process = proc
2716 ppid = None
2717 elif self._process_lookup.get(ppid):
2718 proc = self.Process(self.blacklist, pid, None)
2719 self._process_lookup[ppid].children.append(proc)
2720 else:
2721 # Ignore
2722 return
2723 self._process_lookup[pid] = proc
2724
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002725 proc.command = command_line
2726 proc.executable = line[IMAGE_FILE_NAME]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002727 # proc.command[0] may be the absolute path of 'executable' but it may be
2728 # anything else too. If it happens that command[0] ends with executable,
2729 # use it, otherwise defaults to the base name.
2730 cmd0 = proc.command[0].lower()
2731 if not cmd0.endswith('.exe'):
2732 # TODO(maruel): That's not strictly true either.
2733 cmd0 += '.exe'
2734 if cmd0.endswith(proc.executable) and os.path.isfile(cmd0):
2735 # Fix the path.
2736 cmd0 = cmd0.replace('/', os.path.sep)
2737 cmd0 = os.path.normpath(cmd0)
2738 proc.executable = get_native_path_case(cmd0)
2739 logging.info(
2740 'New child: %s -> %d %s' % (ppid, pid, proc.executable))
2741
2742 def handle_Thread_End(self, line):
2743 """Has the same parameters as Thread_Start."""
2744 tid = int(line[self.TID], 16)
2745 self._threads_active.pop(tid, None)
2746
2747 def handle_Thread_Start(self, line):
2748 """Handles a new thread created.
2749
2750 Do not use self.PID here since a process' initial thread is created by
2751 the parent process.
2752 """
2753 PROCESS_ID = self.USER_DATA
2754 TTHREAD_ID = self.USER_DATA + 1
2755 #STACK_BASE = self.USER_DATA + 2
2756 #STACK_LIMIT = self.USER_DATA + 3
2757 #USER_STACK_BASE = self.USER_DATA + 4
2758 #USER_STACK_LIMIT = self.USER_DATA + 5
2759 #AFFINITY = self.USER_DATA + 6
2760 #WIN32_START_ADDR = self.USER_DATA + 7
2761 #TEB_BASE = self.USER_DATA + 8
2762 #SUB_PROCESS_TAG = self.USER_DATA + 9
2763 #BASE_PRIORITY = self.USER_DATA + 10
2764 #PAGE_PRIORITY = self.USER_DATA + 11
2765 #IO_PRIORITY = self.USER_DATA + 12
2766 #THREAD_FLAGS = self.USER_DATA + 13
2767 # Do not use self.PID here since a process' initial thread is created by
2768 # the parent process.
2769 pid = int(line[PROCESS_ID], 16)
2770 tid = int(line[TTHREAD_ID], 16)
2771 logging.debug('New thread pid:%d, tid:%d' % (pid, tid))
2772 self._threads_active[tid] = pid
2773
2774 @classmethod
2775 def supported_events(cls):
2776 """Returns all the procesed events."""
2777 out = []
2778 for member in dir(cls):
2779 match = re.match(r'^handle_([A-Za-z]+)_([A-Za-z]+)$', member)
2780 if match:
2781 out.append(match.groups())
2782 return out
2783
2784 class Tracer(ApiBase.Tracer):
2785 # The basic headers.
2786 EXPECTED_HEADER = [
2787 u'Event Name',
2788 u'Type',
2789 u'Event ID',
2790 u'Version',
2791 u'Channel',
2792 u'Level', # 5
2793 u'Opcode',
2794 u'Task',
2795 u'Keyword',
2796 u'PID',
2797 u'TID', # 10
2798 u'Processor Number',
2799 u'Instance ID',
2800 u'Parent Instance ID',
2801 u'Activity ID',
2802 u'Related Activity ID', # 15
2803 u'Clock-Time',
2804 u'Kernel(ms)', # Both have a resolution of ~15ms which makes them
2805 u'User(ms)', # pretty much useless.
2806 u'User Data', # Extra arguments that are event-specific.
2807 ]
2808 # Only the useful headers common to all entries are listed there. Any column
2809 # at 19 or higher is dependent on the specific event.
2810 EVENT_NAME = 0
2811 TYPE = 1
2812 PID = 9
2813 TID = 10
2814 PROCESSOR_ID = 11
2815 TIMESTAMP = 16
2816 NULL_GUID = '{00000000-0000-0000-0000-000000000000}'
2817 USER_DATA = 19
2818
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002819 class CsvReader(object):
2820 """CSV reader that reads files generated by tracerpt.exe.
2821
2822 csv.reader() fails to read them properly, it mangles file names quoted
2823 with "" with a comma in it.
2824 """
2825 # 0. Had a ',' or one of the following ' ' after a comma, next should
2826 # be ' ', '"' or string or ',' for an empty field.
2827 ( HAD_DELIMITER,
2828 # 1. Processing an unquoted field up to ','.
2829 IN_STR,
2830 # 2. Processing a new field starting with '"'.
2831 STARTING_STR_QUOTED,
2832 # 3. Second quote in a row at the start of a field. It could be either
2833 # '""foo""' or '""'. Who the hell thought it was a great idea to use
2834 # the same character for delimiting and escaping?
2835 STARTING_SECOND_QUOTE,
2836 # 4. A quote inside a quoted string where the previous character was
2837 # not a quote, so the string is not empty. Can be either: end of a
2838 # quoted string (a delimiter) or a quote escape. The next char must be
2839 # either '"' or ','.
2840 HAD_QUOTE_IN_QUOTED,
2841 # 5. Second quote inside a quoted string.
2842 HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED,
2843 # 6. Processing a field that started with '"'.
2844 IN_STR_QUOTED) = range(7)
2845
2846 def __init__(self, f):
2847 self.f = f
2848
2849 def __iter__(self):
2850 return self
2851
2852 def next(self):
2853 """Splits the line in fields."""
2854 line = self.f.readline()
2855 if not line:
2856 raise StopIteration()
2857 line = line.strip()
2858 fields = []
2859 state = self.HAD_DELIMITER
2860 for i, c in enumerate(line):
2861 if state == self.HAD_DELIMITER:
2862 if c == ',':
2863 # Empty field.
2864 fields.append('')
2865 elif c == ' ':
2866 # Ignore initial whitespaces
2867 pass
2868 elif c == '"':
2869 state = self.STARTING_STR_QUOTED
2870 fields.append('')
2871 else:
2872 # Start of a new field.
2873 state = self.IN_STR
2874 fields.append(c)
2875
2876 elif state == self.IN_STR:
2877 # Do not accept quote inside unquoted field.
2878 assert c != '"', (i, c, line, fields)
2879 if c == ',':
2880 fields[-1] = fields[-1].strip()
2881 state = self.HAD_DELIMITER
2882 else:
2883 fields[-1] = fields[-1] + c
2884
2885 elif state == self.STARTING_STR_QUOTED:
2886 if c == '"':
2887 # Do not store the character yet.
2888 state = self.STARTING_SECOND_QUOTE
2889 else:
2890 state = self.IN_STR_QUOTED
2891 fields[-1] = fields[-1] + c
2892
2893 elif state == self.STARTING_SECOND_QUOTE:
2894 if c == ',':
2895 # It was an empty field. '""' == ''.
2896 state = self.HAD_DELIMITER
2897 else:
2898 fields[-1] = fields[-1] + '"' + c
2899 state = self.IN_STR_QUOTED
2900
2901 elif state == self.HAD_QUOTE_IN_QUOTED:
2902 if c == ',':
2903 # End of the string.
2904 state = self.HAD_DELIMITER
2905 elif c == '"':
2906 state = self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED
2907 else:
2908 # The previous double-quote was just an unescaped quote.
2909 fields[-1] = fields[-1] + '"' + c
2910 state = self.IN_STR_QUOTED
2911
2912 elif state == self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED:
2913 if c == ',':
2914 # End of the string.
2915 state = self.HAD_DELIMITER
2916 fields[-1] = fields[-1] + '"'
2917 else:
maruel@chromium.org261d1352013-02-25 19:23:20 +00002918 # That's just how the logger rolls. Revert back to appending the
2919 # char and "guess" it was a quote in a double-quoted string.
2920 state = self.IN_STR_QUOTED
2921 fields[-1] = fields[-1] + '"' + c
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002922
2923 elif state == self.IN_STR_QUOTED:
2924 if c == '"':
2925 # Could be a delimiter or an escape.
2926 state = self.HAD_QUOTE_IN_QUOTED
2927 else:
2928 fields[-1] = fields[-1] + c
2929
2930 if state == self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED:
2931 fields[-1] = fields[-1] + '"'
2932 else:
2933 assert state in (
2934 # Terminated with a normal field.
2935 self.IN_STR,
2936 # Terminated with an empty field.
2937 self.STARTING_SECOND_QUOTE,
2938 # Terminated with a normal quoted field.
2939 self.HAD_QUOTE_IN_QUOTED), (
2940 line, state, fields)
2941 return fields
2942
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002943 def __init__(self, logname):
2944 """Starts the log collection.
2945
2946 Requires administrative access. logman.exe is synchronous so no need for a
2947 "warmup" call. 'Windows Kernel Trace' is *localized* so use its GUID
2948 instead. The GUID constant name is SystemTraceControlGuid. Lovely.
2949
2950 One can get the list of potentially interesting providers with:
2951 "logman query providers | findstr /i file"
2952 """
2953 super(LogmanTrace.Tracer, self).__init__(logname)
2954 self._script = create_thunk()
2955 cmd_start = [
2956 'logman.exe',
2957 'start',
2958 'NT Kernel Logger',
2959 '-p', '{9e814aad-3204-11d2-9a82-006008a86939}',
2960 # splitio,fileiocompletion,syscall,file,cswitch,img
2961 '(process,fileio,thread)',
2962 '-o', self._logname + '.etl',
2963 '-ets', # Send directly to kernel
2964 # Values extracted out of thin air.
2965 # Event Trace Session buffer size in kb.
2966 '-bs', '10240',
2967 # Number of Event Trace Session buffers.
2968 '-nb', '16', '256',
2969 ]
2970 logging.debug('Running: %s' % cmd_start)
2971 try:
2972 subprocess.check_call(
2973 cmd_start,
2974 stdin=subprocess.PIPE,
2975 stdout=subprocess.PIPE,
2976 stderr=subprocess.STDOUT)
2977 except subprocess.CalledProcessError, e:
2978 if e.returncode == -2147024891:
2979 print >> sys.stderr, 'Please restart with an elevated admin prompt'
2980 elif e.returncode == -2144337737:
2981 print >> sys.stderr, (
2982 'A kernel trace was already running, stop it and try again')
2983 raise
2984
2985 def trace(self, cmd, cwd, tracename, output):
2986 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
2987 assert os.path.isabs(cmd[0]), cmd[0]
2988 assert os.path.isabs(cwd), cwd
2989 assert os.path.normpath(cwd) == cwd, cwd
2990 with self._lock:
2991 if not self._initialized:
2992 raise TracingFailure(
2993 'Called Tracer.trace() on an unitialized object',
2994 None, None, None, tracename)
2995 assert tracename not in (i['trace'] for i in self._traces)
2996
2997 # Use "logman -?" for help.
2998
2999 stdout = stderr = None
3000 if output:
3001 stdout = subprocess.PIPE
3002 stderr = subprocess.STDOUT
3003
3004 # Run the child process.
3005 logging.debug('Running: %s' % cmd)
3006 # Use the temporary script generated with create_thunk() so we have a
3007 # clear pid owner. Since trace_inputs.py can be used as a library and
3008 # could trace multiple processes simultaneously, it makes it more complex
3009 # if the executable to be traced is executed directly here. It also solves
3010 # issues related to logman.exe that needs to be executed to control the
3011 # kernel trace.
3012 child_cmd = [
3013 sys.executable,
3014 self._script,
3015 tracename,
3016 ]
3017 child = subprocess.Popen(
3018 child_cmd + fix_python_path(cmd),
3019 cwd=cwd,
3020 stdin=subprocess.PIPE,
3021 stdout=stdout,
3022 stderr=stderr)
3023 logging.debug('Started child pid: %d' % child.pid)
3024 out = child.communicate()[0]
3025 # This doesn't mean all the grand-children are done. Sadly, we don't have
3026 # a good way to determine that.
3027
3028 with self._lock:
3029 assert tracename not in (i['trace'] for i in self._traces)
3030 self._traces.append({
maruel@chromium.orgf43e68b2012-10-15 20:23:10 +00003031 'cmd': cmd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003032 'cwd': cwd,
maruel@chromium.org964ddde2012-10-29 18:13:46 +00003033 'output': out,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003034 'pid': child.pid,
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00003035 # Used to figure out the real process when process ids are reused.
3036 'thunk_cmd': child_cmd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003037 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003038 })
3039
3040 return child.returncode, out
3041
3042 def close(self, _timeout=None):
3043 """Stops the kernel log collection and converts the traces to text
3044 representation.
3045 """
3046 with self._lock:
3047 if not self._initialized:
3048 raise TracingFailure(
3049 'Called Tracer.close() on an unitialized object',
3050 None, None, None)
3051 os.remove(self._script)
3052 # Save metadata, add 'format' key..
3053 data = {
3054 'format': 'csv',
3055 'traces': self._traces,
3056 }
3057 write_json(self._logname, data, False)
3058
3059 cmd_stop = [
3060 'logman.exe',
3061 'stop',
3062 'NT Kernel Logger',
3063 '-ets', # Sends the command directly to the kernel.
3064 ]
3065 logging.debug('Running: %s' % cmd_stop)
3066 subprocess.check_call(
3067 cmd_stop,
3068 stdin=subprocess.PIPE,
3069 stdout=subprocess.PIPE,
3070 stderr=subprocess.STDOUT)
3071 self._initialized = False
3072
3073 def post_process_log(self):
3074 """Converts the .etl file into .csv then into .json."""
3075 super(LogmanTrace.Tracer, self).post_process_log()
3076 logformat = 'csv'
3077 self._convert_log(logformat)
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003078 self._trim_log(logformat)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003079
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003080 def _trim_log(self, logformat):
3081 """Reduces the amount of data in original log by generating a 'reduced'
3082 log.
3083 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003084 if logformat == 'csv_utf16':
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003085 file_handle = codecs.open(
3086 self._logname + '.' + logformat, 'r', encoding='utf-16')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003087
3088 elif logformat == 'csv':
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003089 assert sys.getfilesystemencoding() == 'mbcs'
3090 file_handle = codecs.open(
3091 self._logname + '.' + logformat, 'r',
3092 encoding=get_current_encoding())
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003093
3094 supported_events = LogmanTrace.Context.supported_events()
3095
3096 def trim(generator):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003097 """Loads items from the generator and returns the interesting data.
3098
3099 It filters out any uninteresting line and reduce the amount of data in
3100 the trace.
3101 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003102 for index, line in enumerate(generator):
3103 if not index:
3104 if line != self.EXPECTED_HEADER:
3105 raise TracingFailure(
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003106 'Found malformed header: %s' % line,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003107 None, None, None)
3108 continue
3109 # As you can see, the CSV is full of useful non-redundant information:
3110 if (line[2] != '0' or # Event ID
3111 line[3] not in ('2', '3') or # Version
3112 line[4] != '0' or # Channel
3113 line[5] != '0' or # Level
3114 line[7] != '0' or # Task
3115 line[8] != '0x0000000000000000' or # Keyword
3116 line[12] != '' or # Instance ID
3117 line[13] != '' or # Parent Instance ID
3118 line[14] != self.NULL_GUID or # Activity ID
3119 line[15] != ''): # Related Activity ID
3120 raise TracingFailure(
3121 'Found unexpected values in line: %s' % ' '.join(line),
3122 None, None, None)
3123
3124 if (line[self.EVENT_NAME], line[self.TYPE]) not in supported_events:
3125 continue
3126
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003127 yield [
3128 line[self.EVENT_NAME],
3129 line[self.TYPE],
3130 line[self.PID],
3131 line[self.TID],
3132 line[self.PROCESSOR_ID],
3133 line[self.TIMESTAMP],
3134 ] + line[self.USER_DATA:]
3135
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003136 # must not convert the trim() call into a list, since it will use too much
3137 # memory for large trace. use a csv file as a workaround since the json
3138 # parser requires a complete in-memory file.
3139 with open('%s.preprocessed' % self._logname, 'wb') as f:
3140 # $ and * can't be used in file name on windows, reducing the likelihood
3141 # of having to escape a string.
3142 out = csv.writer(
3143 f, delimiter='$', quotechar='*', quoting=csv.QUOTE_MINIMAL)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003144 for line in trim(self.CsvReader(file_handle)):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003145 out.writerow([s.encode('utf-8') for s in line])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003146
3147 def _convert_log(self, logformat):
3148 """Converts the ETL trace to text representation.
3149
3150 Normally, 'csv' is sufficient. If complex scripts are used (like eastern
3151 languages), use 'csv_utf16'. If localization gets in the way, use 'xml'.
3152
3153 Arguments:
3154 - logformat: Text format to be generated, csv, csv_utf16 or xml.
3155
3156 Use "tracerpt -?" for help.
3157 """
3158 LOCALE_INVARIANT = 0x7F
3159 windll.kernel32.SetThreadLocale(LOCALE_INVARIANT)
3160 cmd_convert = [
3161 'tracerpt.exe',
3162 '-l', self._logname + '.etl',
3163 '-o', self._logname + '.' + logformat,
3164 '-gmt', # Use UTC
3165 '-y', # No prompt
3166 # Use -of XML to get the header of each items after column 19, e.g. all
3167 # the actual headers of 'User Data'.
3168 ]
3169
3170 if logformat == 'csv':
3171 # tracerpt localizes the 'Type' column, for major brainfuck
3172 # entertainment. I can't imagine any sane reason to do that.
3173 cmd_convert.extend(['-of', 'CSV'])
3174 elif logformat == 'csv_utf16':
3175 # This causes it to use UTF-16, which doubles the log size but ensures
3176 # the log is readable for non-ASCII characters.
3177 cmd_convert.extend(['-of', 'CSV', '-en', 'Unicode'])
3178 elif logformat == 'xml':
3179 cmd_convert.extend(['-of', 'XML'])
3180 else:
3181 raise ValueError('Unexpected log format \'%s\'' % logformat)
3182 logging.debug('Running: %s' % cmd_convert)
3183 # This can takes tens of minutes for large logs.
3184 # Redirects all output to stderr.
3185 subprocess.check_call(
3186 cmd_convert,
3187 stdin=subprocess.PIPE,
3188 stdout=sys.stderr,
3189 stderr=sys.stderr)
3190
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003191 def __init__(self, use_sudo=False): # pylint: disable=W0613
3192 super(LogmanTrace, self).__init__()
3193 # Ignore use_sudo. It's irrelevant on Windows but kept to simplify the API.
3194
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003195 @staticmethod
3196 def clean_trace(logname):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003197 for ext in ('', '.csv', '.etl', '.json', '.xml', '.preprocessed'):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003198 if os.path.isfile(logname + ext):
3199 os.remove(logname + ext)
3200
3201 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003202 def parse_log(cls, logname, blacklist, trace_name):
3203 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00003204 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003205
3206 def blacklist_more(filepath):
3207 # All the NTFS metadata is in the form x:\$EXTEND or stuff like that.
3208 return blacklist(filepath) or re.match(r'[A-Z]\:\\\$EXTEND', filepath)
3209
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003210 # Create a list of (Context, result_dict) tuples. This is necessary because
3211 # the csv file may be larger than the amount of available memory.
3212 contexes = [
3213 (
3214 cls.Context(
3215 blacklist_more, item['pid'], item['trace'], item['thunk_cmd']),
3216 {
3217 'output': item['output'],
3218 'trace': item['trace'],
3219 },
3220 )
3221 for item in read_json(logname)['traces']
3222 if not trace_name or item['trace'] == trace_name
3223 ]
3224
3225 # The log may be too large to fit in memory and it is not efficient to read
3226 # it multiple times, so multiplex the contexes instead, which is slightly
3227 # more awkward.
3228 with open('%s.preprocessed' % logname, 'rb') as f:
3229 lines = csv.reader(
3230 f, delimiter='$', quotechar='*', quoting=csv.QUOTE_MINIMAL)
3231 for encoded in lines:
3232 line = [s.decode('utf-8') for s in encoded]
3233 # Convert the PID in-place from hex.
3234 line[cls.Context.PID] = int(line[cls.Context.PID], 16)
3235 for context in contexes:
3236 if 'exception' in context[1]:
3237 continue
3238 try:
3239 context[0].on_line(line)
3240 except TracingFailure:
3241 context[1]['exception'] = sys.exc_info()
3242
3243 for context in contexes:
3244 if 'exception' in context[1]:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003245 continue
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003246 context[1]['results'] = context[0].to_results()
3247
3248 return [context[1] for context in contexes]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003249
3250
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003251def get_api(**kwargs):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003252 """Returns the correct implementation for the current OS."""
3253 if sys.platform == 'cygwin':
3254 raise NotImplementedError(
3255 'Not implemented for cygwin, start the script from Win32 python')
3256 flavors = {
3257 'win32': LogmanTrace,
3258 'darwin': Dtrace,
3259 'sunos5': Dtrace,
3260 'freebsd7': Dtrace,
3261 'freebsd8': Dtrace,
3262 }
3263 # Defaults to strace.
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003264 return flavors.get(sys.platform, Strace)(**kwargs)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003265
3266
3267def extract_directories(root_dir, files, blacklist):
3268 """Detects if all the files in a directory are in |files| and if so, replace
3269 the individual files by a Results.Directory instance.
3270
3271 Takes a list of Results.File instances and returns a shorter list of
3272 Results.File and Results.Directory instances.
3273
3274 Arguments:
3275 - root_dir: Optional base directory that shouldn't be search further.
3276 - files: list of Results.File instances.
3277 - blacklist: regexp of files to ignore, for example r'.+\.pyc'.
3278 """
3279 logging.info(
3280 'extract_directories(%s, %d files, ...)' % (root_dir, len(files)))
3281 assert not (root_dir or '').endswith(os.path.sep), root_dir
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003282 # It is important for root_dir to not be a symlinked path, make sure to call
3283 # os.path.realpath() as needed.
3284 assert not root_dir or (
3285 os.path.realpath(get_native_path_case(root_dir)) == root_dir)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003286 assert not any(isinstance(f, Results.Directory) for f in files)
3287 # Remove non existent files.
3288 files = [f for f in files if f.existent]
3289 if not files:
3290 return files
3291 # All files must share the same root, which can be None.
3292 assert len(set(f.root for f in files)) == 1, set(f.root for f in files)
3293
3294 # Creates a {directory: {filename: File}} mapping, up to root.
3295 buckets = {}
3296 if root_dir:
3297 buckets[root_dir] = {}
3298 for fileobj in files:
3299 path = fileobj.full_path
3300 directory = os.path.dirname(path)
3301 assert directory
3302 # Do not use os.path.basename() so trailing os.path.sep is kept.
3303 basename = path[len(directory)+1:]
3304 files_in_directory = buckets.setdefault(directory, {})
3305 files_in_directory[basename] = fileobj
3306 # Add all the directories recursively up to root.
3307 while True:
3308 old_d = directory
3309 directory = os.path.dirname(directory)
3310 if directory + os.path.sep == root_dir or directory == old_d:
3311 break
3312 buckets.setdefault(directory, {})
3313
3314 root_prefix = len(root_dir) + 1 if root_dir else 0
3315 for directory in sorted(buckets, reverse=True):
maruel@chromium.orgb1678e62013-03-19 15:55:17 +00003316 if not os.path.isdir(directory):
3317 logging.debug(
3318 '%s was a directory but doesn\'t exist anymore; ignoring', directory)
3319 continue
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003320 actual = set(f for f in os.listdir(directory) if not blacklist(f))
3321 expected = set(buckets[directory])
3322 if not (actual - expected):
3323 parent = os.path.dirname(directory)
3324 buckets[parent][os.path.basename(directory)] = Results.Directory(
3325 root_dir,
3326 directory[root_prefix:],
3327 False,
3328 sum(f.size for f in buckets[directory].itervalues()),
3329 sum(f.nb_files for f in buckets[directory].itervalues()))
3330 # Remove the whole bucket.
3331 del buckets[directory]
3332
3333 # Reverse the mapping with what remains. The original instances are returned,
3334 # so the cached meta data is kept.
3335 files = sum((x.values() for x in buckets.itervalues()), [])
3336 return sorted(files, key=lambda x: x.path)
3337
3338
3339def trace(logfile, cmd, cwd, api, output):
3340 """Traces an executable. Returns (returncode, output) from api.
3341
3342 Arguments:
3343 - logfile: file to write to.
3344 - cmd: command to run.
3345 - cwd: current directory to start the process in.
3346 - api: a tracing api instance.
3347 - output: if True, returns output, otherwise prints it at the console.
3348 """
3349 cmd = fix_python_path(cmd)
3350 api.clean_trace(logfile)
3351 with api.get_tracer(logfile) as tracer:
3352 return tracer.trace(cmd, cwd, 'default', output)
3353
3354
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003355def CMDclean(args):
3356 """Cleans up traces."""
3357 parser = OptionParserTraceInputs(command='clean')
3358 options, args = parser.parse_args(args)
3359 api = get_api()
3360 api.clean_trace(options.log)
3361 return 0
3362
3363
3364def CMDtrace(args):
3365 """Traces an executable."""
3366 parser = OptionParserTraceInputs(command='trace')
3367 parser.allow_interspersed_args = False
3368 parser.add_option(
3369 '-q', '--quiet', action='store_true',
3370 help='Redirects traced executable output to /dev/null')
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003371 parser.add_option(
3372 '-s', '--sudo', action='store_true',
3373 help='Use sudo when shelling out the tracer tool (ignored on Windows)')
3374 parser.add_option(
3375 '-n', '--no-sudo', action='store_false',
3376 help='Don\'t use sudo')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003377 options, args = parser.parse_args(args)
3378
3379 if not args:
3380 parser.error('Please provide a command to run')
3381
3382 if not os.path.isabs(args[0]) and os.access(args[0], os.X_OK):
3383 args[0] = os.path.abspath(args[0])
3384
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003385 # options.sudo default value is None, which is to do whatever tracer defaults
3386 # do.
3387 api = get_api(use_sudo=options.sudo)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003388 return trace(options.log, args, os.getcwd(), api, options.quiet)[0]
3389
3390
3391def CMDread(args):
3392 """Reads the logs and prints the result."""
3393 parser = OptionParserTraceInputs(command='read')
3394 parser.add_option(
3395 '-V', '--variable',
3396 nargs=2,
3397 action='append',
3398 dest='variables',
3399 metavar='VAR_NAME directory',
3400 default=[],
3401 help=('Variables to replace relative directories against. Example: '
3402 '"-v \'$HOME\' \'/home/%s\'" will replace all occurence of your '
3403 'home dir with $HOME') % getpass.getuser())
3404 parser.add_option(
3405 '--root-dir',
3406 help='Root directory to base everything off it. Anything outside of this '
3407 'this directory will not be reported')
3408 parser.add_option(
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003409 '--trace-name',
3410 help='Only reads one of the trace. Defaults to reading all traces')
3411 parser.add_option(
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003412 '-j', '--json', action='store_true',
3413 help='Outputs raw result data as json')
3414 parser.add_option(
3415 '-b', '--blacklist', action='append', default=[],
3416 help='List of regexp to use as blacklist filter')
3417 options, args = parser.parse_args(args)
3418
3419 if options.root_dir:
maruel@chromium.org7dc17b32013-04-02 22:27:05 +00003420 options.root_dir = get_native_path_case(
3421 unicode(os.path.abspath(options.root_dir)))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003422
3423 variables = dict(options.variables)
3424 api = get_api()
3425 def blacklist(f):
3426 return any(re.match(b, f) for b in options.blacklist)
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003427 data = api.parse_log(options.log, blacklist, options.trace_name)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003428 # Process each trace.
3429 output_as_json = []
3430 for item in data:
3431 if 'exception' in item:
maruel@chromium.org424f5692012-10-15 15:42:38 +00003432 # Do not abort the other traces.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003433 print >> sys.stderr, (
maruel@chromium.org424f5692012-10-15 15:42:38 +00003434 'Trace %s: Got an exception: %s' % (
3435 item['trace'], item['exception'][1]))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003436 continue
3437 results = item['results']
3438 if options.root_dir:
3439 results = results.strip_root(options.root_dir)
3440
3441 if options.json:
3442 output_as_json.append(results.flatten())
3443 else:
3444 simplified = extract_directories(
3445 options.root_dir, results.files, blacklist)
3446 simplified = [f.replace_variables(variables) for f in simplified]
3447 if len(data) > 1:
3448 print('Trace: %s' % item['trace'])
3449 print('Total: %d' % len(results.files))
3450 print('Non existent: %d' % len(results.non_existent))
3451 for f in results.non_existent:
3452 print(' %s' % f.path)
3453 print(
3454 'Interesting: %d reduced to %d' % (
3455 len(results.existent), len(simplified)))
3456 for f in simplified:
3457 print(' %s' % f.path)
3458
3459 if options.json:
3460 write_json(sys.stdout, output_as_json, False)
3461 return 0
3462
3463
3464class OptionParserWithLogging(optparse.OptionParser):
3465 """Adds --verbose option."""
3466 def __init__(self, verbose=0, **kwargs):
3467 optparse.OptionParser.__init__(self, **kwargs)
3468 self.add_option(
3469 '-v', '--verbose',
3470 action='count',
3471 default=verbose,
3472 help='Use multiple times to increase verbosity')
3473
3474 def parse_args(self, *args, **kwargs):
3475 options, args = optparse.OptionParser.parse_args(self, *args, **kwargs)
3476 levels = [logging.ERROR, logging.INFO, logging.DEBUG]
3477 logging.basicConfig(
3478 level=levels[min(len(levels)-1, options.verbose)],
3479 format='%(levelname)5s %(module)15s(%(lineno)3d): %(message)s')
3480 return options, args
3481
3482
3483class OptionParserWithNiceDescription(OptionParserWithLogging):
3484 """Generates the description with the command's docstring."""
3485 def __init__(self, **kwargs):
3486 """Sets 'description' and 'usage' if not already specified."""
3487 command = kwargs.pop('command', 'help')
3488 kwargs.setdefault(
3489 'description',
3490 re.sub('[\r\n ]{2,}', ' ', get_command_handler(command).__doc__))
3491 kwargs.setdefault('usage', '%%prog %s [options]' % command)
3492 OptionParserWithLogging.__init__(self, **kwargs)
3493
3494
3495class OptionParserTraceInputs(OptionParserWithNiceDescription):
3496 """Adds automatic --log handling."""
3497 def __init__(self, **kwargs):
3498 OptionParserWithNiceDescription.__init__(self, **kwargs)
3499 self.add_option(
3500 '-l', '--log', help='Log file to generate or read, required')
3501
3502 def parse_args(self, *args, **kwargs):
3503 """Makes sure the paths make sense.
3504
3505 On Windows, / and \ are often mixed together in a path.
3506 """
3507 options, args = OptionParserWithNiceDescription.parse_args(
3508 self, *args, **kwargs)
3509 if not options.log:
3510 self.error('Must supply a log file with -l')
3511 options.log = os.path.abspath(options.log)
3512 return options, args
3513
3514
3515def extract_documentation():
3516 """Returns a dict {command: description} for each of documented command."""
3517 commands = (
3518 fn[3:]
3519 for fn in dir(sys.modules['__main__'])
3520 if fn.startswith('CMD') and get_command_handler(fn[3:]).__doc__)
3521 return dict((fn, get_command_handler(fn).__doc__) for fn in commands)
3522
3523
3524def CMDhelp(args):
3525 """Prints list of commands or help for a specific command."""
3526 doc = extract_documentation()
3527 # Calculates the optimal offset.
3528 offset = max(len(cmd) for cmd in doc)
3529 format_str = ' %-' + str(offset + 2) + 's %s'
3530 # Generate a one-liner documentation of each commands.
3531 commands_description = '\n'.join(
3532 format_str % (cmd, doc[cmd].split('\n')[0]) for cmd in sorted(doc))
3533
3534 parser = OptionParserWithNiceDescription(
3535 usage='%prog <command> [options]',
3536 description='Commands are:\n%s\n' % commands_description)
3537 parser.format_description = lambda _: parser.description
3538
3539 # Strip out any -h or --help argument.
3540 _, args = parser.parse_args([i for i in args if not i in ('-h', '--help')])
3541 if len(args) == 1:
3542 if not get_command_handler(args[0]):
3543 parser.error('Unknown command %s' % args[0])
3544 # The command was "%prog help command", replaces ourself with
3545 # "%prog command --help" so help is correctly printed out.
3546 return main(args + ['--help'])
3547 elif args:
3548 parser.error('Unknown argument "%s"' % ' '.join(args))
3549 parser.print_help()
3550 return 0
3551
3552
3553def get_command_handler(name):
3554 """Returns the command handler or CMDhelp if it doesn't exist."""
3555 return getattr(sys.modules['__main__'], 'CMD%s' % name, None)
3556
3557
3558def main_impl(argv):
3559 command = get_command_handler(argv[0] if argv else 'help')
3560 if not command:
3561 return CMDhelp(argv)
3562 return command(argv[1:])
3563
3564def main(argv):
maruel@chromium.org46e61cc2013-03-25 19:55:34 +00003565 run_isolated.disable_buffering()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003566 try:
3567 main_impl(argv)
3568 except TracingFailure, e:
3569 sys.stderr.write('\nError: ')
3570 sys.stderr.write(str(e))
3571 sys.stderr.write('\n')
3572 return 1
3573
3574
3575if __name__ == '__main__':
3576 sys.exit(main(sys.argv[1:]))