blob: 26f7ab571cb83846426eb7379e28cbd4c558b740 [file] [log] [blame]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001#!/usr/bin/env python
2# coding=utf-8
3# Copyright (c) 2012 The Chromium Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""Traces an executable and its child processes and extract the files accessed
8by them.
9
10The implementation uses OS-specific API. The native Kernel logger and the ETL
11interface is used on Windows. Dtrace is used on OSX. Strace is used otherwise.
12The OS-specific implementation is hidden in an 'API' interface.
13
14The results are embedded in a Results instance. The tracing is done in two
15phases, the first is to do the actual trace and generate an
16implementation-specific log file. Then the log file is parsed to extract the
17information, including the individual child processes and the files accessed
18from the log.
19"""
20
21import codecs
22import csv
23import getpass
24import glob
25import json
26import logging
27import optparse
28import os
29import re
maruel@chromium.org9b2112a2013-04-05 18:01:02 +000030import stat
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000031import subprocess
32import sys
33import tempfile
34import threading
35import time
maruel@chromium.org9b2112a2013-04-05 18:01:02 +000036import unicodedata
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000037import weakref
38
39## OS-specific imports
40
41if sys.platform == 'win32':
42 from ctypes.wintypes import byref, create_unicode_buffer, c_int, c_wchar_p
43 from ctypes.wintypes import windll, FormatError # pylint: disable=E0611
44 from ctypes.wintypes import GetLastError # pylint: disable=E0611
45elif sys.platform == 'darwin':
46 import Carbon.File # pylint: disable=F0401
47 import MacOS # pylint: disable=F0401
48
maruel@chromium.org46e61cc2013-03-25 19:55:34 +000049import run_isolated
50
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000051
52BASE_DIR = os.path.dirname(os.path.abspath(__file__))
53ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR))
54
55
56class TracingFailure(Exception):
57 """An exception occured during tracing."""
58 def __init__(self, description, pid, line_number, line, *args):
59 super(TracingFailure, self).__init__(
60 description, pid, line_number, line, *args)
61 self.description = description
62 self.pid = pid
63 self.line_number = line_number
64 self.line = line
65 self.extra = args
66
67 def __str__(self):
68 out = self.description
69 if self.pid:
70 out += '\npid: %d' % self.pid
71 if self.line_number:
72 out += '\nline: %d' % self.line_number
73 if self.line:
74 out += '\n%s' % self.line
75 if self.extra:
76 out += '\n' + ', '.join(map(str, filter(None, self.extra)))
77 return out
78
79
80## OS-specific functions
81
82if sys.platform == 'win32':
83 def QueryDosDevice(drive_letter):
84 """Returns the Windows 'native' path for a DOS drive letter."""
85 assert re.match(r'^[a-zA-Z]:$', drive_letter), drive_letter
maruel@chromium.org306e0e72012-11-02 18:22:03 +000086 assert isinstance(drive_letter, unicode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000087 # Guesswork. QueryDosDeviceW never returns the required number of bytes.
88 chars = 1024
maruel@chromium.org306e0e72012-11-02 18:22:03 +000089 drive_letter = drive_letter
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000090 p = create_unicode_buffer(chars)
91 if 0 == windll.kernel32.QueryDosDeviceW(drive_letter, p, chars):
92 err = GetLastError()
93 if err:
94 # pylint: disable=E0602
maruel@chromium.orgf37c2472012-11-02 20:06:13 +000095 msg = u'QueryDosDevice(%s): %s (%d)' % (
96 drive_letter, FormatError(err), err)
97 raise WindowsError(err, msg.encode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000098 return p.value
99
100
101 def GetShortPathName(long_path):
102 """Returns the Windows short path equivalent for a 'long' path."""
maruel@chromium.org68bb1482013-02-15 22:36:24 +0000103 assert isinstance(long_path, unicode), repr(long_path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000104 # Adds '\\\\?\\' when given an absolute path so the MAX_PATH (260) limit is
105 # not enforced.
106 if os.path.isabs(long_path) and not long_path.startswith('\\\\?\\'):
107 long_path = '\\\\?\\' + long_path
108 chars = windll.kernel32.GetShortPathNameW(long_path, None, 0)
109 if chars:
110 p = create_unicode_buffer(chars)
111 if windll.kernel32.GetShortPathNameW(long_path, p, chars):
112 return p.value
113
114 err = GetLastError()
115 if err:
116 # pylint: disable=E0602
maruel@chromium.orgf37c2472012-11-02 20:06:13 +0000117 msg = u'GetShortPathName(%s): %s (%d)' % (
118 long_path, FormatError(err), err)
119 raise WindowsError(err, msg.encode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000120
121
122 def GetLongPathName(short_path):
123 """Returns the Windows long path equivalent for a 'short' path."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000124 assert isinstance(short_path, unicode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000125 # Adds '\\\\?\\' when given an absolute path so the MAX_PATH (260) limit is
126 # not enforced.
127 if os.path.isabs(short_path) and not short_path.startswith('\\\\?\\'):
128 short_path = '\\\\?\\' + short_path
129 chars = windll.kernel32.GetLongPathNameW(short_path, None, 0)
130 if chars:
131 p = create_unicode_buffer(chars)
132 if windll.kernel32.GetLongPathNameW(short_path, p, chars):
133 return p.value
134
135 err = GetLastError()
136 if err:
137 # pylint: disable=E0602
maruel@chromium.orgf37c2472012-11-02 20:06:13 +0000138 msg = u'GetLongPathName(%s): %s (%d)' % (
139 short_path, FormatError(err), err)
140 raise WindowsError(err, msg.encode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000141
142
143 def get_current_encoding():
144 """Returns the 'ANSI' code page associated to the process."""
145 return 'cp%d' % int(windll.kernel32.GetACP())
146
147
148 class DosDriveMap(object):
149 """Maps \Device\HarddiskVolumeN to N: on Windows."""
150 # Keep one global cache.
151 _MAPPING = {}
152
153 def __init__(self):
154 """Lazy loads the cache."""
155 if not self._MAPPING:
156 # This is related to UNC resolver on windows. Ignore that.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000157 self._MAPPING[u'\\Device\\Mup'] = None
158 self._MAPPING[u'\\SystemRoot'] = os.environ[u'SystemRoot']
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000159
160 for letter in (chr(l) for l in xrange(ord('C'), ord('Z')+1)):
161 try:
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000162 letter = u'%s:' % letter
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000163 mapped = QueryDosDevice(letter)
164 if mapped in self._MAPPING:
165 logging.warn(
166 ('Two drives: \'%s\' and \'%s\', are mapped to the same disk'
167 '. Drive letters are a user-mode concept and the kernel '
168 'traces only have NT path, so all accesses will be '
169 'associated with the first drive letter, independent of the '
170 'actual letter used by the code') % (
171 self._MAPPING[mapped], letter))
172 else:
173 self._MAPPING[mapped] = letter
174 except WindowsError: # pylint: disable=E0602
175 pass
176
177 def to_win32(self, path):
178 """Converts a native NT path to Win32/DOS compatible path."""
179 match = re.match(r'(^\\Device\\[a-zA-Z0-9]+)(\\.*)?$', path)
180 if not match:
181 raise ValueError(
182 'Can\'t convert %s into a Win32 compatible path' % path,
183 path)
184 if not match.group(1) in self._MAPPING:
185 # Unmapped partitions may be accessed by windows for the
186 # fun of it while the test is running. Discard these.
187 return None
188 drive = self._MAPPING[match.group(1)]
189 if not drive or not match.group(2):
190 return drive
191 return drive + match.group(2)
192
193
194 def isabs(path):
195 """Accepts X: as an absolute path, unlike python's os.path.isabs()."""
196 return os.path.isabs(path) or len(path) == 2 and path[1] == ':'
197
198
csharp@chromium.orgf2eacff2013-04-04 14:20:20 +0000199 def find_item_native_case(root, item):
200 """Gets the native path case of a single item based at root_path."""
201 if item == '..':
202 return item
203
204 root = get_native_path_case(root)
205 return os.path.basename(get_native_path_case(os.path.join(root, item)))
206
207
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000208 def get_native_path_case(p):
209 """Returns the native path case for an existing file.
210
211 On Windows, removes any leading '\\?\'.
212 """
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000213 assert isinstance(p, unicode), repr(p)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000214 if not isabs(p):
215 raise ValueError(
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +0000216 'get_native_path_case(%r): Require an absolute path' % p, p)
217
maruel@chromium.org037758d2012-12-10 17:59:46 +0000218 # Make sure it is normalized to os.path.sep. Do not do it here to keep the
219 # function fast
220 assert '/' not in p, p
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +0000221 suffix = ''
222 count = p.count(':')
223 if count > 1:
224 # This means it has an alternate-data stream. There could be 3 ':', since
225 # it could be the $DATA datastream of an ADS. Split the whole ADS suffix
226 # off and add it back afterward. There is no way to know the native path
227 # case of an alternate data stream.
228 items = p.split(':')
229 p = ':'.join(items[0:2])
230 suffix = ''.join(':' + i for i in items[2:])
231
maruel@chromium.org42b091d2013-04-02 22:27:02 +0000232 # TODO(maruel): Use os.path.normpath?
233 if p.endswith('.\\'):
234 p = p[:-2]
235
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000236 # Windows used to have an option to turn on case sensitivity on non Win32
237 # subsystem but that's out of scope here and isn't supported anymore.
238 # Go figure why GetShortPathName() is needed.
239 try:
240 out = GetLongPathName(GetShortPathName(p))
241 except OSError, e:
242 if e.args[0] in (2, 3, 5):
243 # The path does not exist. Try to recurse and reconstruct the path.
244 base = os.path.dirname(p)
245 rest = os.path.basename(p)
246 return os.path.join(get_native_path_case(base), rest)
247 raise
248 if out.startswith('\\\\?\\'):
249 out = out[4:]
250 # Always upper case the first letter since GetLongPathName() will return the
251 # drive letter in the case it was given.
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +0000252 return out[0].upper() + out[1:] + suffix
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000253
254
255 def CommandLineToArgvW(command_line):
256 """Splits a commandline into argv using CommandLineToArgvW()."""
257 # http://msdn.microsoft.com/library/windows/desktop/bb776391.aspx
258 size = c_int()
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000259 assert isinstance(command_line, unicode)
260 ptr = windll.shell32.CommandLineToArgvW(command_line, byref(size))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000261 try:
262 return [arg for arg in (c_wchar_p * size.value).from_address(ptr)]
263 finally:
264 windll.kernel32.LocalFree(ptr)
265
266
267elif sys.platform == 'darwin':
268
269
270 # On non-windows, keep the stdlib behavior.
271 isabs = os.path.isabs
272
273
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000274 def _native_case(p):
275 """Gets the native path case. Warning: this function resolves symlinks."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000276 try:
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000277 rel_ref, _ = Carbon.File.FSPathMakeRef(p.encode('utf-8'))
278 # The OSX underlying code uses NFD but python strings are in NFC. This
279 # will cause issues with os.listdir() for example. Since the dtrace log
280 # *is* in NFC, normalize it here.
csharp@chromium.org5a2c3732013-03-07 14:44:56 +0000281 out = unicodedata.normalize(
282 'NFC', rel_ref.FSRefMakePath().decode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000283 if p.endswith(os.path.sep) and not out.endswith(os.path.sep):
284 return out + os.path.sep
285 return out
286 except MacOS.Error, e:
287 if e.args[0] in (-43, -120):
288 # The path does not exist. Try to recurse and reconstruct the path.
289 # -43 means file not found.
290 # -120 means directory not found.
291 base = os.path.dirname(p)
292 rest = os.path.basename(p)
csharp@chromium.org5a2c3732013-03-07 14:44:56 +0000293 return os.path.join(_native_case(base), rest)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000294 raise OSError(
295 e.args[0], 'Failed to get native path for %s' % p, p, e.args[1])
296
297
298 def _split_at_symlink_native(base_path, rest):
299 """Returns the native path for a symlink."""
300 base, symlink, rest = split_at_symlink(base_path, rest)
301 if symlink:
302 if not base_path:
303 base_path = base
304 else:
305 base_path = safe_join(base_path, base)
csharp@chromium.orgf2eacff2013-04-04 14:20:20 +0000306 symlink = find_item_native_case(base_path, symlink)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000307 return base, symlink, rest
308
309
csharp@chromium.orgf2eacff2013-04-04 14:20:20 +0000310 def find_item_native_case(root_path, item):
311 """Gets the native path case of a single item based at root_path.
312
313 There is no API to get the native path case of symlinks on OSX. So it
314 needs to be done the slow way.
315 """
316 if item == '..':
317 return item
318
319 item = item.lower()
320 for element in os.listdir(root_path):
321 if element.lower() == item:
322 return element
323
324
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000325 def get_native_path_case(path):
326 """Returns the native path case for an existing file.
327
328 Technically, it's only HFS+ on OSX that is case preserving and
329 insensitive. It's the default setting on HFS+ but can be changed.
330 """
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000331 assert isinstance(path, unicode), repr(path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000332 if not isabs(path):
333 raise ValueError(
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000334 'get_native_path_case(%r): Require an absolute path' % path, path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000335 if path.startswith('/dev'):
336 # /dev is not visible from Carbon, causing an exception.
337 return path
338
339 # Starts assuming there is no symlink along the path.
340 resolved = _native_case(path)
csharp@chromium.orgf2eacff2013-04-04 14:20:20 +0000341 if path.lower() in (resolved.lower(), resolved.lower() + './'):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000342 # This code path is incredibly faster.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000343 logging.debug('get_native_path_case(%s) = %s' % (path, resolved))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000344 return resolved
345
346 # There was a symlink, process it.
347 base, symlink, rest = _split_at_symlink_native(None, path)
348 assert symlink, (path, base, symlink, rest, resolved)
349 prev = base
350 base = safe_join(_native_case(base), symlink)
351 assert len(base) > len(prev)
352 while rest:
353 prev = base
354 relbase, symlink, rest = _split_at_symlink_native(base, rest)
355 base = safe_join(base, relbase)
356 assert len(base) > len(prev), (prev, base, symlink)
357 if symlink:
358 base = safe_join(base, symlink)
359 assert len(base) > len(prev), (prev, base, symlink)
360 # Make sure no symlink was resolved.
361 assert base.lower() == path.lower(), (base, path)
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000362 logging.debug('get_native_path_case(%s) = %s' % (path, base))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000363 return base
364
365
366else: # OSes other than Windows and OSX.
367
368
369 # On non-windows, keep the stdlib behavior.
370 isabs = os.path.isabs
371
372
csharp@chromium.orgf2eacff2013-04-04 14:20:20 +0000373 def find_item_native_case(root, item):
374 """Gets the native path case of a single item based at root_path."""
375 if item == '..':
376 return item
377
378 root = get_native_path_case(root)
379 return os.path.basename(get_native_path_case(os.path.join(root, item)))
380
381
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000382 def get_native_path_case(path):
383 """Returns the native path case for an existing file.
384
385 On OSes other than OSX and Windows, assume the file system is
386 case-sensitive.
387
388 TODO(maruel): This is not strictly true. Implement if necessary.
389 """
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000390 assert isinstance(path, unicode), repr(path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000391 if not isabs(path):
392 raise ValueError(
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000393 'get_native_path_case(%r): Require an absolute path' % path, path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000394 # Give up on cygwin, as GetLongPathName() can't be called.
395 # Linux traces tends to not be normalized so use this occasion to normalize
396 # it. This function implementation already normalizes the path on the other
397 # OS so this needs to be done here to be coherent between OSes.
398 out = os.path.normpath(path)
399 if path.endswith(os.path.sep) and not out.endswith(os.path.sep):
400 return out + os.path.sep
401 return out
402
403
404if sys.platform != 'win32': # All non-Windows OSes.
405
406
407 def safe_join(*args):
408 """Joins path elements like os.path.join() but doesn't abort on absolute
409 path.
410
411 os.path.join('foo', '/bar') == '/bar'
412 but safe_join('foo', '/bar') == 'foo/bar'.
413 """
414 out = ''
415 for element in args:
416 if element.startswith(os.path.sep):
417 if out.endswith(os.path.sep):
418 out += element[1:]
419 else:
420 out += element
421 else:
422 if out.endswith(os.path.sep):
423 out += element
424 else:
425 out += os.path.sep + element
426 return out
427
428
429 def split_at_symlink(base_dir, relfile):
430 """Scans each component of relfile and cut the string at the symlink if
431 there is any.
432
433 Returns a tuple (base_path, symlink, rest), with symlink == rest == None if
434 not symlink was found.
435 """
436 if base_dir:
437 assert relfile
438 assert os.path.isabs(base_dir)
439 index = 0
440 else:
441 assert os.path.isabs(relfile)
442 index = 1
443
444 def at_root(rest):
445 if base_dir:
446 return safe_join(base_dir, rest)
447 return rest
448
449 while True:
450 try:
451 index = relfile.index(os.path.sep, index)
452 except ValueError:
453 index = len(relfile)
454 full = at_root(relfile[:index])
455 if os.path.islink(full):
456 # A symlink!
457 base = os.path.dirname(relfile[:index])
458 symlink = os.path.basename(relfile[:index])
459 rest = relfile[index:]
460 logging.debug(
461 'split_at_symlink(%s, %s) -> (%s, %s, %s)' %
462 (base_dir, relfile, base, symlink, rest))
463 return base, symlink, rest
464 if index == len(relfile):
465 break
466 index += 1
467 return relfile, None, None
468
469
470def fix_python_path(cmd):
471 """Returns the fixed command line to call the right python executable."""
472 out = cmd[:]
473 if out[0] == 'python':
474 out[0] = sys.executable
475 elif out[0].endswith('.py'):
476 out.insert(0, sys.executable)
477 return out
478
479
480def create_thunk():
481 handle, name = tempfile.mkstemp(prefix='trace_inputs_thunk', suffix='.py')
482 os.write(
483 handle,
484 (
485 'import subprocess\n'
486 'import sys\n'
487 'sys.exit(subprocess.call(sys.argv[2:]))\n'
488 ))
489 os.close(handle)
490 return name
491
492
493def strace_process_quoted_arguments(text):
494 """Extracts quoted arguments on a string and return the arguments as a list.
495
496 Implemented as an automaton. Supports incomplete strings in the form
497 '"foo"...'.
498
499 Example:
500 With text = '"foo", "bar"', the function will return ['foo', 'bar']
501
502 TODO(maruel): Implement escaping.
503 """
504 # All the possible states of the DFA.
505 ( NEED_QUOTE, # Begining of a new arguments.
506 INSIDE_STRING, # Inside an argument.
507 ESCAPED, # Found a '\' inside a quote. Treat the next char as-is.
508 NEED_COMMA_OR_DOT, # Right after the closing quote of an argument. Could be
509 # a serie of 3 dots or a comma.
510 NEED_SPACE, # Right after a comma
511 NEED_DOT_2, # Found a dot, need a second one.
512 NEED_DOT_3, # Found second dot, need a third one.
513 NEED_COMMA, # Found third dot, need a comma.
514 ) = range(8)
515
516 state = NEED_QUOTE
517 out = []
518 for index, char in enumerate(text):
519 if char == '"':
520 if state == NEED_QUOTE:
521 state = INSIDE_STRING
522 # A new argument was found.
523 out.append('')
524 elif state == INSIDE_STRING:
525 # The argument is now closed.
526 state = NEED_COMMA_OR_DOT
527 elif state == ESCAPED:
528 out[-1] += char
529 state = INSIDE_STRING
530 else:
531 raise ValueError(
532 'Can\'t process char at column %d for: %r' % (index, text),
533 index,
534 text)
535 elif char == ',':
536 if state in (NEED_COMMA_OR_DOT, NEED_COMMA):
537 state = NEED_SPACE
538 elif state == INSIDE_STRING:
539 out[-1] += char
540 elif state == ESCAPED:
541 out[-1] += char
542 state = INSIDE_STRING
543 else:
544 raise ValueError(
545 'Can\'t process char at column %d for: %r' % (index, text),
546 index,
547 text)
548 elif char == ' ':
549 if state == NEED_SPACE:
550 state = NEED_QUOTE
551 elif state == INSIDE_STRING:
552 out[-1] += char
553 elif state == ESCAPED:
554 out[-1] += char
555 state = INSIDE_STRING
556 else:
557 raise ValueError(
558 'Can\'t process char at column %d for: %r' % (index, text),
559 index,
560 text)
561 elif char == '.':
562 if state == NEED_COMMA_OR_DOT:
563 # The string is incomplete, this mean the strace -s flag should be
564 # increased.
565 state = NEED_DOT_2
566 elif state == NEED_DOT_2:
567 state = NEED_DOT_3
568 elif state == NEED_DOT_3:
569 state = NEED_COMMA
570 elif state == INSIDE_STRING:
571 out[-1] += char
572 elif state == ESCAPED:
573 out[-1] += char
574 state = INSIDE_STRING
575 else:
576 raise ValueError(
577 'Can\'t process char at column %d for: %r' % (index, text),
578 index,
579 text)
580 elif char == '\\':
581 if state == ESCAPED:
582 out[-1] += char
583 state = INSIDE_STRING
584 elif state == INSIDE_STRING:
585 state = ESCAPED
586 else:
587 raise ValueError(
588 'Can\'t process char at column %d for: %r' % (index, text),
589 index,
590 text)
591 else:
592 if state == INSIDE_STRING:
593 out[-1] += char
594 else:
595 raise ValueError(
596 'Can\'t process char at column %d for: %r' % (index, text),
597 index,
598 text)
599 if state not in (NEED_COMMA, NEED_COMMA_OR_DOT):
600 raise ValueError(
601 'String is incorrectly terminated: %r' % text,
602 text)
603 return out
604
605
606def read_json(filepath):
607 with open(filepath, 'r') as f:
608 return json.load(f)
609
610
611def write_json(filepath_or_handle, data, dense):
612 """Writes data into filepath or file handle encoded as json.
613
614 If dense is True, the json is packed. Otherwise, it is human readable.
615 """
616 if hasattr(filepath_or_handle, 'write'):
617 if dense:
618 filepath_or_handle.write(
619 json.dumps(data, sort_keys=True, separators=(',',':')))
620 else:
621 filepath_or_handle.write(json.dumps(data, sort_keys=True, indent=2))
622 else:
623 with open(filepath_or_handle, 'wb') as f:
624 if dense:
625 json.dump(data, f, sort_keys=True, separators=(',',':'))
626 else:
627 json.dump(data, f, sort_keys=True, indent=2)
628
629
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000630def assert_is_renderable(pseudo_string):
631 """Asserts the input is a valid object to be processed by render()."""
632 assert (
633 isinstance(pseudo_string, (None.__class__, unicode)) or
634 hasattr(pseudo_string, 'render')), repr(pseudo_string)
635
636
637def render(pseudo_string):
638 """Converts the pseudo-string to an unicode string."""
639 assert_is_renderable(pseudo_string)
640 if isinstance(pseudo_string, (None.__class__, unicode)):
641 return pseudo_string
642 return pseudo_string.render()
643
644
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000645class Results(object):
646 """Results of a trace session."""
647
648 class _TouchedObject(object):
649 """Something, a file or a directory, that was accessed."""
650 def __init__(self, root, path, tainted, size, nb_files):
651 logging.debug(
652 '%s(%s, %s, %s, %s, %s)' %
653 (self.__class__.__name__, root, path, tainted, size, nb_files))
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000654 assert_is_renderable(root)
655 assert_is_renderable(path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000656 self.root = root
657 self.path = path
658 self.tainted = tainted
659 self.nb_files = nb_files
660 # Can be used as a cache or a default value, depending on context.
661 self._size = size
662 # These are cache only.
663 self._real_path = None
664
665 # Check internal consistency.
666 assert path, path
667 assert tainted or bool(root) != bool(isabs(path)), (root, path)
668 assert tainted or (
669 not os.path.exists(self.full_path) or
670 (self.full_path == get_native_path_case(self.full_path))), (
671 tainted, self.full_path, get_native_path_case(self.full_path))
672
673 @property
674 def existent(self):
675 return self.size != -1
676
677 @property
678 def full_path(self):
679 if self.root:
680 return os.path.join(self.root, self.path)
681 return self.path
682
683 @property
684 def real_path(self):
685 """Returns the path with symlinks resolved."""
686 if not self._real_path:
687 self._real_path = os.path.realpath(self.full_path)
688 return self._real_path
689
690 @property
691 def size(self):
692 """File's size. -1 is not existent."""
693 if self._size is None and not self.tainted:
694 try:
695 self._size = os.stat(self.full_path).st_size
696 except OSError:
697 self._size = -1
698 return self._size
699
700 def flatten(self):
701 """Returns a dict representing this object.
702
703 A 'size' of 0 means the file was only touched and not read.
704 """
705 return {
706 'path': self.path,
707 'size': self.size,
708 }
709
710 def replace_variables(self, variables):
711 """Replaces the root of this File with one of the variables if it matches.
712
713 If a variable replacement occurs, the cloned object becomes tainted.
714 """
715 for variable, root_path in variables.iteritems():
716 if self.path.startswith(root_path):
717 return self._clone(
718 self.root, variable + self.path[len(root_path):], True)
719 # No need to clone, returns ourself.
720 return self
721
722 def strip_root(self, root):
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000723 """Returns a clone of itself with 'root' stripped off.
724
725 Note that the file is kept if it is either accessible from a symlinked
726 path that was used to access the file or through the real path.
727 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000728 # Check internal consistency.
729 assert self.tainted or (isabs(root) and root.endswith(os.path.sep)), root
730 if not self.full_path.startswith(root):
731 # Now try to resolve the symlinks to see if it can be reached this way.
732 # Only try *after* trying without resolving symlink.
733 if not self.real_path.startswith(root):
734 return None
735 path = self.real_path
736 else:
737 path = self.full_path
738 return self._clone(root, path[len(root):], self.tainted)
739
740 def _clone(self, new_root, new_path, tainted):
741 raise NotImplementedError(self.__class__.__name__)
742
743 class File(_TouchedObject):
744 """A file that was accessed. May not be present anymore.
745
746 If tainted is true, it means it is not a real path anymore as a variable
747 replacement occured.
748
maruel@chromium.orge5c17132012-11-21 18:18:46 +0000749 If only_touched is True, this means the file was probed for existence, and
750 it is existent, but was never _opened_. If only_touched is True, the file
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000751 must have existed.
752 """
753 def __init__(self, root, path, tainted, size):
754 super(Results.File, self).__init__(root, path, tainted, size, 1)
755
756 def _clone(self, new_root, new_path, tainted):
757 """Clones itself keeping meta-data."""
758 # Keep the self.size and self._real_path caches for performance reason. It
759 # is also important when the file becomes tainted (with a variable instead
760 # of the real path) since self.path is not an on-disk path anymore so
761 # out._size cannot be updated.
762 out = self.__class__(new_root, new_path, tainted, self.size)
763 out._real_path = self._real_path
764 return out
765
766 class Directory(_TouchedObject):
767 """A directory of files. Must exist."""
768 def __init__(self, root, path, tainted, size, nb_files):
769 """path='.' is a valid value and must be handled appropriately."""
770 assert not path.endswith(os.path.sep), path
771 super(Results.Directory, self).__init__(
772 root, path + os.path.sep, tainted, size, nb_files)
773 # For a Directory instance, self.size is not a cache, it's an actual value
774 # that is never modified and represents the total size of the files
775 # contained in this directory. It is possible that the directory is empty
776 # so that size == 0; this happens if there's only an invalid symlink in
777 # it.
778
779 def flatten(self):
780 out = super(Results.Directory, self).flatten()
781 out['nb_files'] = self.nb_files
782 return out
783
784 def _clone(self, new_root, new_path, tainted):
785 """Clones itself keeping meta-data."""
786 out = self.__class__(
787 new_root,
788 new_path.rstrip(os.path.sep),
789 tainted,
790 self.size,
791 self.nb_files)
792 out._real_path = self._real_path
793 return out
794
795 class Process(object):
796 """A process that was traced.
797
798 Contains references to the files accessed by this process and its children.
799 """
800 def __init__(self, pid, files, executable, command, initial_cwd, children):
801 logging.debug('Process(%s, %d, ...)' % (pid, len(files)))
802 self.pid = pid
803 self.files = sorted(files, key=lambda x: x.path)
804 self.children = children
805 self.executable = executable
806 self.command = command
807 self.initial_cwd = initial_cwd
808
809 # Check internal consistency.
810 assert len(set(f.path for f in self.files)) == len(self.files), sorted(
811 f.path for f in self.files)
812 assert isinstance(self.children, list)
813 assert isinstance(self.files, list)
814
815 @property
816 def all(self):
817 for child in self.children:
818 for i in child.all:
819 yield i
820 yield self
821
822 def flatten(self):
823 return {
824 'children': [c.flatten() for c in self.children],
825 'command': self.command,
826 'executable': self.executable,
827 'files': [f.flatten() for f in self.files],
828 'initial_cwd': self.initial_cwd,
829 'pid': self.pid,
830 }
831
832 def strip_root(self, root):
833 assert isabs(root) and root.endswith(os.path.sep), root
834 # Loads the files after since they are constructed as objects.
835 out = self.__class__(
836 self.pid,
837 filter(None, (f.strip_root(root) for f in self.files)),
838 self.executable,
839 self.command,
840 self.initial_cwd,
841 [c.strip_root(root) for c in self.children])
842 logging.debug(
843 'strip_root(%s) %d -> %d' % (root, len(self.files), len(out.files)))
844 return out
845
846 def __init__(self, process):
847 self.process = process
848 # Cache.
849 self._files = None
850
851 def flatten(self):
852 return {
853 'root': self.process.flatten(),
854 }
855
856 @property
857 def files(self):
858 if self._files is None:
859 self._files = sorted(
860 sum((p.files for p in self.process.all), []),
861 key=lambda x: x.path)
862 return self._files
863
864 @property
865 def existent(self):
866 return [f for f in self.files if f.existent]
867
868 @property
869 def non_existent(self):
870 return [f for f in self.files if not f.existent]
871
872 def strip_root(self, root):
873 """Returns a clone with all the files outside the directory |root| removed
874 and converts all the path to be relative paths.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000875
876 It keeps files accessible through the |root| directory or that have been
877 accessed through any symlink which points to the same directory.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000878 """
879 # Resolve any symlink
880 root = os.path.realpath(root)
881 root = get_native_path_case(root).rstrip(os.path.sep) + os.path.sep
882 logging.debug('strip_root(%s)' % root)
883 return Results(self.process.strip_root(root))
884
885
886class ApiBase(object):
887 """OS-agnostic API to trace a process and its children."""
888 class Context(object):
889 """Processes one log line at a time and keeps the list of traced processes.
890
891 The parsing is complicated by the fact that logs are traced out of order for
892 strace but in-order for dtrace and logman. In addition, on Windows it is
893 very frequent that processids are reused so a flat list cannot be used. But
894 at the same time, it is impossible to faithfully construct a graph when the
895 logs are processed out of order. So both a tree and a flat mapping are used,
896 the tree is the real process tree, while the flat mapping stores the last
897 valid process for the corresponding processid. For the strace case, the
898 tree's head is guessed at the last moment.
899 """
900 class Process(object):
901 """Keeps context for one traced child process.
902
903 Logs all the files this process touched. Ignores directories.
904 """
905 def __init__(self, blacklist, pid, initial_cwd):
906 # Check internal consistency.
907 assert isinstance(pid, int), repr(pid)
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000908 assert_is_renderable(initial_cwd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000909 self.pid = pid
910 # children are Process instances.
911 self.children = []
912 self.initial_cwd = initial_cwd
913 self.cwd = None
914 self.files = set()
915 self.only_touched = set()
916 self.executable = None
917 self.command = None
918 self._blacklist = blacklist
919
920 def to_results_process(self):
921 """Resolves file case sensitivity and or late-bound strings."""
922 # When resolving files, it's normal to get dupe because a file could be
923 # opened multiple times with different case. Resolve the deduplication
924 # here.
925 def fix_path(x):
926 """Returns the native file path case.
927
928 Converts late-bound strings.
929 """
930 if not x:
931 # Do not convert None instance to 'None'.
932 return x
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000933 x = render(x)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000934 if os.path.isabs(x):
935 # If the path is not absolute, which tends to happen occasionally on
936 # Windows, it is not possible to get the native path case so ignore
937 # that trace. It mostly happens for 'executable' value.
938 x = get_native_path_case(x)
939 return x
940
941 def fix_and_blacklist_path(x):
942 x = fix_path(x)
943 if not x:
944 return
945 # The blacklist needs to be reapplied, since path casing could
946 # influence blacklisting.
947 if self._blacklist(x):
948 return
949 return x
950
951 # Filters out directories. Some may have passed through.
952 files = set(f for f in map(fix_and_blacklist_path, self.files) if f)
953 only_touched = set(
954 f for f in map(fix_and_blacklist_path, self.only_touched) if f)
955 only_touched -= files
956
957 files = [
958 Results.File(None, f, False, None) for f in files
959 if not os.path.isdir(f)
960 ]
961 # Using 0 as size means the file's content is ignored since the file was
962 # never opened for I/O.
963 files.extend(
964 Results.File(None, f, False, 0) for f in only_touched
965 if not os.path.isdir(f)
966 )
967 return Results.Process(
968 self.pid,
969 files,
970 fix_path(self.executable),
971 self.command,
972 fix_path(self.initial_cwd),
973 [c.to_results_process() for c in self.children])
974
975 def add_file(self, filepath, touch_only):
976 """Adds a file if it passes the blacklist."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000977 if self._blacklist(render(filepath)):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000978 return
979 logging.debug('add_file(%d, %s, %s)' % (self.pid, filepath, touch_only))
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000980 # Note that filepath and not render(filepath) is added. It is
981 # because filepath could be something else than a string, like a
982 # RelativePath instance for dtrace logs.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000983 if touch_only:
984 self.only_touched.add(filepath)
985 else:
986 self.files.add(filepath)
987
988 def __init__(self, blacklist):
989 self.blacklist = blacklist
990 # Initial process.
991 self.root_process = None
992 # dict to accelerate process lookup, to not have to lookup the whole graph
993 # each time.
994 self._process_lookup = {}
995
996 class Tracer(object):
997 """During it's lifetime, the tracing subsystem is enabled."""
998 def __init__(self, logname):
999 self._logname = logname
1000 self._lock = threading.Lock()
1001 self._traces = []
1002 self._initialized = True
1003
1004 def trace(self, cmd, cwd, tracename, output):
1005 """Runs the OS-specific trace program on an executable.
1006
1007 Arguments:
1008 - cmd: The command (a list) to run.
1009 - cwd: Current directory to start the child process in.
1010 - tracename: Name of the trace in the logname file.
1011 - output: If False, redirects output to PIPEs.
1012
1013 Returns a tuple (resultcode, output) and updates the internal trace
1014 entries.
1015 """
1016 # The implementation adds an item to self._traces.
1017 raise NotImplementedError(self.__class__.__name__)
1018
1019 def close(self, _timeout=None):
1020 """Saves the meta-data in the logname file.
1021
1022 For kernel-based tracing, stops the tracing subsystem.
1023
1024 Must not be used manually when using 'with' construct.
1025 """
1026 with self._lock:
1027 assert self._initialized
1028 try:
1029 data = {
1030 'traces': self._traces,
1031 }
1032 write_json(self._logname, data, False)
1033 finally:
1034 self._initialized = False
1035
1036 def post_process_log(self):
1037 """Post-processes the log so it becomes faster to load afterward.
1038
1039 Must not be used manually when using 'with' construct.
1040 """
1041 assert not self._initialized, 'Must stop tracing first.'
1042
1043 def __enter__(self):
1044 """Enables 'with' statement."""
1045 return self
1046
1047 def __exit__(self, exc_type, exc_value, traceback):
1048 """Enables 'with' statement."""
1049 self.close()
1050 # If an exception was thrown, do not process logs.
1051 if not exc_type:
1052 self.post_process_log()
1053
1054 def get_tracer(self, logname):
1055 """Returns an ApiBase.Tracer instance.
1056
1057 Initializes the tracing subsystem, which is a requirement for kernel-based
1058 tracers. Only one tracer instance should be live at a time!
1059
1060 logname is the filepath to the json file that will contain the meta-data
1061 about the logs.
1062 """
1063 return self.Tracer(logname)
1064
1065 @staticmethod
1066 def clean_trace(logname):
1067 """Deletes an old log."""
1068 raise NotImplementedError()
1069
1070 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001071 def parse_log(cls, logname, blacklist, trace_name):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001072 """Processes trace logs and returns the files opened and the files that do
1073 not exist.
1074
1075 It does not track directories.
1076
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001077 Arguments:
1078 - logname: must be an absolute path.
1079 - blacklist: must be a lambda.
1080 - trace_name: optional trace to read, defaults to reading all traces.
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00001081
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001082 Most of the time, files that do not exist are temporary test files that
1083 should be put in /tmp instead. See http://crbug.com/116251.
1084
1085 Returns a list of dict with keys:
1086 - results: A Results instance.
1087 - trace: The corresponding tracename parameter provided to
1088 get_tracer().trace().
1089 - output: Output gathered during execution, if get_tracer().trace(...,
1090 output=False) was used.
1091 """
1092 raise NotImplementedError(cls.__class__.__name__)
1093
1094
1095class Strace(ApiBase):
1096 """strace implies linux."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001097 @staticmethod
1098 def load_filename(filename):
1099 """Parses a filename in a log."""
1100 assert isinstance(filename, str)
1101 out = ''
1102 i = 0
1103 while i < len(filename):
1104 c = filename[i]
1105 if c == '\\':
1106 out += chr(int(filename[i+1:i+4], 8))
1107 i += 4
1108 else:
1109 out += c
1110 i += 1
1111 # TODO(maruel): That's not necessarily true that the current code page is
1112 # utf-8.
1113 return out.decode('utf-8')
1114
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001115 class Context(ApiBase.Context):
1116 """Processes a strace log line and keeps the list of existent and non
1117 existent files accessed.
1118
1119 Ignores directories.
1120
1121 Uses late-binding to processes the cwd of each process. The problem is that
1122 strace generates one log file per process it traced but doesn't give any
1123 information about which process was started when and by who. So we don't
1124 even know which process is the initial one. So process the logs out of
1125 order and use late binding with RelativePath to be able to deduce the
1126 initial directory of each process once all the logs are parsed.
1127 """
1128 class Process(ApiBase.Context.Process):
1129 """Represents the state of a process.
1130
1131 Contains all the information retrieved from the pid-specific log.
1132 """
1133 # Function names are using ([a-z_0-9]+)
1134 # This is the most common format. function(args) = result
1135 RE_HEADER = re.compile(r'^([a-z_0-9]+)\((.+?)\)\s+= (.+)$')
1136 # An interrupted function call, only grab the minimal header.
1137 RE_UNFINISHED = re.compile(r'^([^\(]+)(.*) \<unfinished \.\.\.\>$')
1138 # A resumed function call.
1139 RE_RESUMED = re.compile(r'^<\.\.\. ([^ ]+) resumed> (.+)$')
1140 # A process received a signal.
1141 RE_SIGNAL = re.compile(r'^--- SIG[A-Z]+ .+ ---')
1142 # A process didn't handle a signal. Ignore any junk appearing before,
1143 # because the process was forcibly killed so it won't open any new file.
1144 RE_KILLED = re.compile(
1145 r'^.*\+\+\+ killed by ([A-Z]+)( \(core dumped\))? \+\+\+$')
1146 # The process has exited.
1147 RE_PROCESS_EXITED = re.compile(r'^\+\+\+ exited with (\d+) \+\+\+')
1148 # A call was canceled. Ignore any prefix.
1149 RE_UNAVAILABLE = re.compile(r'^.*\)\s*= \? <unavailable>$')
1150 # Happens when strace fails to even get the function name.
1151 UNNAMED_FUNCTION = '????'
1152
1153 # Corner-case in python, a class member function decorator must not be
1154 # @staticmethod.
1155 def parse_args(regexp, expect_zero): # pylint: disable=E0213
1156 """Automatically convert the str 'args' into a list of processed
1157 arguments.
1158
1159 Arguments:
1160 - regexp is used to parse args.
1161 - expect_zero: one of True, False or None.
1162 - True: will check for result.startswith('0') first and will ignore
1163 the trace line completely otherwise. This is important because for
1164 many functions, the regexp will not process if the call failed.
1165 - False: will check for not result.startswith(('?', '-1')) for the
1166 same reason than with True.
1167 - None: ignore result.
1168 """
1169 def meta_hook(function):
1170 assert function.__name__.startswith('handle_')
1171 def hook(self, args, result):
1172 if expect_zero is True and not result.startswith('0'):
1173 return
1174 if expect_zero is False and result.startswith(('?', '-1')):
1175 return
1176 match = re.match(regexp, args)
1177 if not match:
1178 raise TracingFailure(
1179 'Failed to parse %s(%s) = %s' %
1180 (function.__name__[len('handle_'):], args, result),
1181 None, None, None)
1182 return function(self, match.groups(), result)
1183 return hook
1184 return meta_hook
1185
1186 class RelativePath(object):
1187 """A late-bound relative path."""
1188 def __init__(self, parent, value):
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001189 assert_is_renderable(parent)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001190 self.parent = parent
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001191 assert isinstance(value, (None.__class__, str)), repr(value)
1192 self.value = Strace.load_filename(value) if value else value
1193 if self.value:
1194 assert '\\' not in self.value, value
1195 assert '\\' not in self.value, (repr(value), repr(self.value))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001196
1197 def render(self):
1198 """Returns the current directory this instance is representing.
1199
1200 This function is used to return the late-bound value.
1201 """
1202 if self.value and self.value.startswith(u'/'):
1203 # An absolute path.
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001204 # TODO(maruel): This is wrong, we can't assert it is utf-8.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001205 return self.value
1206 parent = self.parent.render() if self.parent else u'<None>'
1207 if self.value:
1208 return os.path.normpath(os.path.join(parent, self.value))
1209 return parent
1210
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001211 def __init__(self, root, pid):
1212 """Keeps enough information to be able to guess the original process
1213 root.
1214
1215 strace doesn't store which process was the initial process. So more
1216 information needs to be kept so the graph can be reconstructed from the
1217 flat map.
1218 """
1219 logging.info('%s(%d)' % (self.__class__.__name__, pid))
1220 super(Strace.Context.Process, self).__init__(root.blacklist, pid, None)
1221 assert isinstance(root, ApiBase.Context)
1222 self._root = weakref.ref(root)
1223 # The dict key is the function name of the pending call, like 'open'
1224 # or 'execve'.
1225 self._pending_calls = {}
1226 self._line_number = 0
1227 # Current directory when the process started.
1228 self.initial_cwd = self.RelativePath(self._root(), None)
1229 self.parentid = None
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001230 self._done = False
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001231
1232 def get_cwd(self):
1233 """Returns the best known value of cwd."""
1234 return self.cwd or self.initial_cwd
1235
1236 def render(self):
1237 """Returns the string value of the RelativePath() object.
1238
1239 Used by RelativePath. Returns the initial directory and not the
1240 current one since the current directory 'cwd' validity is time-limited.
1241
1242 The validity is only guaranteed once all the logs are processed.
1243 """
1244 return self.initial_cwd.render()
1245
1246 def on_line(self, line):
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001247 assert isinstance(line, str)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001248 self._line_number += 1
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001249 if self._done:
1250 raise TracingFailure(
maruel@chromium.orgbcfc9e12013-03-19 18:47:17 +00001251 'Found a trace for a terminated process or corrupted log',
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001252 None, None, None)
1253
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001254 if self.RE_SIGNAL.match(line):
1255 # Ignore signals.
1256 return
1257
1258 try:
1259 match = self.RE_KILLED.match(line)
1260 if match:
1261 # Converts a '+++ killed by Foo +++' trace into an exit_group().
1262 self.handle_exit_group(match.group(1), None)
1263 return
1264
1265 match = self.RE_PROCESS_EXITED.match(line)
1266 if match:
1267 # Converts a '+++ exited with 1 +++' trace into an exit_group()
1268 self.handle_exit_group(match.group(1), None)
1269 return
1270
1271 match = self.RE_UNFINISHED.match(line)
1272 if match:
1273 if match.group(1) in self._pending_calls:
1274 raise TracingFailure(
1275 'Found two unfinished calls for the same function',
1276 None, None, None,
1277 self._pending_calls)
1278 self._pending_calls[match.group(1)] = (
1279 match.group(1) + match.group(2))
1280 return
1281
1282 match = self.RE_UNAVAILABLE.match(line)
1283 if match:
1284 # This usually means a process was killed and a pending call was
1285 # canceled.
1286 # TODO(maruel): Look up the last exit_group() trace just above and
1287 # make sure any self._pending_calls[anything] is properly flushed.
1288 return
1289
1290 match = self.RE_RESUMED.match(line)
1291 if match:
1292 if match.group(1) not in self._pending_calls:
1293 raise TracingFailure(
1294 'Found a resumed call that was not logged as unfinished',
1295 None, None, None,
1296 self._pending_calls)
1297 pending = self._pending_calls.pop(match.group(1))
1298 # Reconstruct the line.
1299 line = pending + match.group(2)
1300
1301 match = self.RE_HEADER.match(line)
1302 if not match:
maruel@chromium.orgbcfc9e12013-03-19 18:47:17 +00001303 # The line is corrupted. It happens occasionally when a process is
1304 # killed forcibly with activity going on. Assume the process died.
1305 # No other line can be processed afterward.
1306 self._done = True
1307 return
1308
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001309 if match.group(1) == self.UNNAMED_FUNCTION:
1310 return
1311
1312 # It's a valid line, handle it.
1313 handler = getattr(self, 'handle_%s' % match.group(1), None)
1314 if not handler:
1315 self._handle_unknown(match.group(1), match.group(2), match.group(3))
1316 return handler(match.group(2), match.group(3))
1317 except TracingFailure, e:
1318 # Hack in the values since the handler could be a static function.
1319 e.pid = self.pid
1320 e.line = line
1321 e.line_number = self._line_number
1322 # Re-raise the modified exception.
1323 raise
1324 except (KeyError, NotImplementedError, ValueError), e:
1325 raise TracingFailure(
1326 'Trace generated a %s exception: %s' % (
1327 e.__class__.__name__, str(e)),
1328 self.pid,
1329 self._line_number,
1330 line,
1331 e)
1332
1333 @parse_args(r'^\"(.+?)\", [FKORWX_|]+$', True)
1334 def handle_access(self, args, _result):
1335 self._handle_file(args[0], True)
1336
1337 @parse_args(r'^\"(.+?)\"$', True)
1338 def handle_chdir(self, args, _result):
1339 """Updates cwd."""
1340 self.cwd = self.RelativePath(self, args[0])
1341 logging.debug('handle_chdir(%d, %s)' % (self.pid, self.cwd))
1342
1343 def handle_clone(self, _args, result):
1344 """Transfers cwd."""
1345 if result.startswith(('?', '-1')):
1346 # The call failed.
1347 return
1348 # Update the other process right away.
1349 childpid = int(result)
1350 child = self._root().get_or_set_proc(childpid)
1351 if child.parentid is not None or childpid in self.children:
1352 raise TracingFailure(
1353 'Found internal inconsitency in process lifetime detection '
1354 'during a clone() call',
1355 None, None, None)
1356
1357 # Copy the cwd object.
1358 child.initial_cwd = self.get_cwd()
1359 child.parentid = self.pid
1360 # It is necessary because the logs are processed out of order.
1361 self.children.append(child)
1362
1363 def handle_close(self, _args, _result):
1364 pass
1365
1366 def handle_chmod(self, _args, _result):
1367 pass
1368
1369 def handle_creat(self, _args, _result):
1370 # Ignore files created, since they didn't need to exist.
1371 pass
1372
1373 @parse_args(r'^\"(.+?)\", \[(.+)\], \[\/\* \d+ vars? \*\/\]$', True)
1374 def handle_execve(self, args, _result):
1375 # Even if in practice execve() doesn't returns when it succeeds, strace
1376 # still prints '0' as the result.
1377 filepath = args[0]
1378 self._handle_file(filepath, False)
1379 self.executable = self.RelativePath(self.get_cwd(), filepath)
1380 self.command = strace_process_quoted_arguments(args[1])
1381
1382 def handle_exit_group(self, _args, _result):
1383 """Removes cwd."""
1384 self.cwd = None
1385
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001386 @parse_args(r'^(\d+|AT_FDCWD), \"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', True)
1387 def handle_faccessat(self, args, _results):
1388 if args[0] == 'AT_FDCWD':
1389 self._handle_file(args[1], True)
1390 else:
1391 raise Exception('Relative faccess not implemented.')
1392
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001393 def handle_fork(self, args, result):
1394 self._handle_unknown('fork', args, result)
1395
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001396 def handle_futex(self, _args, _result):
1397 pass
1398
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001399 def handle_getcwd(self, _args, _result):
1400 pass
1401
1402 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1403 def handle_link(self, args, _result):
1404 self._handle_file(args[0], False)
1405 self._handle_file(args[1], False)
1406
1407 @parse_args(r'\"(.+?)\", \{.+?, \.\.\.\}', True)
1408 def handle_lstat(self, args, _result):
1409 self._handle_file(args[0], True)
1410
1411 def handle_mkdir(self, _args, _result):
1412 pass
1413
1414 @parse_args(r'^\"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', False)
1415 def handle_open(self, args, _result):
1416 if 'O_DIRECTORY' in args[1]:
1417 return
1418 self._handle_file(args[0], False)
1419
1420 @parse_args(r'^(\d+|AT_FDCWD), \"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', False)
1421 def handle_openat(self, args, _result):
1422 if 'O_DIRECTORY' in args[2]:
1423 return
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001424 if args[0] == 'AT_FDCWD':
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001425 self._handle_file(args[1], False)
1426 else:
1427 # TODO(maruel): Implement relative open if necessary instead of the
1428 # AT_FDCWD flag, let's hope not since this means tracking all active
1429 # directory handles.
1430 raise Exception('Relative open via openat not implemented.')
1431
1432 @parse_args(r'^\"(.+?)\", \".+?\"(\.\.\.)?, \d+$', False)
1433 def handle_readlink(self, args, _result):
1434 self._handle_file(args[0], False)
1435
1436 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1437 def handle_rename(self, args, _result):
1438 self._handle_file(args[0], False)
1439 self._handle_file(args[1], False)
1440
1441 def handle_rmdir(self, _args, _result):
1442 pass
1443
1444 def handle_setxattr(self, _args, _result):
1445 pass
1446
1447 @parse_args(r'\"(.+?)\", \{.+?, \.\.\.\}', True)
1448 def handle_stat(self, args, _result):
1449 self._handle_file(args[0], True)
1450
1451 def handle_symlink(self, _args, _result):
1452 pass
1453
csharp@chromium.orgfa672922012-11-02 17:43:31 +00001454 @parse_args(r'^\"(.+?)\", \d+', True)
1455 def handle_truncate(self, args, _result):
1456 self._handle_file(args[0], False)
1457
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001458 def handle_unlink(self, _args, _result):
1459 # In theory, the file had to be created anyway.
1460 pass
1461
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001462 def handle_unlinkat(self, _args, _result):
1463 # In theory, the file had to be created anyway.
1464 pass
1465
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001466 def handle_statfs(self, _args, _result):
1467 pass
1468
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001469 def handle_utimensat(self, _args, _result):
1470 pass
1471
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001472 def handle_vfork(self, args, result):
1473 self._handle_unknown('vfork', args, result)
1474
1475 @staticmethod
1476 def _handle_unknown(function, args, result):
1477 raise TracingFailure(
1478 'Unexpected/unimplemented trace %s(%s)= %s' %
1479 (function, args, result),
1480 None, None, None)
1481
1482 def _handle_file(self, filepath, touch_only):
1483 filepath = self.RelativePath(self.get_cwd(), filepath)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001484 #assert not touch_only, render(filepath)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001485 self.add_file(filepath, touch_only)
1486
1487 def __init__(self, blacklist, initial_cwd):
1488 super(Strace.Context, self).__init__(blacklist)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001489 assert_is_renderable(initial_cwd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001490 self.initial_cwd = initial_cwd
1491
1492 def render(self):
1493 """Returns the string value of the initial cwd of the root process.
1494
1495 Used by RelativePath.
1496 """
1497 return self.initial_cwd
1498
1499 def on_line(self, pid, line):
1500 """Transfers control into the Process.on_line() function."""
1501 self.get_or_set_proc(pid).on_line(line.strip())
1502
1503 def to_results(self):
1504 """Finds back the root process and verify consistency."""
1505 # TODO(maruel): Absolutely unecessary, fix me.
1506 root = [p for p in self._process_lookup.itervalues() if not p.parentid]
1507 if len(root) != 1:
1508 raise TracingFailure(
1509 'Found internal inconsitency in process lifetime detection '
1510 'while finding the root process',
1511 None,
1512 None,
1513 None,
1514 sorted(p.pid for p in root))
1515 self.root_process = root[0]
1516 process = self.root_process.to_results_process()
1517 if sorted(self._process_lookup) != sorted(p.pid for p in process.all):
1518 raise TracingFailure(
1519 'Found internal inconsitency in process lifetime detection '
1520 'while looking for len(tree) == len(list)',
1521 None,
1522 None,
1523 None,
1524 sorted(self._process_lookup),
1525 sorted(p.pid for p in process.all))
1526 return Results(process)
1527
1528 def get_or_set_proc(self, pid):
1529 """Returns the Context.Process instance for this pid or creates a new one.
1530 """
1531 if not pid or not isinstance(pid, int):
1532 raise TracingFailure(
1533 'Unpexpected value for pid: %r' % pid,
1534 pid,
1535 None,
1536 None,
1537 pid)
1538 if pid not in self._process_lookup:
1539 self._process_lookup[pid] = self.Process(self, pid)
1540 return self._process_lookup[pid]
1541
1542 @classmethod
1543 def traces(cls):
1544 """Returns the list of all handled traces to pass this as an argument to
1545 strace.
1546 """
1547 prefix = 'handle_'
1548 return [i[len(prefix):] for i in dir(cls.Process) if i.startswith(prefix)]
1549
1550 class Tracer(ApiBase.Tracer):
1551 MAX_LEN = 256
1552
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001553 def __init__(self, logname, use_sudo):
1554 super(Strace.Tracer, self).__init__(logname)
1555 self.use_sudo = use_sudo
1556
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001557 def trace(self, cmd, cwd, tracename, output):
1558 """Runs strace on an executable."""
1559 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
1560 assert os.path.isabs(cmd[0]), cmd[0]
1561 assert os.path.isabs(cwd), cwd
1562 assert os.path.normpath(cwd) == cwd, cwd
1563 with self._lock:
1564 if not self._initialized:
1565 raise TracingFailure(
1566 'Called Tracer.trace() on an unitialized object',
1567 None, None, None, tracename)
1568 assert tracename not in (i['trace'] for i in self._traces)
1569 stdout = stderr = None
1570 if output:
1571 stdout = subprocess.PIPE
1572 stderr = subprocess.STDOUT
1573 # Ensure all file related APIs are hooked.
1574 traces = ','.join(Strace.Context.traces() + ['file'])
1575 trace_cmd = [
1576 'strace',
1577 '-ff',
1578 '-s', '%d' % self.MAX_LEN,
1579 '-e', 'trace=%s' % traces,
1580 '-o', self._logname + '.' + tracename,
1581 ]
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001582 if self.use_sudo is True:
1583 trace_cmd.insert(0, 'sudo')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001584 child = subprocess.Popen(
1585 trace_cmd + cmd,
1586 cwd=cwd,
1587 stdin=subprocess.PIPE,
1588 stdout=stdout,
1589 stderr=stderr)
1590 out = child.communicate()[0]
1591 # TODO(maruel): Walk the logs and figure out the root process would
1592 # simplify parsing the logs a *lot*.
1593 with self._lock:
1594 assert tracename not in (i['trace'] for i in self._traces)
1595 self._traces.append(
1596 {
1597 'cmd': cmd,
1598 'cwd': cwd,
maruel@chromium.org964ddde2012-10-29 18:13:46 +00001599 'output': out,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001600 # The pid of strace process, not very useful.
1601 'pid': child.pid,
1602 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001603 })
1604 return child.returncode, out
1605
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001606 def __init__(self, use_sudo=None):
1607 super(Strace, self).__init__()
1608 self.use_sudo = use_sudo
1609
1610 def get_tracer(self, logname):
1611 return self.Tracer(logname, self.use_sudo)
1612
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001613 @staticmethod
1614 def clean_trace(logname):
1615 if os.path.isfile(logname):
1616 os.remove(logname)
1617 # Also delete any pid specific file from previous traces.
1618 for i in glob.iglob(logname + '.*'):
1619 if i.rsplit('.', 1)[1].isdigit():
1620 os.remove(i)
1621
1622 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001623 def parse_log(cls, logname, blacklist, trace_name):
1624 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00001625 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001626 data = read_json(logname)
1627 out = []
1628 for item in data['traces']:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001629 if trace_name and item['trace'] != trace_name:
1630 continue
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001631 result = {
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001632 'output': item['output'],
maruel@chromium.org964ddde2012-10-29 18:13:46 +00001633 'trace': item['trace'],
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001634 }
1635 try:
1636 context = cls.Context(blacklist, item['cwd'])
1637 for pidfile in glob.iglob('%s.%s.*' % (logname, item['trace'])):
1638 pid = pidfile.rsplit('.', 1)[1]
1639 if pid.isdigit():
1640 pid = int(pid)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001641 for line in open(pidfile, 'rb'):
1642 context.on_line(pid, line)
1643 result['results'] = context.to_results()
maruel@chromium.org424f5692012-10-15 15:42:38 +00001644 except TracingFailure:
1645 result['exception'] = sys.exc_info()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001646 out.append(result)
1647 return out
1648
1649
1650class Dtrace(ApiBase):
1651 """Uses DTrace framework through dtrace. Requires root access.
1652
1653 Implies Mac OSX.
1654
1655 dtruss can't be used because it has compatibility issues with python.
1656
1657 Also, the pid->cwd handling needs to be done manually since OSX has no way to
1658 get the absolute path of the 'cwd' dtrace variable from the probe.
1659
1660 Also, OSX doesn't populate curpsinfo->pr_psargs properly, see
1661 https://discussions.apple.com/thread/1980539. So resort to handling execve()
1662 manually.
1663
1664 errno is not printed in the log since this implementation currently only cares
1665 about files that were successfully opened.
1666 """
1667 class Context(ApiBase.Context):
1668 # Format: index pid function(args)
1669 RE_HEADER = re.compile(r'^\d+ (\d+) ([a-zA-Z_\-]+)\((.*?)\)$')
1670
1671 # Arguments parsing.
1672 RE_DTRACE_BEGIN = re.compile(r'^\"(.+?)\"$')
1673 RE_CHDIR = re.compile(r'^\"(.+?)\"$')
1674 RE_EXECVE = re.compile(r'^\"(.+?)\", \[(\d+), (.+)\]$')
1675 RE_OPEN = re.compile(r'^\"(.+?)\", (0x[0-9a-z]+), (0x[0-9a-z]+)$')
1676 RE_PROC_START = re.compile(r'^(\d+), \"(.+?)\", (\d+)$')
1677 RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$')
1678
1679 O_DIRECTORY = 0x100000
1680
1681 class Process(ApiBase.Context.Process):
1682 def __init__(self, *args):
1683 super(Dtrace.Context.Process, self).__init__(*args)
1684 self.cwd = self.initial_cwd
1685
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001686 def __init__(self, blacklist, thunk_pid, initial_cwd):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001687 logging.info(
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001688 '%s(%d, %s)' % (self.__class__.__name__, thunk_pid, initial_cwd))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001689 super(Dtrace.Context, self).__init__(blacklist)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001690 assert isinstance(initial_cwd, unicode), initial_cwd
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001691 # Process ID of the temporary script created by create_thunk().
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001692 self._thunk_pid = thunk_pid
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001693 self._initial_cwd = initial_cwd
1694 self._line_number = 0
1695
1696 def on_line(self, line):
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001697 assert isinstance(line, unicode), line
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001698 self._line_number += 1
1699 match = self.RE_HEADER.match(line)
1700 if not match:
1701 raise TracingFailure(
1702 'Found malformed line: %s' % line,
1703 None,
1704 self._line_number,
1705 line)
1706 fn = getattr(
1707 self,
1708 'handle_%s' % match.group(2).replace('-', '_'),
1709 self._handle_ignored)
1710 # It is guaranteed to succeed because of the regexp. Or at least I thought
1711 # it would.
1712 pid = int(match.group(1))
1713 try:
1714 return fn(pid, match.group(3))
1715 except TracingFailure, e:
1716 # Hack in the values since the handler could be a static function.
1717 e.pid = pid
1718 e.line = line
1719 e.line_number = self._line_number
1720 # Re-raise the modified exception.
1721 raise
1722 except (KeyError, NotImplementedError, ValueError), e:
1723 raise TracingFailure(
1724 'Trace generated a %s exception: %s' % (
1725 e.__class__.__name__, str(e)),
1726 pid,
1727 self._line_number,
1728 line,
1729 e)
1730
1731 def to_results(self):
1732 process = self.root_process.to_results_process()
1733 # Internal concistency check.
1734 if sorted(self._process_lookup) != sorted(p.pid for p in process.all):
1735 raise TracingFailure(
1736 'Found internal inconsitency in process lifetime detection '
1737 'while looking for len(tree) == len(list)',
1738 None,
1739 None,
1740 None,
1741 sorted(self._process_lookup),
1742 sorted(p.pid for p in process.all))
1743 return Results(process)
1744
1745 def handle_dtrace_BEGIN(self, _pid, args):
1746 if not self.RE_DTRACE_BEGIN.match(args):
1747 raise TracingFailure(
1748 'Found internal inconsitency in dtrace_BEGIN log line',
1749 None, None, None)
1750
1751 def handle_proc_start(self, pid, args):
1752 """Transfers cwd.
1753
1754 The dtrace script already takes care of only tracing the processes that
1755 are child of the traced processes so there is no need to verify the
1756 process hierarchy.
1757 """
1758 if pid in self._process_lookup:
1759 raise TracingFailure(
1760 'Found internal inconsitency in proc_start: %d started two times' %
1761 pid,
1762 None, None, None)
1763 match = self.RE_PROC_START.match(args)
1764 if not match:
1765 raise TracingFailure(
1766 'Failed to parse arguments: %s' % args,
1767 None, None, None)
1768 ppid = int(match.group(1))
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001769 if ppid == self._thunk_pid and not self.root_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001770 proc = self.root_process = self.Process(
1771 self.blacklist, pid, self._initial_cwd)
1772 elif ppid in self._process_lookup:
1773 proc = self.Process(self.blacklist, pid, self._process_lookup[ppid].cwd)
1774 self._process_lookup[ppid].children.append(proc)
1775 else:
1776 # Another process tree, ignore.
1777 return
1778 self._process_lookup[pid] = proc
1779 logging.debug(
1780 'New child: %s -> %d cwd:%s' %
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001781 (ppid, pid, render(proc.initial_cwd)))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001782
1783 def handle_proc_exit(self, pid, _args):
1784 """Removes cwd."""
1785 if pid in self._process_lookup:
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001786 # self._thunk_pid is not traced itself and other traces run neither.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001787 self._process_lookup[pid].cwd = None
1788
1789 def handle_execve(self, pid, args):
1790 """Sets the process' executable.
1791
1792 TODO(maruel): Read command line arguments. See
1793 https://discussions.apple.com/thread/1980539 for an example.
1794 https://gist.github.com/1242279
1795
1796 Will have to put the answer at http://stackoverflow.com/questions/7556249.
1797 :)
1798 """
1799 if not pid in self._process_lookup:
1800 # Another process tree, ignore.
1801 return
1802 match = self.RE_EXECVE.match(args)
1803 if not match:
1804 raise TracingFailure(
1805 'Failed to parse arguments: %r' % args,
1806 None, None, None)
1807 proc = self._process_lookup[pid]
1808 proc.executable = match.group(1)
1809 proc.command = self.process_escaped_arguments(match.group(3))
1810 if int(match.group(2)) != len(proc.command):
1811 raise TracingFailure(
1812 'Failed to parse execve() arguments: %s' % args,
1813 None, None, None)
1814
1815 def handle_chdir(self, pid, args):
1816 """Updates cwd."""
1817 if pid not in self._process_lookup:
1818 # Another process tree, ignore.
1819 return
1820 cwd = self.RE_CHDIR.match(args).group(1)
1821 if not cwd.startswith('/'):
1822 cwd2 = os.path.join(self._process_lookup[pid].cwd, cwd)
1823 logging.debug('handle_chdir(%d, %s) -> %s' % (pid, cwd, cwd2))
1824 else:
1825 logging.debug('handle_chdir(%d, %s)' % (pid, cwd))
1826 cwd2 = cwd
1827 self._process_lookup[pid].cwd = cwd2
1828
1829 def handle_open_nocancel(self, pid, args):
1830 """Redirects to handle_open()."""
1831 return self.handle_open(pid, args)
1832
1833 def handle_open(self, pid, args):
1834 if pid not in self._process_lookup:
1835 # Another process tree, ignore.
1836 return
1837 match = self.RE_OPEN.match(args)
1838 if not match:
1839 raise TracingFailure(
1840 'Failed to parse arguments: %s' % args,
1841 None, None, None)
1842 flag = int(match.group(2), 16)
1843 if self.O_DIRECTORY & flag == self.O_DIRECTORY:
1844 # Ignore directories.
1845 return
1846 self._handle_file(pid, match.group(1))
1847
1848 def handle_rename(self, pid, args):
1849 if pid not in self._process_lookup:
1850 # Another process tree, ignore.
1851 return
1852 match = self.RE_RENAME.match(args)
1853 if not match:
1854 raise TracingFailure(
1855 'Failed to parse arguments: %s' % args,
1856 None, None, None)
1857 self._handle_file(pid, match.group(1))
1858 self._handle_file(pid, match.group(2))
1859
1860 def _handle_file(self, pid, filepath):
1861 if not filepath.startswith('/'):
1862 filepath = os.path.join(self._process_lookup[pid].cwd, filepath)
1863 # We can get '..' in the path.
1864 filepath = os.path.normpath(filepath)
1865 # Sadly, still need to filter out directories here;
1866 # saw open_nocancel(".", 0, 0) = 0 lines.
1867 if os.path.isdir(filepath):
1868 return
1869 self._process_lookup[pid].add_file(filepath, False)
1870
1871 def handle_ftruncate(self, pid, args):
1872 """Just used as a signal to kill dtrace, ignoring."""
1873 pass
1874
1875 @staticmethod
1876 def _handle_ignored(pid, args):
1877 """Is called for all the event traces that are not handled."""
1878 raise NotImplementedError('Please implement me')
1879
1880 @staticmethod
1881 def process_escaped_arguments(text):
1882 """Extracts escaped arguments on a string and return the arguments as a
1883 list.
1884
1885 Implemented as an automaton.
1886
1887 Example:
1888 With text = '\\001python2.7\\001-c\\001print(\\"hi\\")\\0', the
1889 function will return ['python2.7', '-c', 'print("hi")]
1890 """
1891 if not text.endswith('\\0'):
1892 raise ValueError('String is not null terminated: %r' % text, text)
1893 text = text[:-2]
1894
1895 def unescape(x):
1896 """Replaces '\\' with '\' and '\?' (where ? is anything) with ?."""
1897 out = []
1898 escaped = False
1899 for i in x:
1900 if i == '\\' and not escaped:
1901 escaped = True
1902 continue
1903 escaped = False
1904 out.append(i)
1905 return ''.join(out)
1906
1907 return [unescape(i) for i in text.split('\\001')]
1908
1909 class Tracer(ApiBase.Tracer):
1910 # pylint: disable=C0301
1911 #
1912 # To understand the following code, you'll want to take a look at:
1913 # http://developers.sun.com/solaris/articles/dtrace_quickref/dtrace_quickref.html
1914 # https://wikis.oracle.com/display/DTrace/Variables
1915 # http://docs.oracle.com/cd/E19205-01/820-4221/
1916 #
1917 # 0. Dump all the valid probes into a text file. It is important, you
1918 # want to redirect into a file and you don't want to constantly 'sudo'.
1919 # $ sudo dtrace -l > probes.txt
1920 #
1921 # 1. Count the number of probes:
1922 # $ wc -l probes.txt
1923 # 81823 # On OSX 10.7, including 1 header line.
1924 #
1925 # 2. List providers, intentionally skipping all the 'syspolicy10925' and the
1926 # likes and skipping the header with NR>1:
1927 # $ awk 'NR>1 { print $2 }' probes.txt | sort | uniq | grep -v '[[:digit:]]'
1928 # dtrace
1929 # fbt
1930 # io
1931 # ip
1932 # lockstat
1933 # mach_trap
1934 # proc
1935 # profile
1936 # sched
1937 # syscall
1938 # tcp
1939 # vminfo
1940 #
1941 # 3. List of valid probes:
1942 # $ grep syscall probes.txt | less
1943 # or use dtrace directly:
1944 # $ sudo dtrace -l -P syscall | less
1945 #
1946 # trackedpid is an associative array where its value can be 0, 1 or 2.
1947 # 0 is for untracked processes and is the default value for items not
1948 # in the associative array.
1949 # 1 is for tracked processes.
1950 # 2 is for the script created by create_thunk() only. It is not tracked
1951 # itself but all its decendants are.
1952 #
1953 # The script will kill itself only once waiting_to_die == 1 and
1954 # current_processes == 0, so that both getlogin() was called and that
1955 # all traced processes exited.
1956 #
1957 # TODO(maruel): Use cacheable predicates. See
1958 # https://wikis.oracle.com/display/DTrace/Performance+Considerations
1959 D_CODE = """
1960 dtrace:::BEGIN {
1961 waiting_to_die = 0;
1962 current_processes = 0;
1963 logindex = 0;
1964 printf("%d %d %s_%s(\\"%s\\")\\n",
1965 logindex, PID, probeprov, probename, SCRIPT);
1966 logindex++;
1967 }
1968
1969 proc:::start /trackedpid[ppid]/ {
1970 trackedpid[pid] = 1;
1971 current_processes += 1;
1972 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
1973 logindex, pid, probeprov, probename,
1974 ppid,
1975 execname,
1976 current_processes);
1977 logindex++;
1978 }
1979 /* Should use SCRIPT but there is no access to this variable at that
1980 * point. */
1981 proc:::start /ppid == PID && execname == "Python"/ {
1982 trackedpid[pid] = 2;
1983 current_processes += 1;
1984 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
1985 logindex, pid, probeprov, probename,
1986 ppid,
1987 execname,
1988 current_processes);
1989 logindex++;
1990 }
1991 proc:::exit /trackedpid[pid] &&
1992 current_processes == 1 &&
1993 waiting_to_die == 1/ {
1994 trackedpid[pid] = 0;
1995 current_processes -= 1;
1996 printf("%d %d %s_%s(%d)\\n",
1997 logindex, pid, probeprov, probename,
1998 current_processes);
1999 logindex++;
2000 exit(0);
2001 }
2002 proc:::exit /trackedpid[pid]/ {
2003 trackedpid[pid] = 0;
2004 current_processes -= 1;
2005 printf("%d %d %s_%s(%d)\\n",
2006 logindex, pid, probeprov, probename,
2007 current_processes);
2008 logindex++;
2009 }
2010
2011 /* Use an arcane function to detect when we need to die */
2012 syscall::ftruncate:entry /pid == PID && arg0 == FILE_ID/ {
2013 waiting_to_die = 1;
2014 printf("%d %d %s()\\n", logindex, pid, probefunc);
2015 logindex++;
2016 }
2017 syscall::ftruncate:entry /
2018 pid == PID && arg0 == FILE_ID && current_processes == 0/ {
2019 exit(0);
2020 }
2021
2022 syscall::open*:entry /trackedpid[pid] == 1/ {
2023 self->open_arg0 = arg0;
2024 self->open_arg1 = arg1;
2025 self->open_arg2 = arg2;
2026 }
2027 syscall::open*:return /trackedpid[pid] == 1 && errno == 0/ {
2028 this->open_arg0 = copyinstr(self->open_arg0);
2029 printf("%d %d %s(\\"%s\\", 0x%x, 0x%x)\\n",
2030 logindex, pid, probefunc,
2031 this->open_arg0,
2032 self->open_arg1,
2033 self->open_arg2);
2034 logindex++;
2035 this->open_arg0 = 0;
2036 }
2037 syscall::open*:return /trackedpid[pid] == 1/ {
2038 self->open_arg0 = 0;
2039 self->open_arg1 = 0;
2040 self->open_arg2 = 0;
2041 }
2042
2043 syscall::rename:entry /trackedpid[pid] == 1/ {
2044 self->rename_arg0 = arg0;
2045 self->rename_arg1 = arg1;
2046 }
2047 syscall::rename:return /trackedpid[pid] == 1 && errno == 0/ {
2048 this->rename_arg0 = copyinstr(self->rename_arg0);
2049 this->rename_arg1 = copyinstr(self->rename_arg1);
2050 printf("%d %d %s(\\"%s\\", \\"%s\\")\\n",
2051 logindex, pid, probefunc,
2052 this->rename_arg0,
2053 this->rename_arg1);
2054 logindex++;
2055 this->rename_arg0 = 0;
2056 this->rename_arg1 = 0;
2057 }
2058 syscall::rename:return /trackedpid[pid] == 1/ {
2059 self->rename_arg0 = 0;
2060 self->rename_arg1 = 0;
2061 }
2062
2063 /* Track chdir, it's painful because it is only receiving relative path.
2064 */
2065 syscall::chdir:entry /trackedpid[pid] == 1/ {
2066 self->chdir_arg0 = arg0;
2067 }
2068 syscall::chdir:return /trackedpid[pid] == 1 && errno == 0/ {
2069 this->chdir_arg0 = copyinstr(self->chdir_arg0);
2070 printf("%d %d %s(\\"%s\\")\\n",
2071 logindex, pid, probefunc,
2072 this->chdir_arg0);
2073 logindex++;
2074 this->chdir_arg0 = 0;
2075 }
2076 syscall::chdir:return /trackedpid[pid] == 1/ {
2077 self->chdir_arg0 = 0;
2078 }
2079 """
2080
2081 # execve-specific code, tends to throw a lot of exceptions.
2082 D_CODE_EXECVE = """
2083 /* Finally what we care about! */
2084 syscall::exec*:entry /trackedpid[pid]/ {
2085 self->exec_arg0 = copyinstr(arg0);
2086 /* Incrementally probe for a NULL in the argv parameter of execve() to
2087 * figure out argc. */
2088 /* TODO(maruel): Skip the remaining copyin() when a NULL pointer was
2089 * found. */
2090 self->exec_argc = 0;
2091 /* Probe for argc==1 */
2092 this->exec_argv = (user_addr_t*)copyin(
2093 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2094 self->exec_argc = this->exec_argv[self->exec_argc] ?
2095 (self->exec_argc + 1) : self->exec_argc;
2096
2097 /* Probe for argc==2 */
2098 this->exec_argv = (user_addr_t*)copyin(
2099 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2100 self->exec_argc = this->exec_argv[self->exec_argc] ?
2101 (self->exec_argc + 1) : self->exec_argc;
2102
2103 /* Probe for argc==3 */
2104 this->exec_argv = (user_addr_t*)copyin(
2105 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2106 self->exec_argc = this->exec_argv[self->exec_argc] ?
2107 (self->exec_argc + 1) : self->exec_argc;
2108
2109 /* Probe for argc==4 */
2110 this->exec_argv = (user_addr_t*)copyin(
2111 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2112 self->exec_argc = this->exec_argv[self->exec_argc] ?
2113 (self->exec_argc + 1) : self->exec_argc;
2114
2115 /* Copy the inputs strings since there is no guarantee they'll be
2116 * present after the call completed. */
2117 self->exec_argv0 = (self->exec_argc > 0) ?
2118 copyinstr(this->exec_argv[0]) : "";
2119 self->exec_argv1 = (self->exec_argc > 1) ?
2120 copyinstr(this->exec_argv[1]) : "";
2121 self->exec_argv2 = (self->exec_argc > 2) ?
2122 copyinstr(this->exec_argv[2]) : "";
2123 self->exec_argv3 = (self->exec_argc > 3) ?
2124 copyinstr(this->exec_argv[3]) : "";
2125 this->exec_argv = 0;
2126 }
2127 syscall::exec*:return /trackedpid[pid] && errno == 0/ {
2128 /* We need to join strings here, as using multiple printf() would
2129 * cause tearing when multiple threads/processes are traced.
2130 * Since it is impossible to escape a string and join it to another one,
2131 * like sprintf("%s%S", previous, more), use hackery.
2132 * Each of the elements are split with a \\1. \\0 cannot be used because
2133 * it is simply ignored. This will conflict with any program putting a
2134 * \\1 in their execve() string but this should be "rare enough" */
2135 this->args = "";
2136 /* Process exec_argv[0] */
2137 this->args = strjoin(
2138 this->args, (self->exec_argc > 0) ? self->exec_argv0 : "");
2139
2140 /* Process exec_argv[1] */
2141 this->args = strjoin(
2142 this->args, (self->exec_argc > 1) ? "\\1" : "");
2143 this->args = strjoin(
2144 this->args, (self->exec_argc > 1) ? self->exec_argv1 : "");
2145
2146 /* Process exec_argv[2] */
2147 this->args = strjoin(
2148 this->args, (self->exec_argc > 2) ? "\\1" : "");
2149 this->args = strjoin(
2150 this->args, (self->exec_argc > 2) ? self->exec_argv2 : "");
2151
2152 /* Process exec_argv[3] */
2153 this->args = strjoin(
2154 this->args, (self->exec_argc > 3) ? "\\1" : "");
2155 this->args = strjoin(
2156 this->args, (self->exec_argc > 3) ? self->exec_argv3 : "");
2157
2158 /* Prints self->exec_argc to permits verifying the internal
2159 * consistency since this code is quite fishy. */
2160 printf("%d %d %s(\\"%s\\", [%d, %S])\\n",
2161 logindex, pid, probefunc,
2162 self->exec_arg0,
2163 self->exec_argc,
2164 this->args);
2165 logindex++;
2166 this->args = 0;
2167 }
2168 syscall::exec*:return /trackedpid[pid]/ {
2169 self->exec_arg0 = 0;
2170 self->exec_argc = 0;
2171 self->exec_argv0 = 0;
2172 self->exec_argv1 = 0;
2173 self->exec_argv2 = 0;
2174 self->exec_argv3 = 0;
2175 }
2176 """
2177
2178 # Code currently not used.
2179 D_EXTRANEOUS = """
2180 /* This is a good learning experience, since it traces a lot of things
2181 * related to the process and child processes.
2182 * Warning: it generates a gigantic log. For example, tracing
2183 * "data/trace_inputs/child1.py --child" generates a 2mb log and takes
2184 * several minutes to execute.
2185 */
2186 /*
2187 mach_trap::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2188 printf("%d %d %s_%s() = %d\\n",
2189 logindex, pid, probeprov, probefunc, errno);
2190 logindex++;
2191 }
2192 proc::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2193 printf("%d %d %s_%s() = %d\\n",
2194 logindex, pid, probeprov, probefunc, errno);
2195 logindex++;
2196 }
2197 sched::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2198 printf("%d %d %s_%s() = %d\\n",
2199 logindex, pid, probeprov, probefunc, errno);
2200 logindex++;
2201 }
2202 syscall::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2203 printf("%d %d %s_%s() = %d\\n",
2204 logindex, pid, probeprov, probefunc, errno);
2205 logindex++;
2206 }
2207 vminfo::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2208 printf("%d %d %s_%s() = %d\\n",
2209 logindex, pid, probeprov, probefunc, errno);
2210 logindex++;
2211 }
2212 */
2213 /* TODO(maruel): *stat* functions and friends
2214 syscall::access:return,
2215 syscall::chdir:return,
2216 syscall::chflags:return,
2217 syscall::chown:return,
2218 syscall::chroot:return,
2219 syscall::getattrlist:return,
2220 syscall::getxattr:return,
2221 syscall::lchown:return,
2222 syscall::lstat64:return,
2223 syscall::lstat:return,
2224 syscall::mkdir:return,
2225 syscall::pathconf:return,
2226 syscall::readlink:return,
2227 syscall::removexattr:return,
2228 syscall::setxattr:return,
2229 syscall::stat64:return,
2230 syscall::stat:return,
2231 syscall::truncate:return,
2232 syscall::unlink:return,
2233 syscall::utimes:return,
2234 */
2235 """
2236
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002237 def __init__(self, logname, use_sudo):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002238 """Starts the log collection with dtrace.
2239
2240 Requires root access or chmod 4555 on dtrace. dtrace is asynchronous so
2241 this needs to wait for dtrace to be "warmed up".
2242 """
2243 super(Dtrace.Tracer, self).__init__(logname)
2244 self._script = create_thunk()
2245 # This unique dummy temp file is used to signal the dtrace script that it
2246 # should stop as soon as all the child processes are done. A bit hackish
2247 # but works fine enough.
2248 self._dummy_file_id, self._dummy_file_name = tempfile.mkstemp(
2249 prefix='trace_signal_file')
2250
maruel@chromium.org9b2112a2013-04-05 18:01:02 +00002251 dtrace_path = '/usr/sbin/dtrace'
2252 if not os.path.isfile(dtrace_path):
2253 dtrace_path = 'dtrace'
2254 elif use_sudo is None and (os.stat(dtrace_path).st_mode & stat.S_ISUID):
2255 # No need to sudo. For those following at home, don't do that.
2256 use_sudo = False
2257
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002258 # Note: do not use the -p flag. It's useless if the initial process quits
2259 # too fast, resulting in missing traces from the grand-children. The D
2260 # code manages the dtrace lifetime itself.
2261 trace_cmd = [
maruel@chromium.org9b2112a2013-04-05 18:01:02 +00002262 dtrace_path,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002263 # Use a larger buffer if getting 'out of scratch space' errors.
2264 # Ref: https://wikis.oracle.com/display/DTrace/Options+and+Tunables
2265 '-b', '10m',
2266 '-x', 'dynvarsize=10m',
2267 #'-x', 'dtrace_global_maxsize=1m',
2268 '-x', 'evaltime=exec',
2269 '-o', '/dev/stderr',
2270 '-q',
2271 '-n', self._get_dtrace_code(),
2272 ]
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002273 if use_sudo is not False:
2274 trace_cmd.insert(0, 'sudo')
2275
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002276 with open(self._logname + '.log', 'wb') as logfile:
2277 self._dtrace = subprocess.Popen(
2278 trace_cmd, stdout=logfile, stderr=subprocess.STDOUT)
2279 logging.debug('Started dtrace pid: %d' % self._dtrace.pid)
2280
2281 # Reads until one line is printed, which signifies dtrace is up and ready.
2282 with open(self._logname + '.log', 'rb') as logfile:
2283 while 'dtrace_BEGIN' not in logfile.readline():
2284 if self._dtrace.poll() is not None:
2285 # Do a busy wait. :/
2286 break
2287 logging.debug('dtrace started')
2288
2289 def _get_dtrace_code(self):
2290 """Setups the D code to implement child process tracking.
2291
2292 Injects the cookie in the script so it knows when to stop.
2293
2294 The script will detect any instance of the script created with
2295 create_thunk() and will start tracing it.
2296 """
maruel@chromium.orgbe75f282012-11-02 19:34:37 +00002297 out = (
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002298 'inline int PID = %d;\n'
2299 'inline string SCRIPT = "%s";\n'
2300 'inline int FILE_ID = %d;\n'
2301 '\n'
2302 '%s') % (
2303 os.getpid(),
2304 self._script,
2305 self._dummy_file_id,
maruel@chromium.orgbe75f282012-11-02 19:34:37 +00002306 self.D_CODE)
2307 if os.environ.get('TRACE_INPUTS_DTRACE_ENABLE_EXECVE') == '1':
2308 # Do not enable by default since it tends to spew dtrace: error lines
2309 # because the execve() parameters are not in valid memory at the time of
2310 # logging.
2311 # TODO(maruel): Find a way to make this reliable since it's useful but
2312 # only works in limited/trivial uses cases for now.
2313 out += self.D_CODE_EXECVE
2314 return out
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002315
2316 def trace(self, cmd, cwd, tracename, output):
2317 """Runs dtrace on an executable.
2318
2319 This dtruss is broken when it starts the process itself or when tracing
2320 child processes, this code starts a wrapper process
2321 generated with create_thunk() which starts the executable to trace.
2322 """
2323 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
2324 assert os.path.isabs(cmd[0]), cmd[0]
2325 assert os.path.isabs(cwd), cwd
2326 assert os.path.normpath(cwd) == cwd, cwd
2327 with self._lock:
2328 if not self._initialized:
2329 raise TracingFailure(
2330 'Called Tracer.trace() on an unitialized object',
2331 None, None, None, tracename)
2332 assert tracename not in (i['trace'] for i in self._traces)
2333
2334 # Starts the script wrapper to start the child process. This signals the
2335 # dtrace script that this process is to be traced.
2336 stdout = stderr = None
2337 if output:
2338 stdout = subprocess.PIPE
2339 stderr = subprocess.STDOUT
2340 child_cmd = [
2341 sys.executable,
2342 self._script,
2343 tracename,
2344 ]
2345 # Call a dummy function so that dtrace knows I'm about to launch a process
2346 # that needs to be traced.
2347 # Yummy.
2348 child = subprocess.Popen(
2349 child_cmd + fix_python_path(cmd),
2350 stdin=subprocess.PIPE,
2351 stdout=stdout,
2352 stderr=stderr,
2353 cwd=cwd)
2354 logging.debug('Started child pid: %d' % child.pid)
2355
2356 out = child.communicate()[0]
2357 # This doesn't mean tracing is done, one of the grand-child process may
2358 # still be alive. It will be tracked with the dtrace script.
2359
2360 with self._lock:
2361 assert tracename not in (i['trace'] for i in self._traces)
2362 self._traces.append(
2363 {
2364 'cmd': cmd,
2365 'cwd': cwd,
2366 # The pid of strace process, not very useful.
2367 'pid': child.pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002368 'output': out,
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002369 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002370 })
2371 return child.returncode, out
2372
2373 def close(self, timeout=None):
2374 """Terminates dtrace."""
2375 logging.debug('close(%s)' % timeout)
2376 try:
2377 try:
2378 super(Dtrace.Tracer, self).close(timeout)
2379 # Signal dtrace that it should stop now.
maruel@chromium.orgf43e68b2012-10-15 20:23:10 +00002380 # ftruncate doesn't exist on Windows.
2381 os.ftruncate(self._dummy_file_id, 0) # pylint: disable=E1101
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002382 if timeout:
2383 start = time.time()
2384 # Use polling. :/
2385 while (self._dtrace.poll() is None and
2386 (time.time() - start) < timeout):
2387 time.sleep(0.1)
2388 self._dtrace.kill()
2389 self._dtrace.wait()
2390 finally:
2391 # Make sure to kill it in any case.
2392 if self._dtrace.poll() is None:
2393 try:
2394 self._dtrace.kill()
2395 self._dtrace.wait()
2396 except OSError:
2397 pass
2398
2399 if self._dtrace.returncode != 0:
2400 # Warn about any dtrace failure but basically ignore it.
2401 print 'dtrace failure: %s' % self._dtrace.returncode
2402 finally:
2403 os.close(self._dummy_file_id)
2404 os.remove(self._dummy_file_name)
2405 os.remove(self._script)
2406
2407 def post_process_log(self):
2408 """Sorts the log back in order when each call occured.
2409
2410 dtrace doesn't save the buffer in strict order since it keeps one buffer
2411 per CPU.
2412 """
2413 super(Dtrace.Tracer, self).post_process_log()
2414 logname = self._logname + '.log'
2415 with open(logname, 'rb') as logfile:
2416 lines = [l for l in logfile if l.strip()]
2417 errors = [l for l in lines if l.startswith('dtrace:')]
2418 if errors:
2419 raise TracingFailure(
2420 'Found errors in the trace: %s' % '\n'.join(errors),
2421 None, None, None, logname)
2422 try:
2423 lines = sorted(lines, key=lambda l: int(l.split(' ', 1)[0]))
2424 except ValueError:
2425 raise TracingFailure(
2426 'Found errors in the trace: %s' % '\n'.join(
2427 l for l in lines if l.split(' ', 1)[0].isdigit()),
2428 None, None, None, logname)
2429 with open(logname, 'wb') as logfile:
2430 logfile.write(''.join(lines))
2431
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002432 def __init__(self, use_sudo=None):
2433 super(Dtrace, self).__init__()
2434 self.use_sudo = use_sudo
2435
2436 def get_tracer(self, logname):
2437 return self.Tracer(logname, self.use_sudo)
2438
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002439 @staticmethod
2440 def clean_trace(logname):
2441 for ext in ('', '.log'):
2442 if os.path.isfile(logname + ext):
2443 os.remove(logname + ext)
2444
2445 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00002446 def parse_log(cls, logname, blacklist, trace_name):
2447 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00002448 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002449
2450 def blacklist_more(filepath):
2451 # All the HFS metadata is in the form /.vol/...
2452 return blacklist(filepath) or re.match(r'^\/\.vol\/.+$', filepath)
2453
2454 data = read_json(logname)
2455 out = []
2456 for item in data['traces']:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00002457 if trace_name and item['trace'] != trace_name:
2458 continue
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002459 result = {
2460 'output': item['output'],
2461 'trace': item['trace'],
2462 }
2463 try:
2464 context = cls.Context(blacklist_more, item['pid'], item['cwd'])
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002465 # It's fine to assume the file as UTF-8: OSX enforces the file names to
2466 # be valid UTF-8 and we control the log output.
2467 for line in codecs.open(logname + '.log', 'rb', encoding='utf-8'):
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002468 context.on_line(line)
2469 result['results'] = context.to_results()
2470 except TracingFailure:
2471 result['exception'] = sys.exc_info()
2472 out.append(result)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002473 return out
2474
2475
2476class LogmanTrace(ApiBase):
2477 """Uses the native Windows ETW based tracing functionality to trace a child
2478 process.
2479
2480 Caveat: this implementations doesn't track cwd or initial_cwd. It is because
2481 the Windows Kernel doesn't have a concept of 'current working directory' at
2482 all. A Win32 process has a map of current directories, one per drive letter
2483 and it is managed by the user mode kernel32.dll. In kernel, a file is always
2484 opened relative to another file_object or as an absolute path. All the current
2485 working directory logic is done in user mode.
2486 """
2487 class Context(ApiBase.Context):
2488 """Processes a ETW log line and keeps the list of existent and non
2489 existent files accessed.
2490
2491 Ignores directories.
2492 """
2493 # These indexes are for the stripped version in json.
2494 EVENT_NAME = 0
2495 TYPE = 1
2496 PID = 2
2497 TID = 3
2498 PROCESSOR_ID = 4
2499 TIMESTAMP = 5
2500 USER_DATA = 6
2501
2502 class Process(ApiBase.Context.Process):
2503 def __init__(self, *args):
2504 super(LogmanTrace.Context.Process, self).__init__(*args)
2505 # Handle file objects that succeeded.
2506 self.file_objects = {}
2507
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002508 def __init__(self, blacklist, thunk_pid, trace_name, thunk_cmd):
2509 logging.info(
2510 '%s(%d, %s, %s)', self.__class__.__name__, thunk_pid, trace_name,
2511 thunk_cmd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002512 super(LogmanTrace.Context, self).__init__(blacklist)
2513 self._drive_map = DosDriveMap()
2514 # Threads mapping to the corresponding process id.
2515 self._threads_active = {}
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002516 # Process ID of the tracer, e.g. the temporary script created by
2517 # create_thunk(). This is tricky because the process id may have been
2518 # reused.
2519 self._thunk_pid = thunk_pid
2520 self._thunk_cmd = thunk_cmd
2521 self._trace_name = trace_name
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002522 self._line_number = 0
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002523 self._thunk_process = None
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002524
2525 def on_line(self, line):
2526 """Processes a json Event line."""
2527 self._line_number += 1
2528 try:
2529 # By Opcode
2530 handler = getattr(
2531 self,
2532 'handle_%s_%s' % (line[self.EVENT_NAME], line[self.TYPE]),
2533 None)
2534 if not handler:
2535 raise TracingFailure(
2536 'Unexpected event %s_%s' % (
2537 line[self.EVENT_NAME], line[self.TYPE]),
2538 None, None, None)
2539 handler(line)
2540 except TracingFailure, e:
2541 # Hack in the values since the handler could be a static function.
2542 e.pid = line[self.PID]
2543 e.line = line
2544 e.line_number = self._line_number
2545 # Re-raise the modified exception.
2546 raise
2547 except (KeyError, NotImplementedError, ValueError), e:
2548 raise TracingFailure(
2549 'Trace generated a %s exception: %s' % (
2550 e.__class__.__name__, str(e)),
2551 line[self.PID],
2552 self._line_number,
2553 line,
2554 e)
2555
2556 def to_results(self):
2557 if not self.root_process:
2558 raise TracingFailure(
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002559 'Failed to detect the initial process %d' % self._thunk_pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002560 None, None, None)
2561 process = self.root_process.to_results_process()
2562 return Results(process)
2563
2564 def _thread_to_process(self, tid):
2565 """Finds the process from the thread id."""
2566 tid = int(tid, 16)
2567 pid = self._threads_active.get(tid)
2568 if not pid or not self._process_lookup.get(pid):
2569 return
2570 return self._process_lookup[pid]
2571
2572 @classmethod
2573 def handle_EventTrace_Header(cls, line):
2574 """Verifies no event was dropped, e.g. no buffer overrun occured."""
2575 BUFFER_SIZE = cls.USER_DATA
2576 #VERSION = cls.USER_DATA + 1
2577 #PROVIDER_VERSION = cls.USER_DATA + 2
2578 #NUMBER_OF_PROCESSORS = cls.USER_DATA + 3
2579 #END_TIME = cls.USER_DATA + 4
2580 #TIMER_RESOLUTION = cls.USER_DATA + 5
2581 #MAX_FILE_SIZE = cls.USER_DATA + 6
2582 #LOG_FILE_MODE = cls.USER_DATA + 7
2583 #BUFFERS_WRITTEN = cls.USER_DATA + 8
2584 #START_BUFFERS = cls.USER_DATA + 9
2585 #POINTER_SIZE = cls.USER_DATA + 10
2586 EVENTS_LOST = cls.USER_DATA + 11
2587 #CPU_SPEED = cls.USER_DATA + 12
2588 #LOGGER_NAME = cls.USER_DATA + 13
2589 #LOG_FILE_NAME = cls.USER_DATA + 14
2590 #BOOT_TIME = cls.USER_DATA + 15
2591 #PERF_FREQ = cls.USER_DATA + 16
2592 #START_TIME = cls.USER_DATA + 17
2593 #RESERVED_FLAGS = cls.USER_DATA + 18
2594 #BUFFERS_LOST = cls.USER_DATA + 19
2595 #SESSION_NAME_STRING = cls.USER_DATA + 20
2596 #LOG_FILE_NAME_STRING = cls.USER_DATA + 21
2597 if line[EVENTS_LOST] != '0':
2598 raise TracingFailure(
2599 ( '%s events were lost during trace, please increase the buffer '
2600 'size from %s') % (line[EVENTS_LOST], line[BUFFER_SIZE]),
2601 None, None, None)
2602
2603 def handle_FileIo_Cleanup(self, line):
2604 """General wisdom: if a file is closed, it's because it was opened.
2605
2606 Note that FileIo_Close is not used since if a file was opened properly but
2607 not closed before the process exits, only Cleanup will be logged.
2608 """
2609 #IRP = self.USER_DATA
2610 TTID = self.USER_DATA + 1 # Thread ID, that's what we want.
2611 FILE_OBJECT = self.USER_DATA + 2
2612 #FILE_KEY = self.USER_DATA + 3
2613 proc = self._thread_to_process(line[TTID])
2614 if not proc:
2615 # Not a process we care about.
2616 return
2617 file_object = line[FILE_OBJECT]
2618 if file_object in proc.file_objects:
2619 proc.add_file(proc.file_objects.pop(file_object), False)
2620
2621 def handle_FileIo_Create(self, line):
2622 """Handles a file open.
2623
2624 All FileIo events are described at
2625 http://msdn.microsoft.com/library/windows/desktop/aa363884.aspx
2626 for some value of 'description'.
2627
2628 " (..) process and thread id values of the IO events (..) are not valid "
2629 http://msdn.microsoft.com/magazine/ee358703.aspx
2630
2631 The FileIo.Create event doesn't return if the CreateFile() call
2632 succeeded, so keep track of the file_object and check that it is
2633 eventually closed with FileIo_Cleanup.
2634 """
2635 #IRP = self.USER_DATA
2636 TTID = self.USER_DATA + 1 # Thread ID, that's what we want.
2637 FILE_OBJECT = self.USER_DATA + 2
2638 #CREATE_OPTIONS = self.USER_DATA + 3
2639 #FILE_ATTRIBUTES = self.USER_DATA + 4
2640 #self.USER_DATA + SHARE_ACCESS = 5
2641 OPEN_PATH = self.USER_DATA + 6
2642
2643 proc = self._thread_to_process(line[TTID])
2644 if not proc:
2645 # Not a process we care about.
2646 return
2647
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002648 raw_path = line[OPEN_PATH]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002649 # Ignore directories and bare drive right away.
2650 if raw_path.endswith(os.path.sep):
2651 return
2652 filepath = self._drive_map.to_win32(raw_path)
2653 # Ignore bare drive right away. Some may still fall through with format
2654 # like '\\?\X:'
2655 if len(filepath) == 2:
2656 return
2657 file_object = line[FILE_OBJECT]
2658 if os.path.isdir(filepath):
2659 # There is no O_DIRECTORY equivalent on Windows. The closed is
2660 # FILE_FLAG_BACKUP_SEMANTICS but it's not exactly right either. So
2661 # simply discard directories are they are found.
2662 return
2663 # Override any stale file object
2664 proc.file_objects[file_object] = filepath
2665
2666 def handle_FileIo_Rename(self, line):
2667 # TODO(maruel): Handle?
2668 pass
2669
2670 def handle_Process_End(self, line):
2671 pid = line[self.PID]
2672 if self._process_lookup.get(pid):
2673 logging.info('Terminated: %d' % pid)
2674 self._process_lookup[pid] = None
2675 else:
2676 logging.debug('Terminated: %d' % pid)
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002677 if self._thunk_process and self._thunk_process.pid == pid:
2678 self._thunk_process = None
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002679
2680 def handle_Process_Start(self, line):
2681 """Handles a new child process started by PID."""
2682 #UNIQUE_PROCESS_KEY = self.USER_DATA
2683 PROCESS_ID = self.USER_DATA + 1
2684 #PARENT_PID = self.USER_DATA + 2
2685 #SESSION_ID = self.USER_DATA + 3
2686 #EXIT_STATUS = self.USER_DATA + 4
2687 #DIRECTORY_TABLE_BASE = self.USER_DATA + 5
2688 #USER_SID = self.USER_DATA + 6
2689 IMAGE_FILE_NAME = self.USER_DATA + 7
2690 COMMAND_LINE = self.USER_DATA + 8
2691
2692 ppid = line[self.PID]
2693 pid = int(line[PROCESS_ID], 16)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002694 command_line = CommandLineToArgvW(line[COMMAND_LINE])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002695 logging.debug(
2696 'New process %d->%d (%s) %s' %
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002697 (ppid, pid, line[IMAGE_FILE_NAME], command_line))
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002698
2699 if pid == self._thunk_pid:
2700 # Need to ignore processes we don't know about because the log is
2701 # system-wide. self._thunk_pid shall start only one process.
2702 # This is tricky though because Windows *loves* to reuse process id and
2703 # it happens often that the process ID of the thunk script created by
2704 # create_thunk() is reused. So just detecting the pid here is not
2705 # sufficient, we must confirm the command line.
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002706 if command_line[:len(self._thunk_cmd)] != self._thunk_cmd:
2707 logging.info(
2708 'Ignoring duplicate pid %d for %s: %s while searching for %s',
2709 pid, self._trace_name, command_line, self._thunk_cmd)
2710 return
2711
2712 # TODO(maruel): The check is quite weak. Add the thunk path.
2713 if self._thunk_process:
2714 raise TracingFailure(
2715 ( 'Parent process is _thunk_pid(%d) but thunk_process(%d) is '
2716 'already set') % (self._thunk_pid, self._thunk_process.pid),
2717 None, None, None)
2718 proc = self.Process(self.blacklist, pid, None)
2719 self._thunk_process = proc
2720 return
2721 elif ppid == self._thunk_pid and self._thunk_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002722 proc = self.Process(self.blacklist, pid, None)
2723 self.root_process = proc
2724 ppid = None
2725 elif self._process_lookup.get(ppid):
2726 proc = self.Process(self.blacklist, pid, None)
2727 self._process_lookup[ppid].children.append(proc)
2728 else:
2729 # Ignore
2730 return
2731 self._process_lookup[pid] = proc
2732
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002733 proc.command = command_line
2734 proc.executable = line[IMAGE_FILE_NAME]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002735 # proc.command[0] may be the absolute path of 'executable' but it may be
2736 # anything else too. If it happens that command[0] ends with executable,
2737 # use it, otherwise defaults to the base name.
2738 cmd0 = proc.command[0].lower()
2739 if not cmd0.endswith('.exe'):
2740 # TODO(maruel): That's not strictly true either.
2741 cmd0 += '.exe'
2742 if cmd0.endswith(proc.executable) and os.path.isfile(cmd0):
2743 # Fix the path.
2744 cmd0 = cmd0.replace('/', os.path.sep)
2745 cmd0 = os.path.normpath(cmd0)
2746 proc.executable = get_native_path_case(cmd0)
2747 logging.info(
2748 'New child: %s -> %d %s' % (ppid, pid, proc.executable))
2749
2750 def handle_Thread_End(self, line):
2751 """Has the same parameters as Thread_Start."""
2752 tid = int(line[self.TID], 16)
2753 self._threads_active.pop(tid, None)
2754
2755 def handle_Thread_Start(self, line):
2756 """Handles a new thread created.
2757
2758 Do not use self.PID here since a process' initial thread is created by
2759 the parent process.
2760 """
2761 PROCESS_ID = self.USER_DATA
2762 TTHREAD_ID = self.USER_DATA + 1
2763 #STACK_BASE = self.USER_DATA + 2
2764 #STACK_LIMIT = self.USER_DATA + 3
2765 #USER_STACK_BASE = self.USER_DATA + 4
2766 #USER_STACK_LIMIT = self.USER_DATA + 5
2767 #AFFINITY = self.USER_DATA + 6
2768 #WIN32_START_ADDR = self.USER_DATA + 7
2769 #TEB_BASE = self.USER_DATA + 8
2770 #SUB_PROCESS_TAG = self.USER_DATA + 9
2771 #BASE_PRIORITY = self.USER_DATA + 10
2772 #PAGE_PRIORITY = self.USER_DATA + 11
2773 #IO_PRIORITY = self.USER_DATA + 12
2774 #THREAD_FLAGS = self.USER_DATA + 13
2775 # Do not use self.PID here since a process' initial thread is created by
2776 # the parent process.
2777 pid = int(line[PROCESS_ID], 16)
2778 tid = int(line[TTHREAD_ID], 16)
2779 logging.debug('New thread pid:%d, tid:%d' % (pid, tid))
2780 self._threads_active[tid] = pid
2781
2782 @classmethod
2783 def supported_events(cls):
2784 """Returns all the procesed events."""
2785 out = []
2786 for member in dir(cls):
2787 match = re.match(r'^handle_([A-Za-z]+)_([A-Za-z]+)$', member)
2788 if match:
2789 out.append(match.groups())
2790 return out
2791
2792 class Tracer(ApiBase.Tracer):
2793 # The basic headers.
2794 EXPECTED_HEADER = [
2795 u'Event Name',
2796 u'Type',
2797 u'Event ID',
2798 u'Version',
2799 u'Channel',
2800 u'Level', # 5
2801 u'Opcode',
2802 u'Task',
2803 u'Keyword',
2804 u'PID',
2805 u'TID', # 10
2806 u'Processor Number',
2807 u'Instance ID',
2808 u'Parent Instance ID',
2809 u'Activity ID',
2810 u'Related Activity ID', # 15
2811 u'Clock-Time',
2812 u'Kernel(ms)', # Both have a resolution of ~15ms which makes them
2813 u'User(ms)', # pretty much useless.
2814 u'User Data', # Extra arguments that are event-specific.
2815 ]
2816 # Only the useful headers common to all entries are listed there. Any column
2817 # at 19 or higher is dependent on the specific event.
2818 EVENT_NAME = 0
2819 TYPE = 1
2820 PID = 9
2821 TID = 10
2822 PROCESSOR_ID = 11
2823 TIMESTAMP = 16
2824 NULL_GUID = '{00000000-0000-0000-0000-000000000000}'
2825 USER_DATA = 19
2826
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002827 class CsvReader(object):
2828 """CSV reader that reads files generated by tracerpt.exe.
2829
2830 csv.reader() fails to read them properly, it mangles file names quoted
2831 with "" with a comma in it.
2832 """
2833 # 0. Had a ',' or one of the following ' ' after a comma, next should
2834 # be ' ', '"' or string or ',' for an empty field.
2835 ( HAD_DELIMITER,
2836 # 1. Processing an unquoted field up to ','.
2837 IN_STR,
2838 # 2. Processing a new field starting with '"'.
2839 STARTING_STR_QUOTED,
2840 # 3. Second quote in a row at the start of a field. It could be either
2841 # '""foo""' or '""'. Who the hell thought it was a great idea to use
2842 # the same character for delimiting and escaping?
2843 STARTING_SECOND_QUOTE,
2844 # 4. A quote inside a quoted string where the previous character was
2845 # not a quote, so the string is not empty. Can be either: end of a
2846 # quoted string (a delimiter) or a quote escape. The next char must be
2847 # either '"' or ','.
2848 HAD_QUOTE_IN_QUOTED,
2849 # 5. Second quote inside a quoted string.
2850 HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED,
2851 # 6. Processing a field that started with '"'.
2852 IN_STR_QUOTED) = range(7)
2853
2854 def __init__(self, f):
2855 self.f = f
2856
2857 def __iter__(self):
2858 return self
2859
2860 def next(self):
2861 """Splits the line in fields."""
2862 line = self.f.readline()
2863 if not line:
2864 raise StopIteration()
2865 line = line.strip()
2866 fields = []
2867 state = self.HAD_DELIMITER
2868 for i, c in enumerate(line):
2869 if state == self.HAD_DELIMITER:
2870 if c == ',':
2871 # Empty field.
2872 fields.append('')
2873 elif c == ' ':
2874 # Ignore initial whitespaces
2875 pass
2876 elif c == '"':
2877 state = self.STARTING_STR_QUOTED
2878 fields.append('')
2879 else:
2880 # Start of a new field.
2881 state = self.IN_STR
2882 fields.append(c)
2883
2884 elif state == self.IN_STR:
2885 # Do not accept quote inside unquoted field.
2886 assert c != '"', (i, c, line, fields)
2887 if c == ',':
2888 fields[-1] = fields[-1].strip()
2889 state = self.HAD_DELIMITER
2890 else:
2891 fields[-1] = fields[-1] + c
2892
2893 elif state == self.STARTING_STR_QUOTED:
2894 if c == '"':
2895 # Do not store the character yet.
2896 state = self.STARTING_SECOND_QUOTE
2897 else:
2898 state = self.IN_STR_QUOTED
2899 fields[-1] = fields[-1] + c
2900
2901 elif state == self.STARTING_SECOND_QUOTE:
2902 if c == ',':
2903 # It was an empty field. '""' == ''.
2904 state = self.HAD_DELIMITER
2905 else:
2906 fields[-1] = fields[-1] + '"' + c
2907 state = self.IN_STR_QUOTED
2908
2909 elif state == self.HAD_QUOTE_IN_QUOTED:
2910 if c == ',':
2911 # End of the string.
2912 state = self.HAD_DELIMITER
2913 elif c == '"':
2914 state = self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED
2915 else:
2916 # The previous double-quote was just an unescaped quote.
2917 fields[-1] = fields[-1] + '"' + c
2918 state = self.IN_STR_QUOTED
2919
2920 elif state == self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED:
2921 if c == ',':
2922 # End of the string.
2923 state = self.HAD_DELIMITER
2924 fields[-1] = fields[-1] + '"'
2925 else:
maruel@chromium.org261d1352013-02-25 19:23:20 +00002926 # That's just how the logger rolls. Revert back to appending the
2927 # char and "guess" it was a quote in a double-quoted string.
2928 state = self.IN_STR_QUOTED
2929 fields[-1] = fields[-1] + '"' + c
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002930
2931 elif state == self.IN_STR_QUOTED:
2932 if c == '"':
2933 # Could be a delimiter or an escape.
2934 state = self.HAD_QUOTE_IN_QUOTED
2935 else:
2936 fields[-1] = fields[-1] + c
2937
2938 if state == self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED:
2939 fields[-1] = fields[-1] + '"'
2940 else:
2941 assert state in (
2942 # Terminated with a normal field.
2943 self.IN_STR,
2944 # Terminated with an empty field.
2945 self.STARTING_SECOND_QUOTE,
2946 # Terminated with a normal quoted field.
2947 self.HAD_QUOTE_IN_QUOTED), (
2948 line, state, fields)
2949 return fields
2950
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002951 def __init__(self, logname):
2952 """Starts the log collection.
2953
2954 Requires administrative access. logman.exe is synchronous so no need for a
2955 "warmup" call. 'Windows Kernel Trace' is *localized* so use its GUID
2956 instead. The GUID constant name is SystemTraceControlGuid. Lovely.
2957
2958 One can get the list of potentially interesting providers with:
2959 "logman query providers | findstr /i file"
2960 """
2961 super(LogmanTrace.Tracer, self).__init__(logname)
2962 self._script = create_thunk()
2963 cmd_start = [
2964 'logman.exe',
2965 'start',
2966 'NT Kernel Logger',
2967 '-p', '{9e814aad-3204-11d2-9a82-006008a86939}',
2968 # splitio,fileiocompletion,syscall,file,cswitch,img
2969 '(process,fileio,thread)',
2970 '-o', self._logname + '.etl',
2971 '-ets', # Send directly to kernel
2972 # Values extracted out of thin air.
2973 # Event Trace Session buffer size in kb.
2974 '-bs', '10240',
2975 # Number of Event Trace Session buffers.
2976 '-nb', '16', '256',
2977 ]
2978 logging.debug('Running: %s' % cmd_start)
2979 try:
2980 subprocess.check_call(
2981 cmd_start,
2982 stdin=subprocess.PIPE,
2983 stdout=subprocess.PIPE,
2984 stderr=subprocess.STDOUT)
2985 except subprocess.CalledProcessError, e:
2986 if e.returncode == -2147024891:
2987 print >> sys.stderr, 'Please restart with an elevated admin prompt'
2988 elif e.returncode == -2144337737:
2989 print >> sys.stderr, (
2990 'A kernel trace was already running, stop it and try again')
2991 raise
2992
2993 def trace(self, cmd, cwd, tracename, output):
2994 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
2995 assert os.path.isabs(cmd[0]), cmd[0]
2996 assert os.path.isabs(cwd), cwd
2997 assert os.path.normpath(cwd) == cwd, cwd
2998 with self._lock:
2999 if not self._initialized:
3000 raise TracingFailure(
3001 'Called Tracer.trace() on an unitialized object',
3002 None, None, None, tracename)
3003 assert tracename not in (i['trace'] for i in self._traces)
3004
3005 # Use "logman -?" for help.
3006
3007 stdout = stderr = None
3008 if output:
3009 stdout = subprocess.PIPE
3010 stderr = subprocess.STDOUT
3011
3012 # Run the child process.
3013 logging.debug('Running: %s' % cmd)
3014 # Use the temporary script generated with create_thunk() so we have a
3015 # clear pid owner. Since trace_inputs.py can be used as a library and
3016 # could trace multiple processes simultaneously, it makes it more complex
3017 # if the executable to be traced is executed directly here. It also solves
3018 # issues related to logman.exe that needs to be executed to control the
3019 # kernel trace.
3020 child_cmd = [
3021 sys.executable,
3022 self._script,
3023 tracename,
3024 ]
3025 child = subprocess.Popen(
3026 child_cmd + fix_python_path(cmd),
3027 cwd=cwd,
3028 stdin=subprocess.PIPE,
3029 stdout=stdout,
3030 stderr=stderr)
3031 logging.debug('Started child pid: %d' % child.pid)
3032 out = child.communicate()[0]
3033 # This doesn't mean all the grand-children are done. Sadly, we don't have
3034 # a good way to determine that.
3035
3036 with self._lock:
3037 assert tracename not in (i['trace'] for i in self._traces)
3038 self._traces.append({
maruel@chromium.orgf43e68b2012-10-15 20:23:10 +00003039 'cmd': cmd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003040 'cwd': cwd,
maruel@chromium.org964ddde2012-10-29 18:13:46 +00003041 'output': out,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003042 'pid': child.pid,
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00003043 # Used to figure out the real process when process ids are reused.
3044 'thunk_cmd': child_cmd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003045 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003046 })
3047
3048 return child.returncode, out
3049
3050 def close(self, _timeout=None):
3051 """Stops the kernel log collection and converts the traces to text
3052 representation.
3053 """
3054 with self._lock:
3055 if not self._initialized:
3056 raise TracingFailure(
3057 'Called Tracer.close() on an unitialized object',
3058 None, None, None)
3059 os.remove(self._script)
3060 # Save metadata, add 'format' key..
3061 data = {
3062 'format': 'csv',
3063 'traces': self._traces,
3064 }
3065 write_json(self._logname, data, False)
3066
3067 cmd_stop = [
3068 'logman.exe',
3069 'stop',
3070 'NT Kernel Logger',
3071 '-ets', # Sends the command directly to the kernel.
3072 ]
3073 logging.debug('Running: %s' % cmd_stop)
3074 subprocess.check_call(
3075 cmd_stop,
3076 stdin=subprocess.PIPE,
3077 stdout=subprocess.PIPE,
3078 stderr=subprocess.STDOUT)
3079 self._initialized = False
3080
3081 def post_process_log(self):
3082 """Converts the .etl file into .csv then into .json."""
3083 super(LogmanTrace.Tracer, self).post_process_log()
3084 logformat = 'csv'
3085 self._convert_log(logformat)
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003086 self._trim_log(logformat)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003087
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003088 def _trim_log(self, logformat):
3089 """Reduces the amount of data in original log by generating a 'reduced'
3090 log.
3091 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003092 if logformat == 'csv_utf16':
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003093 file_handle = codecs.open(
3094 self._logname + '.' + logformat, 'r', encoding='utf-16')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003095
3096 elif logformat == 'csv':
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003097 assert sys.getfilesystemencoding() == 'mbcs'
3098 file_handle = codecs.open(
3099 self._logname + '.' + logformat, 'r',
3100 encoding=get_current_encoding())
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003101
3102 supported_events = LogmanTrace.Context.supported_events()
3103
3104 def trim(generator):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003105 """Loads items from the generator and returns the interesting data.
3106
3107 It filters out any uninteresting line and reduce the amount of data in
3108 the trace.
3109 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003110 for index, line in enumerate(generator):
3111 if not index:
3112 if line != self.EXPECTED_HEADER:
3113 raise TracingFailure(
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003114 'Found malformed header: %s' % line,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003115 None, None, None)
3116 continue
3117 # As you can see, the CSV is full of useful non-redundant information:
3118 if (line[2] != '0' or # Event ID
3119 line[3] not in ('2', '3') or # Version
3120 line[4] != '0' or # Channel
3121 line[5] != '0' or # Level
3122 line[7] != '0' or # Task
3123 line[8] != '0x0000000000000000' or # Keyword
3124 line[12] != '' or # Instance ID
3125 line[13] != '' or # Parent Instance ID
3126 line[14] != self.NULL_GUID or # Activity ID
3127 line[15] != ''): # Related Activity ID
3128 raise TracingFailure(
3129 'Found unexpected values in line: %s' % ' '.join(line),
3130 None, None, None)
3131
3132 if (line[self.EVENT_NAME], line[self.TYPE]) not in supported_events:
3133 continue
3134
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003135 yield [
3136 line[self.EVENT_NAME],
3137 line[self.TYPE],
3138 line[self.PID],
3139 line[self.TID],
3140 line[self.PROCESSOR_ID],
3141 line[self.TIMESTAMP],
3142 ] + line[self.USER_DATA:]
3143
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003144 # must not convert the trim() call into a list, since it will use too much
3145 # memory for large trace. use a csv file as a workaround since the json
3146 # parser requires a complete in-memory file.
3147 with open('%s.preprocessed' % self._logname, 'wb') as f:
3148 # $ and * can't be used in file name on windows, reducing the likelihood
3149 # of having to escape a string.
3150 out = csv.writer(
3151 f, delimiter='$', quotechar='*', quoting=csv.QUOTE_MINIMAL)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003152 for line in trim(self.CsvReader(file_handle)):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003153 out.writerow([s.encode('utf-8') for s in line])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003154
3155 def _convert_log(self, logformat):
3156 """Converts the ETL trace to text representation.
3157
3158 Normally, 'csv' is sufficient. If complex scripts are used (like eastern
3159 languages), use 'csv_utf16'. If localization gets in the way, use 'xml'.
3160
3161 Arguments:
3162 - logformat: Text format to be generated, csv, csv_utf16 or xml.
3163
3164 Use "tracerpt -?" for help.
3165 """
3166 LOCALE_INVARIANT = 0x7F
3167 windll.kernel32.SetThreadLocale(LOCALE_INVARIANT)
3168 cmd_convert = [
3169 'tracerpt.exe',
3170 '-l', self._logname + '.etl',
3171 '-o', self._logname + '.' + logformat,
3172 '-gmt', # Use UTC
3173 '-y', # No prompt
3174 # Use -of XML to get the header of each items after column 19, e.g. all
3175 # the actual headers of 'User Data'.
3176 ]
3177
3178 if logformat == 'csv':
3179 # tracerpt localizes the 'Type' column, for major brainfuck
3180 # entertainment. I can't imagine any sane reason to do that.
3181 cmd_convert.extend(['-of', 'CSV'])
3182 elif logformat == 'csv_utf16':
3183 # This causes it to use UTF-16, which doubles the log size but ensures
3184 # the log is readable for non-ASCII characters.
3185 cmd_convert.extend(['-of', 'CSV', '-en', 'Unicode'])
3186 elif logformat == 'xml':
3187 cmd_convert.extend(['-of', 'XML'])
3188 else:
3189 raise ValueError('Unexpected log format \'%s\'' % logformat)
3190 logging.debug('Running: %s' % cmd_convert)
3191 # This can takes tens of minutes for large logs.
3192 # Redirects all output to stderr.
3193 subprocess.check_call(
3194 cmd_convert,
3195 stdin=subprocess.PIPE,
3196 stdout=sys.stderr,
3197 stderr=sys.stderr)
3198
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003199 def __init__(self, use_sudo=False): # pylint: disable=W0613
3200 super(LogmanTrace, self).__init__()
3201 # Ignore use_sudo. It's irrelevant on Windows but kept to simplify the API.
3202
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003203 @staticmethod
3204 def clean_trace(logname):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003205 for ext in ('', '.csv', '.etl', '.json', '.xml', '.preprocessed'):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003206 if os.path.isfile(logname + ext):
3207 os.remove(logname + ext)
3208
3209 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003210 def parse_log(cls, logname, blacklist, trace_name):
3211 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00003212 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003213
3214 def blacklist_more(filepath):
3215 # All the NTFS metadata is in the form x:\$EXTEND or stuff like that.
3216 return blacklist(filepath) or re.match(r'[A-Z]\:\\\$EXTEND', filepath)
3217
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003218 # Create a list of (Context, result_dict) tuples. This is necessary because
3219 # the csv file may be larger than the amount of available memory.
3220 contexes = [
3221 (
3222 cls.Context(
3223 blacklist_more, item['pid'], item['trace'], item['thunk_cmd']),
3224 {
3225 'output': item['output'],
3226 'trace': item['trace'],
3227 },
3228 )
3229 for item in read_json(logname)['traces']
3230 if not trace_name or item['trace'] == trace_name
3231 ]
3232
3233 # The log may be too large to fit in memory and it is not efficient to read
3234 # it multiple times, so multiplex the contexes instead, which is slightly
3235 # more awkward.
3236 with open('%s.preprocessed' % logname, 'rb') as f:
3237 lines = csv.reader(
3238 f, delimiter='$', quotechar='*', quoting=csv.QUOTE_MINIMAL)
3239 for encoded in lines:
3240 line = [s.decode('utf-8') for s in encoded]
3241 # Convert the PID in-place from hex.
3242 line[cls.Context.PID] = int(line[cls.Context.PID], 16)
3243 for context in contexes:
3244 if 'exception' in context[1]:
3245 continue
3246 try:
3247 context[0].on_line(line)
3248 except TracingFailure:
3249 context[1]['exception'] = sys.exc_info()
3250
3251 for context in contexes:
3252 if 'exception' in context[1]:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003253 continue
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003254 context[1]['results'] = context[0].to_results()
3255
3256 return [context[1] for context in contexes]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003257
3258
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003259def get_api(**kwargs):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003260 """Returns the correct implementation for the current OS."""
3261 if sys.platform == 'cygwin':
3262 raise NotImplementedError(
3263 'Not implemented for cygwin, start the script from Win32 python')
3264 flavors = {
3265 'win32': LogmanTrace,
3266 'darwin': Dtrace,
3267 'sunos5': Dtrace,
3268 'freebsd7': Dtrace,
3269 'freebsd8': Dtrace,
3270 }
3271 # Defaults to strace.
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003272 return flavors.get(sys.platform, Strace)(**kwargs)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003273
3274
3275def extract_directories(root_dir, files, blacklist):
3276 """Detects if all the files in a directory are in |files| and if so, replace
3277 the individual files by a Results.Directory instance.
3278
3279 Takes a list of Results.File instances and returns a shorter list of
3280 Results.File and Results.Directory instances.
3281
3282 Arguments:
3283 - root_dir: Optional base directory that shouldn't be search further.
3284 - files: list of Results.File instances.
3285 - blacklist: regexp of files to ignore, for example r'.+\.pyc'.
3286 """
3287 logging.info(
3288 'extract_directories(%s, %d files, ...)' % (root_dir, len(files)))
3289 assert not (root_dir or '').endswith(os.path.sep), root_dir
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003290 # It is important for root_dir to not be a symlinked path, make sure to call
3291 # os.path.realpath() as needed.
3292 assert not root_dir or (
3293 os.path.realpath(get_native_path_case(root_dir)) == root_dir)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003294 assert not any(isinstance(f, Results.Directory) for f in files)
3295 # Remove non existent files.
3296 files = [f for f in files if f.existent]
3297 if not files:
3298 return files
3299 # All files must share the same root, which can be None.
3300 assert len(set(f.root for f in files)) == 1, set(f.root for f in files)
3301
3302 # Creates a {directory: {filename: File}} mapping, up to root.
3303 buckets = {}
3304 if root_dir:
3305 buckets[root_dir] = {}
3306 for fileobj in files:
3307 path = fileobj.full_path
3308 directory = os.path.dirname(path)
3309 assert directory
3310 # Do not use os.path.basename() so trailing os.path.sep is kept.
3311 basename = path[len(directory)+1:]
3312 files_in_directory = buckets.setdefault(directory, {})
3313 files_in_directory[basename] = fileobj
3314 # Add all the directories recursively up to root.
3315 while True:
3316 old_d = directory
3317 directory = os.path.dirname(directory)
3318 if directory + os.path.sep == root_dir or directory == old_d:
3319 break
3320 buckets.setdefault(directory, {})
3321
3322 root_prefix = len(root_dir) + 1 if root_dir else 0
3323 for directory in sorted(buckets, reverse=True):
maruel@chromium.orgb1678e62013-03-19 15:55:17 +00003324 if not os.path.isdir(directory):
3325 logging.debug(
3326 '%s was a directory but doesn\'t exist anymore; ignoring', directory)
3327 continue
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003328 actual = set(f for f in os.listdir(directory) if not blacklist(f))
3329 expected = set(buckets[directory])
3330 if not (actual - expected):
3331 parent = os.path.dirname(directory)
3332 buckets[parent][os.path.basename(directory)] = Results.Directory(
3333 root_dir,
3334 directory[root_prefix:],
3335 False,
3336 sum(f.size for f in buckets[directory].itervalues()),
3337 sum(f.nb_files for f in buckets[directory].itervalues()))
3338 # Remove the whole bucket.
3339 del buckets[directory]
3340
3341 # Reverse the mapping with what remains. The original instances are returned,
3342 # so the cached meta data is kept.
3343 files = sum((x.values() for x in buckets.itervalues()), [])
3344 return sorted(files, key=lambda x: x.path)
3345
3346
3347def trace(logfile, cmd, cwd, api, output):
3348 """Traces an executable. Returns (returncode, output) from api.
3349
3350 Arguments:
3351 - logfile: file to write to.
3352 - cmd: command to run.
3353 - cwd: current directory to start the process in.
3354 - api: a tracing api instance.
3355 - output: if True, returns output, otherwise prints it at the console.
3356 """
3357 cmd = fix_python_path(cmd)
3358 api.clean_trace(logfile)
3359 with api.get_tracer(logfile) as tracer:
3360 return tracer.trace(cmd, cwd, 'default', output)
3361
3362
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003363def CMDclean(args):
3364 """Cleans up traces."""
3365 parser = OptionParserTraceInputs(command='clean')
3366 options, args = parser.parse_args(args)
3367 api = get_api()
3368 api.clean_trace(options.log)
3369 return 0
3370
3371
3372def CMDtrace(args):
3373 """Traces an executable."""
3374 parser = OptionParserTraceInputs(command='trace')
3375 parser.allow_interspersed_args = False
3376 parser.add_option(
3377 '-q', '--quiet', action='store_true',
3378 help='Redirects traced executable output to /dev/null')
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003379 parser.add_option(
3380 '-s', '--sudo', action='store_true',
3381 help='Use sudo when shelling out the tracer tool (ignored on Windows)')
3382 parser.add_option(
3383 '-n', '--no-sudo', action='store_false',
3384 help='Don\'t use sudo')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003385 options, args = parser.parse_args(args)
3386
3387 if not args:
3388 parser.error('Please provide a command to run')
3389
3390 if not os.path.isabs(args[0]) and os.access(args[0], os.X_OK):
3391 args[0] = os.path.abspath(args[0])
3392
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003393 # options.sudo default value is None, which is to do whatever tracer defaults
3394 # do.
3395 api = get_api(use_sudo=options.sudo)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003396 return trace(options.log, args, os.getcwd(), api, options.quiet)[0]
3397
3398
3399def CMDread(args):
3400 """Reads the logs and prints the result."""
3401 parser = OptionParserTraceInputs(command='read')
3402 parser.add_option(
3403 '-V', '--variable',
3404 nargs=2,
3405 action='append',
3406 dest='variables',
3407 metavar='VAR_NAME directory',
3408 default=[],
3409 help=('Variables to replace relative directories against. Example: '
3410 '"-v \'$HOME\' \'/home/%s\'" will replace all occurence of your '
3411 'home dir with $HOME') % getpass.getuser())
3412 parser.add_option(
3413 '--root-dir',
3414 help='Root directory to base everything off it. Anything outside of this '
3415 'this directory will not be reported')
3416 parser.add_option(
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003417 '--trace-name',
3418 help='Only reads one of the trace. Defaults to reading all traces')
3419 parser.add_option(
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003420 '-j', '--json', action='store_true',
3421 help='Outputs raw result data as json')
3422 parser.add_option(
3423 '-b', '--blacklist', action='append', default=[],
3424 help='List of regexp to use as blacklist filter')
3425 options, args = parser.parse_args(args)
3426
3427 if options.root_dir:
maruel@chromium.org7dc17b32013-04-02 22:27:05 +00003428 options.root_dir = get_native_path_case(
3429 unicode(os.path.abspath(options.root_dir)))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003430
3431 variables = dict(options.variables)
3432 api = get_api()
3433 def blacklist(f):
3434 return any(re.match(b, f) for b in options.blacklist)
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003435 data = api.parse_log(options.log, blacklist, options.trace_name)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003436 # Process each trace.
3437 output_as_json = []
3438 for item in data:
3439 if 'exception' in item:
maruel@chromium.org424f5692012-10-15 15:42:38 +00003440 # Do not abort the other traces.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003441 print >> sys.stderr, (
maruel@chromium.org424f5692012-10-15 15:42:38 +00003442 'Trace %s: Got an exception: %s' % (
3443 item['trace'], item['exception'][1]))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003444 continue
3445 results = item['results']
3446 if options.root_dir:
3447 results = results.strip_root(options.root_dir)
3448
3449 if options.json:
3450 output_as_json.append(results.flatten())
3451 else:
3452 simplified = extract_directories(
3453 options.root_dir, results.files, blacklist)
3454 simplified = [f.replace_variables(variables) for f in simplified]
3455 if len(data) > 1:
3456 print('Trace: %s' % item['trace'])
3457 print('Total: %d' % len(results.files))
3458 print('Non existent: %d' % len(results.non_existent))
3459 for f in results.non_existent:
3460 print(' %s' % f.path)
3461 print(
3462 'Interesting: %d reduced to %d' % (
3463 len(results.existent), len(simplified)))
3464 for f in simplified:
3465 print(' %s' % f.path)
3466
3467 if options.json:
3468 write_json(sys.stdout, output_as_json, False)
3469 return 0
3470
3471
3472class OptionParserWithLogging(optparse.OptionParser):
3473 """Adds --verbose option."""
3474 def __init__(self, verbose=0, **kwargs):
3475 optparse.OptionParser.__init__(self, **kwargs)
3476 self.add_option(
3477 '-v', '--verbose',
3478 action='count',
3479 default=verbose,
3480 help='Use multiple times to increase verbosity')
3481
3482 def parse_args(self, *args, **kwargs):
3483 options, args = optparse.OptionParser.parse_args(self, *args, **kwargs)
3484 levels = [logging.ERROR, logging.INFO, logging.DEBUG]
3485 logging.basicConfig(
3486 level=levels[min(len(levels)-1, options.verbose)],
3487 format='%(levelname)5s %(module)15s(%(lineno)3d): %(message)s')
3488 return options, args
3489
3490
3491class OptionParserWithNiceDescription(OptionParserWithLogging):
3492 """Generates the description with the command's docstring."""
3493 def __init__(self, **kwargs):
3494 """Sets 'description' and 'usage' if not already specified."""
3495 command = kwargs.pop('command', 'help')
3496 kwargs.setdefault(
3497 'description',
3498 re.sub('[\r\n ]{2,}', ' ', get_command_handler(command).__doc__))
3499 kwargs.setdefault('usage', '%%prog %s [options]' % command)
3500 OptionParserWithLogging.__init__(self, **kwargs)
3501
3502
3503class OptionParserTraceInputs(OptionParserWithNiceDescription):
3504 """Adds automatic --log handling."""
3505 def __init__(self, **kwargs):
3506 OptionParserWithNiceDescription.__init__(self, **kwargs)
3507 self.add_option(
3508 '-l', '--log', help='Log file to generate or read, required')
3509
3510 def parse_args(self, *args, **kwargs):
3511 """Makes sure the paths make sense.
3512
3513 On Windows, / and \ are often mixed together in a path.
3514 """
3515 options, args = OptionParserWithNiceDescription.parse_args(
3516 self, *args, **kwargs)
3517 if not options.log:
3518 self.error('Must supply a log file with -l')
3519 options.log = os.path.abspath(options.log)
3520 return options, args
3521
3522
3523def extract_documentation():
3524 """Returns a dict {command: description} for each of documented command."""
3525 commands = (
3526 fn[3:]
3527 for fn in dir(sys.modules['__main__'])
3528 if fn.startswith('CMD') and get_command_handler(fn[3:]).__doc__)
3529 return dict((fn, get_command_handler(fn).__doc__) for fn in commands)
3530
3531
3532def CMDhelp(args):
3533 """Prints list of commands or help for a specific command."""
3534 doc = extract_documentation()
3535 # Calculates the optimal offset.
3536 offset = max(len(cmd) for cmd in doc)
3537 format_str = ' %-' + str(offset + 2) + 's %s'
3538 # Generate a one-liner documentation of each commands.
3539 commands_description = '\n'.join(
3540 format_str % (cmd, doc[cmd].split('\n')[0]) for cmd in sorted(doc))
3541
3542 parser = OptionParserWithNiceDescription(
3543 usage='%prog <command> [options]',
3544 description='Commands are:\n%s\n' % commands_description)
3545 parser.format_description = lambda _: parser.description
3546
3547 # Strip out any -h or --help argument.
3548 _, args = parser.parse_args([i for i in args if not i in ('-h', '--help')])
3549 if len(args) == 1:
3550 if not get_command_handler(args[0]):
3551 parser.error('Unknown command %s' % args[0])
3552 # The command was "%prog help command", replaces ourself with
3553 # "%prog command --help" so help is correctly printed out.
3554 return main(args + ['--help'])
3555 elif args:
3556 parser.error('Unknown argument "%s"' % ' '.join(args))
3557 parser.print_help()
3558 return 0
3559
3560
3561def get_command_handler(name):
3562 """Returns the command handler or CMDhelp if it doesn't exist."""
3563 return getattr(sys.modules['__main__'], 'CMD%s' % name, None)
3564
3565
3566def main_impl(argv):
3567 command = get_command_handler(argv[0] if argv else 'help')
3568 if not command:
3569 return CMDhelp(argv)
3570 return command(argv[1:])
3571
3572def main(argv):
maruel@chromium.org46e61cc2013-03-25 19:55:34 +00003573 run_isolated.disable_buffering()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003574 try:
3575 main_impl(argv)
3576 except TracingFailure, e:
3577 sys.stderr.write('\nError: ')
3578 sys.stderr.write(str(e))
3579 sys.stderr.write('\n')
3580 return 1
3581
3582
3583if __name__ == '__main__':
3584 sys.exit(main(sys.argv[1:]))