blob: 8f660316866f73c6c253c2f0ad86b3cc2a8f1835 [file] [log] [blame]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001#!/usr/bin/env python
2# coding=utf-8
3# Copyright (c) 2012 The Chromium Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""Traces an executable and its child processes and extract the files accessed
8by them.
9
10The implementation uses OS-specific API. The native Kernel logger and the ETL
11interface is used on Windows. Dtrace is used on OSX. Strace is used otherwise.
12The OS-specific implementation is hidden in an 'API' interface.
13
14The results are embedded in a Results instance. The tracing is done in two
15phases, the first is to do the actual trace and generate an
16implementation-specific log file. Then the log file is parsed to extract the
17information, including the individual child processes and the files accessed
18from the log.
19"""
20
21import codecs
22import csv
23import getpass
24import glob
25import json
26import logging
maruel@chromium.org306e0e72012-11-02 18:22:03 +000027import unicodedata
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000028import optparse
29import os
30import re
31import subprocess
32import sys
33import tempfile
34import threading
35import time
36import weakref
37
38## OS-specific imports
39
40if sys.platform == 'win32':
41 from ctypes.wintypes import byref, create_unicode_buffer, c_int, c_wchar_p
42 from ctypes.wintypes import windll, FormatError # pylint: disable=E0611
43 from ctypes.wintypes import GetLastError # pylint: disable=E0611
44elif sys.platform == 'darwin':
45 import Carbon.File # pylint: disable=F0401
46 import MacOS # pylint: disable=F0401
47
48
49BASE_DIR = os.path.dirname(os.path.abspath(__file__))
50ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR))
51
52
53class TracingFailure(Exception):
54 """An exception occured during tracing."""
55 def __init__(self, description, pid, line_number, line, *args):
56 super(TracingFailure, self).__init__(
57 description, pid, line_number, line, *args)
58 self.description = description
59 self.pid = pid
60 self.line_number = line_number
61 self.line = line
62 self.extra = args
63
64 def __str__(self):
65 out = self.description
66 if self.pid:
67 out += '\npid: %d' % self.pid
68 if self.line_number:
69 out += '\nline: %d' % self.line_number
70 if self.line:
71 out += '\n%s' % self.line
72 if self.extra:
73 out += '\n' + ', '.join(map(str, filter(None, self.extra)))
74 return out
75
76
77## OS-specific functions
78
79if sys.platform == 'win32':
80 def QueryDosDevice(drive_letter):
81 """Returns the Windows 'native' path for a DOS drive letter."""
82 assert re.match(r'^[a-zA-Z]:$', drive_letter), drive_letter
maruel@chromium.org306e0e72012-11-02 18:22:03 +000083 assert isinstance(drive_letter, unicode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000084 # Guesswork. QueryDosDeviceW never returns the required number of bytes.
85 chars = 1024
maruel@chromium.org306e0e72012-11-02 18:22:03 +000086 drive_letter = drive_letter
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000087 p = create_unicode_buffer(chars)
88 if 0 == windll.kernel32.QueryDosDeviceW(drive_letter, p, chars):
89 err = GetLastError()
90 if err:
91 # pylint: disable=E0602
maruel@chromium.orgf37c2472012-11-02 20:06:13 +000092 msg = u'QueryDosDevice(%s): %s (%d)' % (
93 drive_letter, FormatError(err), err)
94 raise WindowsError(err, msg.encode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000095 return p.value
96
97
98 def GetShortPathName(long_path):
99 """Returns the Windows short path equivalent for a 'long' path."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000100 assert isinstance(long_path, unicode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000101 # Adds '\\\\?\\' when given an absolute path so the MAX_PATH (260) limit is
102 # not enforced.
103 if os.path.isabs(long_path) and not long_path.startswith('\\\\?\\'):
104 long_path = '\\\\?\\' + long_path
105 chars = windll.kernel32.GetShortPathNameW(long_path, None, 0)
106 if chars:
107 p = create_unicode_buffer(chars)
108 if windll.kernel32.GetShortPathNameW(long_path, p, chars):
109 return p.value
110
111 err = GetLastError()
112 if err:
113 # pylint: disable=E0602
maruel@chromium.orgf37c2472012-11-02 20:06:13 +0000114 msg = u'GetShortPathName(%s): %s (%d)' % (
115 long_path, FormatError(err), err)
116 raise WindowsError(err, msg.encode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000117
118
119 def GetLongPathName(short_path):
120 """Returns the Windows long path equivalent for a 'short' path."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000121 assert isinstance(short_path, unicode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000122 # Adds '\\\\?\\' when given an absolute path so the MAX_PATH (260) limit is
123 # not enforced.
124 if os.path.isabs(short_path) and not short_path.startswith('\\\\?\\'):
125 short_path = '\\\\?\\' + short_path
126 chars = windll.kernel32.GetLongPathNameW(short_path, None, 0)
127 if chars:
128 p = create_unicode_buffer(chars)
129 if windll.kernel32.GetLongPathNameW(short_path, p, chars):
130 return p.value
131
132 err = GetLastError()
133 if err:
134 # pylint: disable=E0602
maruel@chromium.orgf37c2472012-11-02 20:06:13 +0000135 msg = u'GetLongPathName(%s): %s (%d)' % (
136 short_path, FormatError(err), err)
137 raise WindowsError(err, msg.encode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000138
139
140 def get_current_encoding():
141 """Returns the 'ANSI' code page associated to the process."""
142 return 'cp%d' % int(windll.kernel32.GetACP())
143
144
145 class DosDriveMap(object):
146 """Maps \Device\HarddiskVolumeN to N: on Windows."""
147 # Keep one global cache.
148 _MAPPING = {}
149
150 def __init__(self):
151 """Lazy loads the cache."""
152 if not self._MAPPING:
153 # This is related to UNC resolver on windows. Ignore that.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000154 self._MAPPING[u'\\Device\\Mup'] = None
155 self._MAPPING[u'\\SystemRoot'] = os.environ[u'SystemRoot']
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000156
157 for letter in (chr(l) for l in xrange(ord('C'), ord('Z')+1)):
158 try:
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000159 letter = u'%s:' % letter
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000160 mapped = QueryDosDevice(letter)
161 if mapped in self._MAPPING:
162 logging.warn(
163 ('Two drives: \'%s\' and \'%s\', are mapped to the same disk'
164 '. Drive letters are a user-mode concept and the kernel '
165 'traces only have NT path, so all accesses will be '
166 'associated with the first drive letter, independent of the '
167 'actual letter used by the code') % (
168 self._MAPPING[mapped], letter))
169 else:
170 self._MAPPING[mapped] = letter
171 except WindowsError: # pylint: disable=E0602
172 pass
173
174 def to_win32(self, path):
175 """Converts a native NT path to Win32/DOS compatible path."""
176 match = re.match(r'(^\\Device\\[a-zA-Z0-9]+)(\\.*)?$', path)
177 if not match:
178 raise ValueError(
179 'Can\'t convert %s into a Win32 compatible path' % path,
180 path)
181 if not match.group(1) in self._MAPPING:
182 # Unmapped partitions may be accessed by windows for the
183 # fun of it while the test is running. Discard these.
184 return None
185 drive = self._MAPPING[match.group(1)]
186 if not drive or not match.group(2):
187 return drive
188 return drive + match.group(2)
189
190
191 def isabs(path):
192 """Accepts X: as an absolute path, unlike python's os.path.isabs()."""
193 return os.path.isabs(path) or len(path) == 2 and path[1] == ':'
194
195
196 def get_native_path_case(p):
197 """Returns the native path case for an existing file.
198
199 On Windows, removes any leading '\\?\'.
200 """
201 if not isabs(p):
202 raise ValueError(
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +0000203 'get_native_path_case(%r): Require an absolute path' % p, p)
204
maruel@chromium.org037758d2012-12-10 17:59:46 +0000205 # Make sure it is normalized to os.path.sep. Do not do it here to keep the
206 # function fast
207 assert '/' not in p, p
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +0000208 suffix = ''
209 count = p.count(':')
210 if count > 1:
211 # This means it has an alternate-data stream. There could be 3 ':', since
212 # it could be the $DATA datastream of an ADS. Split the whole ADS suffix
213 # off and add it back afterward. There is no way to know the native path
214 # case of an alternate data stream.
215 items = p.split(':')
216 p = ':'.join(items[0:2])
217 suffix = ''.join(':' + i for i in items[2:])
218
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000219 # Windows used to have an option to turn on case sensitivity on non Win32
220 # subsystem but that's out of scope here and isn't supported anymore.
221 # Go figure why GetShortPathName() is needed.
222 try:
223 out = GetLongPathName(GetShortPathName(p))
224 except OSError, e:
225 if e.args[0] in (2, 3, 5):
226 # The path does not exist. Try to recurse and reconstruct the path.
227 base = os.path.dirname(p)
228 rest = os.path.basename(p)
229 return os.path.join(get_native_path_case(base), rest)
230 raise
231 if out.startswith('\\\\?\\'):
232 out = out[4:]
233 # Always upper case the first letter since GetLongPathName() will return the
234 # drive letter in the case it was given.
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +0000235 return out[0].upper() + out[1:] + suffix
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000236
237
238 def CommandLineToArgvW(command_line):
239 """Splits a commandline into argv using CommandLineToArgvW()."""
240 # http://msdn.microsoft.com/library/windows/desktop/bb776391.aspx
241 size = c_int()
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000242 assert isinstance(command_line, unicode)
243 ptr = windll.shell32.CommandLineToArgvW(command_line, byref(size))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000244 try:
245 return [arg for arg in (c_wchar_p * size.value).from_address(ptr)]
246 finally:
247 windll.kernel32.LocalFree(ptr)
248
249
250elif sys.platform == 'darwin':
251
252
253 # On non-windows, keep the stdlib behavior.
254 isabs = os.path.isabs
255
256
257 def _find_item_native_case(root_path, item):
258 """Gets the native path case of a single item based at root_path.
259
260 There is no API to get the native path case of symlinks on OSX. So it
261 needs to be done the slow way.
262 """
263 item = item.lower()
264 for element in os.listdir(root_path):
265 if element.lower() == item:
266 return element
267
268
269 def _native_case(p):
270 """Gets the native path case. Warning: this function resolves symlinks."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000271 try:
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000272 rel_ref, _ = Carbon.File.FSPathMakeRef(p.encode('utf-8'))
273 # The OSX underlying code uses NFD but python strings are in NFC. This
274 # will cause issues with os.listdir() for example. Since the dtrace log
275 # *is* in NFC, normalize it here.
276 out = unicodedata.normalize(
277 'NFC', rel_ref.FSRefMakePath().decode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000278 if p.endswith(os.path.sep) and not out.endswith(os.path.sep):
279 return out + os.path.sep
280 return out
281 except MacOS.Error, e:
282 if e.args[0] in (-43, -120):
283 # The path does not exist. Try to recurse and reconstruct the path.
284 # -43 means file not found.
285 # -120 means directory not found.
286 base = os.path.dirname(p)
287 rest = os.path.basename(p)
288 return os.path.join(_native_case(base), rest)
289 raise OSError(
290 e.args[0], 'Failed to get native path for %s' % p, p, e.args[1])
291
292
293 def _split_at_symlink_native(base_path, rest):
294 """Returns the native path for a symlink."""
295 base, symlink, rest = split_at_symlink(base_path, rest)
296 if symlink:
297 if not base_path:
298 base_path = base
299 else:
300 base_path = safe_join(base_path, base)
301 symlink = _find_item_native_case(base_path, symlink)
302 return base, symlink, rest
303
304
305 def get_native_path_case(path):
306 """Returns the native path case for an existing file.
307
308 Technically, it's only HFS+ on OSX that is case preserving and
309 insensitive. It's the default setting on HFS+ but can be changed.
310 """
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000311 assert isinstance(path, unicode), path
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000312 if not isabs(path):
313 raise ValueError(
314 'Can\'t get native path case for a non-absolute path: %s' % path,
315 path)
316 if path.startswith('/dev'):
317 # /dev is not visible from Carbon, causing an exception.
318 return path
319
320 # Starts assuming there is no symlink along the path.
321 resolved = _native_case(path)
322 if resolved.lower() == path.lower():
323 # This code path is incredibly faster.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000324 logging.debug('get_native_path_case(%s) = %s' % (path, resolved))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000325 return resolved
326
327 # There was a symlink, process it.
328 base, symlink, rest = _split_at_symlink_native(None, path)
329 assert symlink, (path, base, symlink, rest, resolved)
330 prev = base
331 base = safe_join(_native_case(base), symlink)
332 assert len(base) > len(prev)
333 while rest:
334 prev = base
335 relbase, symlink, rest = _split_at_symlink_native(base, rest)
336 base = safe_join(base, relbase)
337 assert len(base) > len(prev), (prev, base, symlink)
338 if symlink:
339 base = safe_join(base, symlink)
340 assert len(base) > len(prev), (prev, base, symlink)
341 # Make sure no symlink was resolved.
342 assert base.lower() == path.lower(), (base, path)
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000343 logging.debug('get_native_path_case(%s) = %s' % (path, base))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000344 return base
345
346
347else: # OSes other than Windows and OSX.
348
349
350 # On non-windows, keep the stdlib behavior.
351 isabs = os.path.isabs
352
353
354 def get_native_path_case(path):
355 """Returns the native path case for an existing file.
356
357 On OSes other than OSX and Windows, assume the file system is
358 case-sensitive.
359
360 TODO(maruel): This is not strictly true. Implement if necessary.
361 """
362 if not isabs(path):
363 raise ValueError(
364 'Can\'t get native path case for a non-absolute path: %s' % path,
365 path)
366 # Give up on cygwin, as GetLongPathName() can't be called.
367 # Linux traces tends to not be normalized so use this occasion to normalize
368 # it. This function implementation already normalizes the path on the other
369 # OS so this needs to be done here to be coherent between OSes.
370 out = os.path.normpath(path)
371 if path.endswith(os.path.sep) and not out.endswith(os.path.sep):
372 return out + os.path.sep
373 return out
374
375
376if sys.platform != 'win32': # All non-Windows OSes.
377
378
379 def safe_join(*args):
380 """Joins path elements like os.path.join() but doesn't abort on absolute
381 path.
382
383 os.path.join('foo', '/bar') == '/bar'
384 but safe_join('foo', '/bar') == 'foo/bar'.
385 """
386 out = ''
387 for element in args:
388 if element.startswith(os.path.sep):
389 if out.endswith(os.path.sep):
390 out += element[1:]
391 else:
392 out += element
393 else:
394 if out.endswith(os.path.sep):
395 out += element
396 else:
397 out += os.path.sep + element
398 return out
399
400
401 def split_at_symlink(base_dir, relfile):
402 """Scans each component of relfile and cut the string at the symlink if
403 there is any.
404
405 Returns a tuple (base_path, symlink, rest), with symlink == rest == None if
406 not symlink was found.
407 """
408 if base_dir:
409 assert relfile
410 assert os.path.isabs(base_dir)
411 index = 0
412 else:
413 assert os.path.isabs(relfile)
414 index = 1
415
416 def at_root(rest):
417 if base_dir:
418 return safe_join(base_dir, rest)
419 return rest
420
421 while True:
422 try:
423 index = relfile.index(os.path.sep, index)
424 except ValueError:
425 index = len(relfile)
426 full = at_root(relfile[:index])
427 if os.path.islink(full):
428 # A symlink!
429 base = os.path.dirname(relfile[:index])
430 symlink = os.path.basename(relfile[:index])
431 rest = relfile[index:]
432 logging.debug(
433 'split_at_symlink(%s, %s) -> (%s, %s, %s)' %
434 (base_dir, relfile, base, symlink, rest))
435 return base, symlink, rest
436 if index == len(relfile):
437 break
438 index += 1
439 return relfile, None, None
440
441
442def fix_python_path(cmd):
443 """Returns the fixed command line to call the right python executable."""
444 out = cmd[:]
445 if out[0] == 'python':
446 out[0] = sys.executable
447 elif out[0].endswith('.py'):
448 out.insert(0, sys.executable)
449 return out
450
451
452def create_thunk():
453 handle, name = tempfile.mkstemp(prefix='trace_inputs_thunk', suffix='.py')
454 os.write(
455 handle,
456 (
457 'import subprocess\n'
458 'import sys\n'
459 'sys.exit(subprocess.call(sys.argv[2:]))\n'
460 ))
461 os.close(handle)
462 return name
463
464
465def strace_process_quoted_arguments(text):
466 """Extracts quoted arguments on a string and return the arguments as a list.
467
468 Implemented as an automaton. Supports incomplete strings in the form
469 '"foo"...'.
470
471 Example:
472 With text = '"foo", "bar"', the function will return ['foo', 'bar']
473
474 TODO(maruel): Implement escaping.
475 """
476 # All the possible states of the DFA.
477 ( NEED_QUOTE, # Begining of a new arguments.
478 INSIDE_STRING, # Inside an argument.
479 ESCAPED, # Found a '\' inside a quote. Treat the next char as-is.
480 NEED_COMMA_OR_DOT, # Right after the closing quote of an argument. Could be
481 # a serie of 3 dots or a comma.
482 NEED_SPACE, # Right after a comma
483 NEED_DOT_2, # Found a dot, need a second one.
484 NEED_DOT_3, # Found second dot, need a third one.
485 NEED_COMMA, # Found third dot, need a comma.
486 ) = range(8)
487
488 state = NEED_QUOTE
489 out = []
490 for index, char in enumerate(text):
491 if char == '"':
492 if state == NEED_QUOTE:
493 state = INSIDE_STRING
494 # A new argument was found.
495 out.append('')
496 elif state == INSIDE_STRING:
497 # The argument is now closed.
498 state = NEED_COMMA_OR_DOT
499 elif state == ESCAPED:
500 out[-1] += char
501 state = INSIDE_STRING
502 else:
503 raise ValueError(
504 'Can\'t process char at column %d for: %r' % (index, text),
505 index,
506 text)
507 elif char == ',':
508 if state in (NEED_COMMA_OR_DOT, NEED_COMMA):
509 state = NEED_SPACE
510 elif state == INSIDE_STRING:
511 out[-1] += char
512 elif state == ESCAPED:
513 out[-1] += char
514 state = INSIDE_STRING
515 else:
516 raise ValueError(
517 'Can\'t process char at column %d for: %r' % (index, text),
518 index,
519 text)
520 elif char == ' ':
521 if state == NEED_SPACE:
522 state = NEED_QUOTE
523 elif state == INSIDE_STRING:
524 out[-1] += char
525 elif state == ESCAPED:
526 out[-1] += char
527 state = INSIDE_STRING
528 else:
529 raise ValueError(
530 'Can\'t process char at column %d for: %r' % (index, text),
531 index,
532 text)
533 elif char == '.':
534 if state == NEED_COMMA_OR_DOT:
535 # The string is incomplete, this mean the strace -s flag should be
536 # increased.
537 state = NEED_DOT_2
538 elif state == NEED_DOT_2:
539 state = NEED_DOT_3
540 elif state == NEED_DOT_3:
541 state = NEED_COMMA
542 elif state == INSIDE_STRING:
543 out[-1] += char
544 elif state == ESCAPED:
545 out[-1] += char
546 state = INSIDE_STRING
547 else:
548 raise ValueError(
549 'Can\'t process char at column %d for: %r' % (index, text),
550 index,
551 text)
552 elif char == '\\':
553 if state == ESCAPED:
554 out[-1] += char
555 state = INSIDE_STRING
556 elif state == INSIDE_STRING:
557 state = ESCAPED
558 else:
559 raise ValueError(
560 'Can\'t process char at column %d for: %r' % (index, text),
561 index,
562 text)
563 else:
564 if state == INSIDE_STRING:
565 out[-1] += char
566 else:
567 raise ValueError(
568 'Can\'t process char at column %d for: %r' % (index, text),
569 index,
570 text)
571 if state not in (NEED_COMMA, NEED_COMMA_OR_DOT):
572 raise ValueError(
573 'String is incorrectly terminated: %r' % text,
574 text)
575 return out
576
577
578def read_json(filepath):
579 with open(filepath, 'r') as f:
580 return json.load(f)
581
582
583def write_json(filepath_or_handle, data, dense):
584 """Writes data into filepath or file handle encoded as json.
585
586 If dense is True, the json is packed. Otherwise, it is human readable.
587 """
588 if hasattr(filepath_or_handle, 'write'):
589 if dense:
590 filepath_or_handle.write(
591 json.dumps(data, sort_keys=True, separators=(',',':')))
592 else:
593 filepath_or_handle.write(json.dumps(data, sort_keys=True, indent=2))
594 else:
595 with open(filepath_or_handle, 'wb') as f:
596 if dense:
597 json.dump(data, f, sort_keys=True, separators=(',',':'))
598 else:
599 json.dump(data, f, sort_keys=True, indent=2)
600
601
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000602def assert_is_renderable(pseudo_string):
603 """Asserts the input is a valid object to be processed by render()."""
604 assert (
605 isinstance(pseudo_string, (None.__class__, unicode)) or
606 hasattr(pseudo_string, 'render')), repr(pseudo_string)
607
608
609def render(pseudo_string):
610 """Converts the pseudo-string to an unicode string."""
611 assert_is_renderable(pseudo_string)
612 if isinstance(pseudo_string, (None.__class__, unicode)):
613 return pseudo_string
614 return pseudo_string.render()
615
616
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000617class Results(object):
618 """Results of a trace session."""
619
620 class _TouchedObject(object):
621 """Something, a file or a directory, that was accessed."""
622 def __init__(self, root, path, tainted, size, nb_files):
623 logging.debug(
624 '%s(%s, %s, %s, %s, %s)' %
625 (self.__class__.__name__, root, path, tainted, size, nb_files))
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000626 assert_is_renderable(root)
627 assert_is_renderable(path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000628 self.root = root
629 self.path = path
630 self.tainted = tainted
631 self.nb_files = nb_files
632 # Can be used as a cache or a default value, depending on context.
633 self._size = size
634 # These are cache only.
635 self._real_path = None
636
637 # Check internal consistency.
638 assert path, path
639 assert tainted or bool(root) != bool(isabs(path)), (root, path)
640 assert tainted or (
641 not os.path.exists(self.full_path) or
642 (self.full_path == get_native_path_case(self.full_path))), (
643 tainted, self.full_path, get_native_path_case(self.full_path))
644
645 @property
646 def existent(self):
647 return self.size != -1
648
649 @property
650 def full_path(self):
651 if self.root:
652 return os.path.join(self.root, self.path)
653 return self.path
654
655 @property
656 def real_path(self):
657 """Returns the path with symlinks resolved."""
658 if not self._real_path:
659 self._real_path = os.path.realpath(self.full_path)
660 return self._real_path
661
662 @property
663 def size(self):
664 """File's size. -1 is not existent."""
665 if self._size is None and not self.tainted:
666 try:
667 self._size = os.stat(self.full_path).st_size
668 except OSError:
669 self._size = -1
670 return self._size
671
672 def flatten(self):
673 """Returns a dict representing this object.
674
675 A 'size' of 0 means the file was only touched and not read.
676 """
677 return {
678 'path': self.path,
679 'size': self.size,
680 }
681
682 def replace_variables(self, variables):
683 """Replaces the root of this File with one of the variables if it matches.
684
685 If a variable replacement occurs, the cloned object becomes tainted.
686 """
687 for variable, root_path in variables.iteritems():
688 if self.path.startswith(root_path):
689 return self._clone(
690 self.root, variable + self.path[len(root_path):], True)
691 # No need to clone, returns ourself.
692 return self
693
694 def strip_root(self, root):
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000695 """Returns a clone of itself with 'root' stripped off.
696
697 Note that the file is kept if it is either accessible from a symlinked
698 path that was used to access the file or through the real path.
699 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000700 # Check internal consistency.
701 assert self.tainted or (isabs(root) and root.endswith(os.path.sep)), root
702 if not self.full_path.startswith(root):
703 # Now try to resolve the symlinks to see if it can be reached this way.
704 # Only try *after* trying without resolving symlink.
705 if not self.real_path.startswith(root):
706 return None
707 path = self.real_path
708 else:
709 path = self.full_path
710 return self._clone(root, path[len(root):], self.tainted)
711
712 def _clone(self, new_root, new_path, tainted):
713 raise NotImplementedError(self.__class__.__name__)
714
715 class File(_TouchedObject):
716 """A file that was accessed. May not be present anymore.
717
718 If tainted is true, it means it is not a real path anymore as a variable
719 replacement occured.
720
maruel@chromium.orge5c17132012-11-21 18:18:46 +0000721 If only_touched is True, this means the file was probed for existence, and
722 it is existent, but was never _opened_. If only_touched is True, the file
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000723 must have existed.
724 """
725 def __init__(self, root, path, tainted, size):
726 super(Results.File, self).__init__(root, path, tainted, size, 1)
727
728 def _clone(self, new_root, new_path, tainted):
729 """Clones itself keeping meta-data."""
730 # Keep the self.size and self._real_path caches for performance reason. It
731 # is also important when the file becomes tainted (with a variable instead
732 # of the real path) since self.path is not an on-disk path anymore so
733 # out._size cannot be updated.
734 out = self.__class__(new_root, new_path, tainted, self.size)
735 out._real_path = self._real_path
736 return out
737
738 class Directory(_TouchedObject):
739 """A directory of files. Must exist."""
740 def __init__(self, root, path, tainted, size, nb_files):
741 """path='.' is a valid value and must be handled appropriately."""
742 assert not path.endswith(os.path.sep), path
743 super(Results.Directory, self).__init__(
744 root, path + os.path.sep, tainted, size, nb_files)
745 # For a Directory instance, self.size is not a cache, it's an actual value
746 # that is never modified and represents the total size of the files
747 # contained in this directory. It is possible that the directory is empty
748 # so that size == 0; this happens if there's only an invalid symlink in
749 # it.
750
751 def flatten(self):
752 out = super(Results.Directory, self).flatten()
753 out['nb_files'] = self.nb_files
754 return out
755
756 def _clone(self, new_root, new_path, tainted):
757 """Clones itself keeping meta-data."""
758 out = self.__class__(
759 new_root,
760 new_path.rstrip(os.path.sep),
761 tainted,
762 self.size,
763 self.nb_files)
764 out._real_path = self._real_path
765 return out
766
767 class Process(object):
768 """A process that was traced.
769
770 Contains references to the files accessed by this process and its children.
771 """
772 def __init__(self, pid, files, executable, command, initial_cwd, children):
773 logging.debug('Process(%s, %d, ...)' % (pid, len(files)))
774 self.pid = pid
775 self.files = sorted(files, key=lambda x: x.path)
776 self.children = children
777 self.executable = executable
778 self.command = command
779 self.initial_cwd = initial_cwd
780
781 # Check internal consistency.
782 assert len(set(f.path for f in self.files)) == len(self.files), sorted(
783 f.path for f in self.files)
784 assert isinstance(self.children, list)
785 assert isinstance(self.files, list)
786
787 @property
788 def all(self):
789 for child in self.children:
790 for i in child.all:
791 yield i
792 yield self
793
794 def flatten(self):
795 return {
796 'children': [c.flatten() for c in self.children],
797 'command': self.command,
798 'executable': self.executable,
799 'files': [f.flatten() for f in self.files],
800 'initial_cwd': self.initial_cwd,
801 'pid': self.pid,
802 }
803
804 def strip_root(self, root):
805 assert isabs(root) and root.endswith(os.path.sep), root
806 # Loads the files after since they are constructed as objects.
807 out = self.__class__(
808 self.pid,
809 filter(None, (f.strip_root(root) for f in self.files)),
810 self.executable,
811 self.command,
812 self.initial_cwd,
813 [c.strip_root(root) for c in self.children])
814 logging.debug(
815 'strip_root(%s) %d -> %d' % (root, len(self.files), len(out.files)))
816 return out
817
818 def __init__(self, process):
819 self.process = process
820 # Cache.
821 self._files = None
822
823 def flatten(self):
824 return {
825 'root': self.process.flatten(),
826 }
827
828 @property
829 def files(self):
830 if self._files is None:
831 self._files = sorted(
832 sum((p.files for p in self.process.all), []),
833 key=lambda x: x.path)
834 return self._files
835
836 @property
837 def existent(self):
838 return [f for f in self.files if f.existent]
839
840 @property
841 def non_existent(self):
842 return [f for f in self.files if not f.existent]
843
844 def strip_root(self, root):
845 """Returns a clone with all the files outside the directory |root| removed
846 and converts all the path to be relative paths.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000847
848 It keeps files accessible through the |root| directory or that have been
849 accessed through any symlink which points to the same directory.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000850 """
851 # Resolve any symlink
852 root = os.path.realpath(root)
853 root = get_native_path_case(root).rstrip(os.path.sep) + os.path.sep
854 logging.debug('strip_root(%s)' % root)
855 return Results(self.process.strip_root(root))
856
857
858class ApiBase(object):
859 """OS-agnostic API to trace a process and its children."""
860 class Context(object):
861 """Processes one log line at a time and keeps the list of traced processes.
862
863 The parsing is complicated by the fact that logs are traced out of order for
864 strace but in-order for dtrace and logman. In addition, on Windows it is
865 very frequent that processids are reused so a flat list cannot be used. But
866 at the same time, it is impossible to faithfully construct a graph when the
867 logs are processed out of order. So both a tree and a flat mapping are used,
868 the tree is the real process tree, while the flat mapping stores the last
869 valid process for the corresponding processid. For the strace case, the
870 tree's head is guessed at the last moment.
871 """
872 class Process(object):
873 """Keeps context for one traced child process.
874
875 Logs all the files this process touched. Ignores directories.
876 """
877 def __init__(self, blacklist, pid, initial_cwd):
878 # Check internal consistency.
879 assert isinstance(pid, int), repr(pid)
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000880 assert_is_renderable(initial_cwd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000881 self.pid = pid
882 # children are Process instances.
883 self.children = []
884 self.initial_cwd = initial_cwd
885 self.cwd = None
886 self.files = set()
887 self.only_touched = set()
888 self.executable = None
889 self.command = None
890 self._blacklist = blacklist
891
892 def to_results_process(self):
893 """Resolves file case sensitivity and or late-bound strings."""
894 # When resolving files, it's normal to get dupe because a file could be
895 # opened multiple times with different case. Resolve the deduplication
896 # here.
897 def fix_path(x):
898 """Returns the native file path case.
899
900 Converts late-bound strings.
901 """
902 if not x:
903 # Do not convert None instance to 'None'.
904 return x
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000905 x = render(x)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000906 if os.path.isabs(x):
907 # If the path is not absolute, which tends to happen occasionally on
908 # Windows, it is not possible to get the native path case so ignore
909 # that trace. It mostly happens for 'executable' value.
910 x = get_native_path_case(x)
911 return x
912
913 def fix_and_blacklist_path(x):
914 x = fix_path(x)
915 if not x:
916 return
917 # The blacklist needs to be reapplied, since path casing could
918 # influence blacklisting.
919 if self._blacklist(x):
920 return
921 return x
922
923 # Filters out directories. Some may have passed through.
924 files = set(f for f in map(fix_and_blacklist_path, self.files) if f)
925 only_touched = set(
926 f for f in map(fix_and_blacklist_path, self.only_touched) if f)
927 only_touched -= files
928
929 files = [
930 Results.File(None, f, False, None) for f in files
931 if not os.path.isdir(f)
932 ]
933 # Using 0 as size means the file's content is ignored since the file was
934 # never opened for I/O.
935 files.extend(
936 Results.File(None, f, False, 0) for f in only_touched
937 if not os.path.isdir(f)
938 )
939 return Results.Process(
940 self.pid,
941 files,
942 fix_path(self.executable),
943 self.command,
944 fix_path(self.initial_cwd),
945 [c.to_results_process() for c in self.children])
946
947 def add_file(self, filepath, touch_only):
948 """Adds a file if it passes the blacklist."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000949 if self._blacklist(render(filepath)):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000950 return
951 logging.debug('add_file(%d, %s, %s)' % (self.pid, filepath, touch_only))
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000952 # Note that filepath and not render(filepath) is added. It is
953 # because filepath could be something else than a string, like a
954 # RelativePath instance for dtrace logs.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000955 if touch_only:
956 self.only_touched.add(filepath)
957 else:
958 self.files.add(filepath)
959
960 def __init__(self, blacklist):
961 self.blacklist = blacklist
962 # Initial process.
963 self.root_process = None
964 # dict to accelerate process lookup, to not have to lookup the whole graph
965 # each time.
966 self._process_lookup = {}
967
968 class Tracer(object):
969 """During it's lifetime, the tracing subsystem is enabled."""
970 def __init__(self, logname):
971 self._logname = logname
972 self._lock = threading.Lock()
973 self._traces = []
974 self._initialized = True
975
976 def trace(self, cmd, cwd, tracename, output):
977 """Runs the OS-specific trace program on an executable.
978
979 Arguments:
980 - cmd: The command (a list) to run.
981 - cwd: Current directory to start the child process in.
982 - tracename: Name of the trace in the logname file.
983 - output: If False, redirects output to PIPEs.
984
985 Returns a tuple (resultcode, output) and updates the internal trace
986 entries.
987 """
988 # The implementation adds an item to self._traces.
989 raise NotImplementedError(self.__class__.__name__)
990
991 def close(self, _timeout=None):
992 """Saves the meta-data in the logname file.
993
994 For kernel-based tracing, stops the tracing subsystem.
995
996 Must not be used manually when using 'with' construct.
997 """
998 with self._lock:
999 assert self._initialized
1000 try:
1001 data = {
1002 'traces': self._traces,
1003 }
1004 write_json(self._logname, data, False)
1005 finally:
1006 self._initialized = False
1007
1008 def post_process_log(self):
1009 """Post-processes the log so it becomes faster to load afterward.
1010
1011 Must not be used manually when using 'with' construct.
1012 """
1013 assert not self._initialized, 'Must stop tracing first.'
1014
1015 def __enter__(self):
1016 """Enables 'with' statement."""
1017 return self
1018
1019 def __exit__(self, exc_type, exc_value, traceback):
1020 """Enables 'with' statement."""
1021 self.close()
1022 # If an exception was thrown, do not process logs.
1023 if not exc_type:
1024 self.post_process_log()
1025
1026 def get_tracer(self, logname):
1027 """Returns an ApiBase.Tracer instance.
1028
1029 Initializes the tracing subsystem, which is a requirement for kernel-based
1030 tracers. Only one tracer instance should be live at a time!
1031
1032 logname is the filepath to the json file that will contain the meta-data
1033 about the logs.
1034 """
1035 return self.Tracer(logname)
1036
1037 @staticmethod
1038 def clean_trace(logname):
1039 """Deletes an old log."""
1040 raise NotImplementedError()
1041
1042 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001043 def parse_log(cls, logname, blacklist, trace_name):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001044 """Processes trace logs and returns the files opened and the files that do
1045 not exist.
1046
1047 It does not track directories.
1048
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001049 Arguments:
1050 - logname: must be an absolute path.
1051 - blacklist: must be a lambda.
1052 - trace_name: optional trace to read, defaults to reading all traces.
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00001053
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001054 Most of the time, files that do not exist are temporary test files that
1055 should be put in /tmp instead. See http://crbug.com/116251.
1056
1057 Returns a list of dict with keys:
1058 - results: A Results instance.
1059 - trace: The corresponding tracename parameter provided to
1060 get_tracer().trace().
1061 - output: Output gathered during execution, if get_tracer().trace(...,
1062 output=False) was used.
1063 """
1064 raise NotImplementedError(cls.__class__.__name__)
1065
1066
1067class Strace(ApiBase):
1068 """strace implies linux."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001069 @staticmethod
1070 def load_filename(filename):
1071 """Parses a filename in a log."""
1072 assert isinstance(filename, str)
1073 out = ''
1074 i = 0
1075 while i < len(filename):
1076 c = filename[i]
1077 if c == '\\':
1078 out += chr(int(filename[i+1:i+4], 8))
1079 i += 4
1080 else:
1081 out += c
1082 i += 1
1083 # TODO(maruel): That's not necessarily true that the current code page is
1084 # utf-8.
1085 return out.decode('utf-8')
1086
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001087 class Context(ApiBase.Context):
1088 """Processes a strace log line and keeps the list of existent and non
1089 existent files accessed.
1090
1091 Ignores directories.
1092
1093 Uses late-binding to processes the cwd of each process. The problem is that
1094 strace generates one log file per process it traced but doesn't give any
1095 information about which process was started when and by who. So we don't
1096 even know which process is the initial one. So process the logs out of
1097 order and use late binding with RelativePath to be able to deduce the
1098 initial directory of each process once all the logs are parsed.
1099 """
1100 class Process(ApiBase.Context.Process):
1101 """Represents the state of a process.
1102
1103 Contains all the information retrieved from the pid-specific log.
1104 """
1105 # Function names are using ([a-z_0-9]+)
1106 # This is the most common format. function(args) = result
1107 RE_HEADER = re.compile(r'^([a-z_0-9]+)\((.+?)\)\s+= (.+)$')
1108 # An interrupted function call, only grab the minimal header.
1109 RE_UNFINISHED = re.compile(r'^([^\(]+)(.*) \<unfinished \.\.\.\>$')
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001110 # An interrupted function call, with the process exiting. It must be the
1111 # last line in the log.
1112 RE_UNFINISHED_EXIT = re.compile(
1113 r'^([^\(]+)(.*) \<unfinished \.\.\.\ exit status \d+>$')
maruel@chromium.org4e506502012-10-18 15:02:58 +00001114 # An interrupted function call the hard way. Usually observed with futex()
1115 # on ubuntu 12.04.
csharp@chromium.org8cefa4c2013-01-16 19:33:47 +00001116 RE_INTERRUPTED_HARD = re.compile(r'^([^\(]+)\('
1117 '[A-Z0-9a-z:\,\_\|\{\}\(\)\>\< ]*$')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001118 # A resumed function call.
1119 RE_RESUMED = re.compile(r'^<\.\.\. ([^ ]+) resumed> (.+)$')
1120 # A process received a signal.
1121 RE_SIGNAL = re.compile(r'^--- SIG[A-Z]+ .+ ---')
1122 # A process didn't handle a signal. Ignore any junk appearing before,
1123 # because the process was forcibly killed so it won't open any new file.
1124 RE_KILLED = re.compile(
1125 r'^.*\+\+\+ killed by ([A-Z]+)( \(core dumped\))? \+\+\+$')
1126 # The process has exited.
1127 RE_PROCESS_EXITED = re.compile(r'^\+\+\+ exited with (\d+) \+\+\+')
1128 # A call was canceled. Ignore any prefix.
1129 RE_UNAVAILABLE = re.compile(r'^.*\)\s*= \? <unavailable>$')
1130 # Happens when strace fails to even get the function name.
1131 UNNAMED_FUNCTION = '????'
1132
1133 # Corner-case in python, a class member function decorator must not be
1134 # @staticmethod.
1135 def parse_args(regexp, expect_zero): # pylint: disable=E0213
1136 """Automatically convert the str 'args' into a list of processed
1137 arguments.
1138
1139 Arguments:
1140 - regexp is used to parse args.
1141 - expect_zero: one of True, False or None.
1142 - True: will check for result.startswith('0') first and will ignore
1143 the trace line completely otherwise. This is important because for
1144 many functions, the regexp will not process if the call failed.
1145 - False: will check for not result.startswith(('?', '-1')) for the
1146 same reason than with True.
1147 - None: ignore result.
1148 """
1149 def meta_hook(function):
1150 assert function.__name__.startswith('handle_')
1151 def hook(self, args, result):
1152 if expect_zero is True and not result.startswith('0'):
1153 return
1154 if expect_zero is False and result.startswith(('?', '-1')):
1155 return
1156 match = re.match(regexp, args)
1157 if not match:
1158 raise TracingFailure(
1159 'Failed to parse %s(%s) = %s' %
1160 (function.__name__[len('handle_'):], args, result),
1161 None, None, None)
1162 return function(self, match.groups(), result)
1163 return hook
1164 return meta_hook
1165
1166 class RelativePath(object):
1167 """A late-bound relative path."""
1168 def __init__(self, parent, value):
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001169 assert_is_renderable(parent)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001170 self.parent = parent
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001171 assert isinstance(value, (None.__class__, str)), repr(value)
1172 self.value = Strace.load_filename(value) if value else value
1173 if self.value:
1174 assert '\\' not in self.value, value
1175 assert '\\' not in self.value, (repr(value), repr(self.value))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001176
1177 def render(self):
1178 """Returns the current directory this instance is representing.
1179
1180 This function is used to return the late-bound value.
1181 """
1182 if self.value and self.value.startswith(u'/'):
1183 # An absolute path.
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001184 # TODO(maruel): This is wrong, we can't assert it is utf-8.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001185 return self.value
1186 parent = self.parent.render() if self.parent else u'<None>'
1187 if self.value:
1188 return os.path.normpath(os.path.join(parent, self.value))
1189 return parent
1190
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001191 def __init__(self, root, pid):
1192 """Keeps enough information to be able to guess the original process
1193 root.
1194
1195 strace doesn't store which process was the initial process. So more
1196 information needs to be kept so the graph can be reconstructed from the
1197 flat map.
1198 """
1199 logging.info('%s(%d)' % (self.__class__.__name__, pid))
1200 super(Strace.Context.Process, self).__init__(root.blacklist, pid, None)
1201 assert isinstance(root, ApiBase.Context)
1202 self._root = weakref.ref(root)
1203 # The dict key is the function name of the pending call, like 'open'
1204 # or 'execve'.
1205 self._pending_calls = {}
1206 self._line_number = 0
1207 # Current directory when the process started.
1208 self.initial_cwd = self.RelativePath(self._root(), None)
1209 self.parentid = None
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001210 self._done = False
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001211
1212 def get_cwd(self):
1213 """Returns the best known value of cwd."""
1214 return self.cwd or self.initial_cwd
1215
1216 def render(self):
1217 """Returns the string value of the RelativePath() object.
1218
1219 Used by RelativePath. Returns the initial directory and not the
1220 current one since the current directory 'cwd' validity is time-limited.
1221
1222 The validity is only guaranteed once all the logs are processed.
1223 """
1224 return self.initial_cwd.render()
1225
1226 def on_line(self, line):
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001227 assert isinstance(line, str)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001228 self._line_number += 1
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001229 if self._done:
1230 raise TracingFailure(
1231 'Found a trace for a terminated process',
1232 None, None, None)
1233
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001234 if self.RE_SIGNAL.match(line):
1235 # Ignore signals.
1236 return
1237
1238 try:
1239 match = self.RE_KILLED.match(line)
1240 if match:
1241 # Converts a '+++ killed by Foo +++' trace into an exit_group().
1242 self.handle_exit_group(match.group(1), None)
1243 return
1244
1245 match = self.RE_PROCESS_EXITED.match(line)
1246 if match:
1247 # Converts a '+++ exited with 1 +++' trace into an exit_group()
1248 self.handle_exit_group(match.group(1), None)
1249 return
1250
1251 match = self.RE_UNFINISHED.match(line)
1252 if match:
1253 if match.group(1) in self._pending_calls:
1254 raise TracingFailure(
1255 'Found two unfinished calls for the same function',
1256 None, None, None,
1257 self._pending_calls)
1258 self._pending_calls[match.group(1)] = (
1259 match.group(1) + match.group(2))
1260 return
1261
maruel@chromium.org4e506502012-10-18 15:02:58 +00001262 match = (
1263 self.RE_UNFINISHED_EXIT.match(line) or
1264 self.RE_INTERRUPTED_HARD.match(line))
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001265 if match:
1266 # The process died. No other line can be processed afterward.
1267 self._done = True
1268 return
1269
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001270 match = self.RE_UNAVAILABLE.match(line)
1271 if match:
1272 # This usually means a process was killed and a pending call was
1273 # canceled.
1274 # TODO(maruel): Look up the last exit_group() trace just above and
1275 # make sure any self._pending_calls[anything] is properly flushed.
1276 return
1277
1278 match = self.RE_RESUMED.match(line)
1279 if match:
1280 if match.group(1) not in self._pending_calls:
1281 raise TracingFailure(
1282 'Found a resumed call that was not logged as unfinished',
1283 None, None, None,
1284 self._pending_calls)
1285 pending = self._pending_calls.pop(match.group(1))
1286 # Reconstruct the line.
1287 line = pending + match.group(2)
1288
1289 match = self.RE_HEADER.match(line)
1290 if not match:
1291 raise TracingFailure(
1292 'Found an invalid line: %s' % line,
1293 None, None, None)
1294 if match.group(1) == self.UNNAMED_FUNCTION:
1295 return
1296
1297 # It's a valid line, handle it.
1298 handler = getattr(self, 'handle_%s' % match.group(1), None)
1299 if not handler:
1300 self._handle_unknown(match.group(1), match.group(2), match.group(3))
1301 return handler(match.group(2), match.group(3))
1302 except TracingFailure, e:
1303 # Hack in the values since the handler could be a static function.
1304 e.pid = self.pid
1305 e.line = line
1306 e.line_number = self._line_number
1307 # Re-raise the modified exception.
1308 raise
1309 except (KeyError, NotImplementedError, ValueError), e:
1310 raise TracingFailure(
1311 'Trace generated a %s exception: %s' % (
1312 e.__class__.__name__, str(e)),
1313 self.pid,
1314 self._line_number,
1315 line,
1316 e)
1317
1318 @parse_args(r'^\"(.+?)\", [FKORWX_|]+$', True)
1319 def handle_access(self, args, _result):
1320 self._handle_file(args[0], True)
1321
1322 @parse_args(r'^\"(.+?)\"$', True)
1323 def handle_chdir(self, args, _result):
1324 """Updates cwd."""
1325 self.cwd = self.RelativePath(self, args[0])
1326 logging.debug('handle_chdir(%d, %s)' % (self.pid, self.cwd))
1327
1328 def handle_clone(self, _args, result):
1329 """Transfers cwd."""
1330 if result.startswith(('?', '-1')):
1331 # The call failed.
1332 return
1333 # Update the other process right away.
1334 childpid = int(result)
1335 child = self._root().get_or_set_proc(childpid)
1336 if child.parentid is not None or childpid in self.children:
1337 raise TracingFailure(
1338 'Found internal inconsitency in process lifetime detection '
1339 'during a clone() call',
1340 None, None, None)
1341
1342 # Copy the cwd object.
1343 child.initial_cwd = self.get_cwd()
1344 child.parentid = self.pid
1345 # It is necessary because the logs are processed out of order.
1346 self.children.append(child)
1347
1348 def handle_close(self, _args, _result):
1349 pass
1350
1351 def handle_chmod(self, _args, _result):
1352 pass
1353
1354 def handle_creat(self, _args, _result):
1355 # Ignore files created, since they didn't need to exist.
1356 pass
1357
1358 @parse_args(r'^\"(.+?)\", \[(.+)\], \[\/\* \d+ vars? \*\/\]$', True)
1359 def handle_execve(self, args, _result):
1360 # Even if in practice execve() doesn't returns when it succeeds, strace
1361 # still prints '0' as the result.
1362 filepath = args[0]
1363 self._handle_file(filepath, False)
1364 self.executable = self.RelativePath(self.get_cwd(), filepath)
1365 self.command = strace_process_quoted_arguments(args[1])
1366
1367 def handle_exit_group(self, _args, _result):
1368 """Removes cwd."""
1369 self.cwd = None
1370
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001371 @parse_args(r'^(\d+|AT_FDCWD), \"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', True)
1372 def handle_faccessat(self, args, _results):
1373 if args[0] == 'AT_FDCWD':
1374 self._handle_file(args[1], True)
1375 else:
1376 raise Exception('Relative faccess not implemented.')
1377
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001378 def handle_fork(self, args, result):
1379 self._handle_unknown('fork', args, result)
1380
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001381 def handle_futex(self, _args, _result):
1382 pass
1383
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001384 def handle_getcwd(self, _args, _result):
1385 pass
1386
1387 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1388 def handle_link(self, args, _result):
1389 self._handle_file(args[0], False)
1390 self._handle_file(args[1], False)
1391
1392 @parse_args(r'\"(.+?)\", \{.+?, \.\.\.\}', True)
1393 def handle_lstat(self, args, _result):
1394 self._handle_file(args[0], True)
1395
1396 def handle_mkdir(self, _args, _result):
1397 pass
1398
1399 @parse_args(r'^\"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', False)
1400 def handle_open(self, args, _result):
1401 if 'O_DIRECTORY' in args[1]:
1402 return
1403 self._handle_file(args[0], False)
1404
1405 @parse_args(r'^(\d+|AT_FDCWD), \"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', False)
1406 def handle_openat(self, args, _result):
1407 if 'O_DIRECTORY' in args[2]:
1408 return
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001409 if args[0] == 'AT_FDCWD':
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001410 self._handle_file(args[1], False)
1411 else:
1412 # TODO(maruel): Implement relative open if necessary instead of the
1413 # AT_FDCWD flag, let's hope not since this means tracking all active
1414 # directory handles.
1415 raise Exception('Relative open via openat not implemented.')
1416
1417 @parse_args(r'^\"(.+?)\", \".+?\"(\.\.\.)?, \d+$', False)
1418 def handle_readlink(self, args, _result):
1419 self._handle_file(args[0], False)
1420
1421 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1422 def handle_rename(self, args, _result):
1423 self._handle_file(args[0], False)
1424 self._handle_file(args[1], False)
1425
1426 def handle_rmdir(self, _args, _result):
1427 pass
1428
1429 def handle_setxattr(self, _args, _result):
1430 pass
1431
1432 @parse_args(r'\"(.+?)\", \{.+?, \.\.\.\}', True)
1433 def handle_stat(self, args, _result):
1434 self._handle_file(args[0], True)
1435
1436 def handle_symlink(self, _args, _result):
1437 pass
1438
csharp@chromium.orgfa672922012-11-02 17:43:31 +00001439 @parse_args(r'^\"(.+?)\", \d+', True)
1440 def handle_truncate(self, args, _result):
1441 self._handle_file(args[0], False)
1442
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001443 def handle_unlink(self, _args, _result):
1444 # In theory, the file had to be created anyway.
1445 pass
1446
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001447 def handle_unlinkat(self, _args, _result):
1448 # In theory, the file had to be created anyway.
1449 pass
1450
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001451 def handle_statfs(self, _args, _result):
1452 pass
1453
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001454 def handle_utimensat(self, _args, _result):
1455 pass
1456
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001457 def handle_vfork(self, args, result):
1458 self._handle_unknown('vfork', args, result)
1459
1460 @staticmethod
1461 def _handle_unknown(function, args, result):
1462 raise TracingFailure(
1463 'Unexpected/unimplemented trace %s(%s)= %s' %
1464 (function, args, result),
1465 None, None, None)
1466
1467 def _handle_file(self, filepath, touch_only):
1468 filepath = self.RelativePath(self.get_cwd(), filepath)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001469 #assert not touch_only, render(filepath)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001470 self.add_file(filepath, touch_only)
1471
1472 def __init__(self, blacklist, initial_cwd):
1473 super(Strace.Context, self).__init__(blacklist)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001474 assert_is_renderable(initial_cwd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001475 self.initial_cwd = initial_cwd
1476
1477 def render(self):
1478 """Returns the string value of the initial cwd of the root process.
1479
1480 Used by RelativePath.
1481 """
1482 return self.initial_cwd
1483
1484 def on_line(self, pid, line):
1485 """Transfers control into the Process.on_line() function."""
1486 self.get_or_set_proc(pid).on_line(line.strip())
1487
1488 def to_results(self):
1489 """Finds back the root process and verify consistency."""
1490 # TODO(maruel): Absolutely unecessary, fix me.
1491 root = [p for p in self._process_lookup.itervalues() if not p.parentid]
1492 if len(root) != 1:
1493 raise TracingFailure(
1494 'Found internal inconsitency in process lifetime detection '
1495 'while finding the root process',
1496 None,
1497 None,
1498 None,
1499 sorted(p.pid for p in root))
1500 self.root_process = root[0]
1501 process = self.root_process.to_results_process()
1502 if sorted(self._process_lookup) != sorted(p.pid for p in process.all):
1503 raise TracingFailure(
1504 'Found internal inconsitency in process lifetime detection '
1505 'while looking for len(tree) == len(list)',
1506 None,
1507 None,
1508 None,
1509 sorted(self._process_lookup),
1510 sorted(p.pid for p in process.all))
1511 return Results(process)
1512
1513 def get_or_set_proc(self, pid):
1514 """Returns the Context.Process instance for this pid or creates a new one.
1515 """
1516 if not pid or not isinstance(pid, int):
1517 raise TracingFailure(
1518 'Unpexpected value for pid: %r' % pid,
1519 pid,
1520 None,
1521 None,
1522 pid)
1523 if pid not in self._process_lookup:
1524 self._process_lookup[pid] = self.Process(self, pid)
1525 return self._process_lookup[pid]
1526
1527 @classmethod
1528 def traces(cls):
1529 """Returns the list of all handled traces to pass this as an argument to
1530 strace.
1531 """
1532 prefix = 'handle_'
1533 return [i[len(prefix):] for i in dir(cls.Process) if i.startswith(prefix)]
1534
1535 class Tracer(ApiBase.Tracer):
1536 MAX_LEN = 256
1537
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001538 def __init__(self, logname, use_sudo):
1539 super(Strace.Tracer, self).__init__(logname)
1540 self.use_sudo = use_sudo
1541
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001542 def trace(self, cmd, cwd, tracename, output):
1543 """Runs strace on an executable."""
1544 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
1545 assert os.path.isabs(cmd[0]), cmd[0]
1546 assert os.path.isabs(cwd), cwd
1547 assert os.path.normpath(cwd) == cwd, cwd
1548 with self._lock:
1549 if not self._initialized:
1550 raise TracingFailure(
1551 'Called Tracer.trace() on an unitialized object',
1552 None, None, None, tracename)
1553 assert tracename not in (i['trace'] for i in self._traces)
1554 stdout = stderr = None
1555 if output:
1556 stdout = subprocess.PIPE
1557 stderr = subprocess.STDOUT
1558 # Ensure all file related APIs are hooked.
1559 traces = ','.join(Strace.Context.traces() + ['file'])
1560 trace_cmd = [
1561 'strace',
1562 '-ff',
1563 '-s', '%d' % self.MAX_LEN,
1564 '-e', 'trace=%s' % traces,
1565 '-o', self._logname + '.' + tracename,
1566 ]
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001567 if self.use_sudo is True:
1568 trace_cmd.insert(0, 'sudo')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001569 child = subprocess.Popen(
1570 trace_cmd + cmd,
1571 cwd=cwd,
1572 stdin=subprocess.PIPE,
1573 stdout=stdout,
1574 stderr=stderr)
1575 out = child.communicate()[0]
1576 # TODO(maruel): Walk the logs and figure out the root process would
1577 # simplify parsing the logs a *lot*.
1578 with self._lock:
1579 assert tracename not in (i['trace'] for i in self._traces)
1580 self._traces.append(
1581 {
1582 'cmd': cmd,
1583 'cwd': cwd,
maruel@chromium.org964ddde2012-10-29 18:13:46 +00001584 'output': out,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001585 # The pid of strace process, not very useful.
1586 'pid': child.pid,
1587 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001588 })
1589 return child.returncode, out
1590
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001591 def __init__(self, use_sudo=None):
1592 super(Strace, self).__init__()
1593 self.use_sudo = use_sudo
1594
1595 def get_tracer(self, logname):
1596 return self.Tracer(logname, self.use_sudo)
1597
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001598 @staticmethod
1599 def clean_trace(logname):
1600 if os.path.isfile(logname):
1601 os.remove(logname)
1602 # Also delete any pid specific file from previous traces.
1603 for i in glob.iglob(logname + '.*'):
1604 if i.rsplit('.', 1)[1].isdigit():
1605 os.remove(i)
1606
1607 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001608 def parse_log(cls, logname, blacklist, trace_name):
1609 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00001610 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001611 data = read_json(logname)
1612 out = []
1613 for item in data['traces']:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001614 if trace_name and item['trace'] != trace_name:
1615 continue
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001616 result = {
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001617 'output': item['output'],
maruel@chromium.org964ddde2012-10-29 18:13:46 +00001618 'trace': item['trace'],
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001619 }
1620 try:
1621 context = cls.Context(blacklist, item['cwd'])
1622 for pidfile in glob.iglob('%s.%s.*' % (logname, item['trace'])):
1623 pid = pidfile.rsplit('.', 1)[1]
1624 if pid.isdigit():
1625 pid = int(pid)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001626 for line in open(pidfile, 'rb'):
1627 context.on_line(pid, line)
1628 result['results'] = context.to_results()
maruel@chromium.org424f5692012-10-15 15:42:38 +00001629 except TracingFailure:
1630 result['exception'] = sys.exc_info()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001631 out.append(result)
1632 return out
1633
1634
1635class Dtrace(ApiBase):
1636 """Uses DTrace framework through dtrace. Requires root access.
1637
1638 Implies Mac OSX.
1639
1640 dtruss can't be used because it has compatibility issues with python.
1641
1642 Also, the pid->cwd handling needs to be done manually since OSX has no way to
1643 get the absolute path of the 'cwd' dtrace variable from the probe.
1644
1645 Also, OSX doesn't populate curpsinfo->pr_psargs properly, see
1646 https://discussions.apple.com/thread/1980539. So resort to handling execve()
1647 manually.
1648
1649 errno is not printed in the log since this implementation currently only cares
1650 about files that were successfully opened.
1651 """
1652 class Context(ApiBase.Context):
1653 # Format: index pid function(args)
1654 RE_HEADER = re.compile(r'^\d+ (\d+) ([a-zA-Z_\-]+)\((.*?)\)$')
1655
1656 # Arguments parsing.
1657 RE_DTRACE_BEGIN = re.compile(r'^\"(.+?)\"$')
1658 RE_CHDIR = re.compile(r'^\"(.+?)\"$')
1659 RE_EXECVE = re.compile(r'^\"(.+?)\", \[(\d+), (.+)\]$')
1660 RE_OPEN = re.compile(r'^\"(.+?)\", (0x[0-9a-z]+), (0x[0-9a-z]+)$')
1661 RE_PROC_START = re.compile(r'^(\d+), \"(.+?)\", (\d+)$')
1662 RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$')
1663
1664 O_DIRECTORY = 0x100000
1665
1666 class Process(ApiBase.Context.Process):
1667 def __init__(self, *args):
1668 super(Dtrace.Context.Process, self).__init__(*args)
1669 self.cwd = self.initial_cwd
1670
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001671 def __init__(self, blacklist, thunk_pid, initial_cwd):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001672 logging.info(
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001673 '%s(%d, %s)' % (self.__class__.__name__, thunk_pid, initial_cwd))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001674 super(Dtrace.Context, self).__init__(blacklist)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001675 assert isinstance(initial_cwd, unicode), initial_cwd
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001676 # Process ID of the temporary script created by create_thunk().
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001677 self._thunk_pid = thunk_pid
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001678 self._initial_cwd = initial_cwd
1679 self._line_number = 0
1680
1681 def on_line(self, line):
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001682 assert isinstance(line, unicode), line
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001683 self._line_number += 1
1684 match = self.RE_HEADER.match(line)
1685 if not match:
1686 raise TracingFailure(
1687 'Found malformed line: %s' % line,
1688 None,
1689 self._line_number,
1690 line)
1691 fn = getattr(
1692 self,
1693 'handle_%s' % match.group(2).replace('-', '_'),
1694 self._handle_ignored)
1695 # It is guaranteed to succeed because of the regexp. Or at least I thought
1696 # it would.
1697 pid = int(match.group(1))
1698 try:
1699 return fn(pid, match.group(3))
1700 except TracingFailure, e:
1701 # Hack in the values since the handler could be a static function.
1702 e.pid = pid
1703 e.line = line
1704 e.line_number = self._line_number
1705 # Re-raise the modified exception.
1706 raise
1707 except (KeyError, NotImplementedError, ValueError), e:
1708 raise TracingFailure(
1709 'Trace generated a %s exception: %s' % (
1710 e.__class__.__name__, str(e)),
1711 pid,
1712 self._line_number,
1713 line,
1714 e)
1715
1716 def to_results(self):
1717 process = self.root_process.to_results_process()
1718 # Internal concistency check.
1719 if sorted(self._process_lookup) != sorted(p.pid for p in process.all):
1720 raise TracingFailure(
1721 'Found internal inconsitency in process lifetime detection '
1722 'while looking for len(tree) == len(list)',
1723 None,
1724 None,
1725 None,
1726 sorted(self._process_lookup),
1727 sorted(p.pid for p in process.all))
1728 return Results(process)
1729
1730 def handle_dtrace_BEGIN(self, _pid, args):
1731 if not self.RE_DTRACE_BEGIN.match(args):
1732 raise TracingFailure(
1733 'Found internal inconsitency in dtrace_BEGIN log line',
1734 None, None, None)
1735
1736 def handle_proc_start(self, pid, args):
1737 """Transfers cwd.
1738
1739 The dtrace script already takes care of only tracing the processes that
1740 are child of the traced processes so there is no need to verify the
1741 process hierarchy.
1742 """
1743 if pid in self._process_lookup:
1744 raise TracingFailure(
1745 'Found internal inconsitency in proc_start: %d started two times' %
1746 pid,
1747 None, None, None)
1748 match = self.RE_PROC_START.match(args)
1749 if not match:
1750 raise TracingFailure(
1751 'Failed to parse arguments: %s' % args,
1752 None, None, None)
1753 ppid = int(match.group(1))
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001754 if ppid == self._thunk_pid and not self.root_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001755 proc = self.root_process = self.Process(
1756 self.blacklist, pid, self._initial_cwd)
1757 elif ppid in self._process_lookup:
1758 proc = self.Process(self.blacklist, pid, self._process_lookup[ppid].cwd)
1759 self._process_lookup[ppid].children.append(proc)
1760 else:
1761 # Another process tree, ignore.
1762 return
1763 self._process_lookup[pid] = proc
1764 logging.debug(
1765 'New child: %s -> %d cwd:%s' %
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001766 (ppid, pid, render(proc.initial_cwd)))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001767
1768 def handle_proc_exit(self, pid, _args):
1769 """Removes cwd."""
1770 if pid in self._process_lookup:
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001771 # self._thunk_pid is not traced itself and other traces run neither.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001772 self._process_lookup[pid].cwd = None
1773
1774 def handle_execve(self, pid, args):
1775 """Sets the process' executable.
1776
1777 TODO(maruel): Read command line arguments. See
1778 https://discussions.apple.com/thread/1980539 for an example.
1779 https://gist.github.com/1242279
1780
1781 Will have to put the answer at http://stackoverflow.com/questions/7556249.
1782 :)
1783 """
1784 if not pid in self._process_lookup:
1785 # Another process tree, ignore.
1786 return
1787 match = self.RE_EXECVE.match(args)
1788 if not match:
1789 raise TracingFailure(
1790 'Failed to parse arguments: %r' % args,
1791 None, None, None)
1792 proc = self._process_lookup[pid]
1793 proc.executable = match.group(1)
1794 proc.command = self.process_escaped_arguments(match.group(3))
1795 if int(match.group(2)) != len(proc.command):
1796 raise TracingFailure(
1797 'Failed to parse execve() arguments: %s' % args,
1798 None, None, None)
1799
1800 def handle_chdir(self, pid, args):
1801 """Updates cwd."""
1802 if pid not in self._process_lookup:
1803 # Another process tree, ignore.
1804 return
1805 cwd = self.RE_CHDIR.match(args).group(1)
1806 if not cwd.startswith('/'):
1807 cwd2 = os.path.join(self._process_lookup[pid].cwd, cwd)
1808 logging.debug('handle_chdir(%d, %s) -> %s' % (pid, cwd, cwd2))
1809 else:
1810 logging.debug('handle_chdir(%d, %s)' % (pid, cwd))
1811 cwd2 = cwd
1812 self._process_lookup[pid].cwd = cwd2
1813
1814 def handle_open_nocancel(self, pid, args):
1815 """Redirects to handle_open()."""
1816 return self.handle_open(pid, args)
1817
1818 def handle_open(self, pid, args):
1819 if pid not in self._process_lookup:
1820 # Another process tree, ignore.
1821 return
1822 match = self.RE_OPEN.match(args)
1823 if not match:
1824 raise TracingFailure(
1825 'Failed to parse arguments: %s' % args,
1826 None, None, None)
1827 flag = int(match.group(2), 16)
1828 if self.O_DIRECTORY & flag == self.O_DIRECTORY:
1829 # Ignore directories.
1830 return
1831 self._handle_file(pid, match.group(1))
1832
1833 def handle_rename(self, pid, args):
1834 if pid not in self._process_lookup:
1835 # Another process tree, ignore.
1836 return
1837 match = self.RE_RENAME.match(args)
1838 if not match:
1839 raise TracingFailure(
1840 'Failed to parse arguments: %s' % args,
1841 None, None, None)
1842 self._handle_file(pid, match.group(1))
1843 self._handle_file(pid, match.group(2))
1844
1845 def _handle_file(self, pid, filepath):
1846 if not filepath.startswith('/'):
1847 filepath = os.path.join(self._process_lookup[pid].cwd, filepath)
1848 # We can get '..' in the path.
1849 filepath = os.path.normpath(filepath)
1850 # Sadly, still need to filter out directories here;
1851 # saw open_nocancel(".", 0, 0) = 0 lines.
1852 if os.path.isdir(filepath):
1853 return
1854 self._process_lookup[pid].add_file(filepath, False)
1855
1856 def handle_ftruncate(self, pid, args):
1857 """Just used as a signal to kill dtrace, ignoring."""
1858 pass
1859
1860 @staticmethod
1861 def _handle_ignored(pid, args):
1862 """Is called for all the event traces that are not handled."""
1863 raise NotImplementedError('Please implement me')
1864
1865 @staticmethod
1866 def process_escaped_arguments(text):
1867 """Extracts escaped arguments on a string and return the arguments as a
1868 list.
1869
1870 Implemented as an automaton.
1871
1872 Example:
1873 With text = '\\001python2.7\\001-c\\001print(\\"hi\\")\\0', the
1874 function will return ['python2.7', '-c', 'print("hi")]
1875 """
1876 if not text.endswith('\\0'):
1877 raise ValueError('String is not null terminated: %r' % text, text)
1878 text = text[:-2]
1879
1880 def unescape(x):
1881 """Replaces '\\' with '\' and '\?' (where ? is anything) with ?."""
1882 out = []
1883 escaped = False
1884 for i in x:
1885 if i == '\\' and not escaped:
1886 escaped = True
1887 continue
1888 escaped = False
1889 out.append(i)
1890 return ''.join(out)
1891
1892 return [unescape(i) for i in text.split('\\001')]
1893
1894 class Tracer(ApiBase.Tracer):
1895 # pylint: disable=C0301
1896 #
1897 # To understand the following code, you'll want to take a look at:
1898 # http://developers.sun.com/solaris/articles/dtrace_quickref/dtrace_quickref.html
1899 # https://wikis.oracle.com/display/DTrace/Variables
1900 # http://docs.oracle.com/cd/E19205-01/820-4221/
1901 #
1902 # 0. Dump all the valid probes into a text file. It is important, you
1903 # want to redirect into a file and you don't want to constantly 'sudo'.
1904 # $ sudo dtrace -l > probes.txt
1905 #
1906 # 1. Count the number of probes:
1907 # $ wc -l probes.txt
1908 # 81823 # On OSX 10.7, including 1 header line.
1909 #
1910 # 2. List providers, intentionally skipping all the 'syspolicy10925' and the
1911 # likes and skipping the header with NR>1:
1912 # $ awk 'NR>1 { print $2 }' probes.txt | sort | uniq | grep -v '[[:digit:]]'
1913 # dtrace
1914 # fbt
1915 # io
1916 # ip
1917 # lockstat
1918 # mach_trap
1919 # proc
1920 # profile
1921 # sched
1922 # syscall
1923 # tcp
1924 # vminfo
1925 #
1926 # 3. List of valid probes:
1927 # $ grep syscall probes.txt | less
1928 # or use dtrace directly:
1929 # $ sudo dtrace -l -P syscall | less
1930 #
1931 # trackedpid is an associative array where its value can be 0, 1 or 2.
1932 # 0 is for untracked processes and is the default value for items not
1933 # in the associative array.
1934 # 1 is for tracked processes.
1935 # 2 is for the script created by create_thunk() only. It is not tracked
1936 # itself but all its decendants are.
1937 #
1938 # The script will kill itself only once waiting_to_die == 1 and
1939 # current_processes == 0, so that both getlogin() was called and that
1940 # all traced processes exited.
1941 #
1942 # TODO(maruel): Use cacheable predicates. See
1943 # https://wikis.oracle.com/display/DTrace/Performance+Considerations
1944 D_CODE = """
1945 dtrace:::BEGIN {
1946 waiting_to_die = 0;
1947 current_processes = 0;
1948 logindex = 0;
1949 printf("%d %d %s_%s(\\"%s\\")\\n",
1950 logindex, PID, probeprov, probename, SCRIPT);
1951 logindex++;
1952 }
1953
1954 proc:::start /trackedpid[ppid]/ {
1955 trackedpid[pid] = 1;
1956 current_processes += 1;
1957 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
1958 logindex, pid, probeprov, probename,
1959 ppid,
1960 execname,
1961 current_processes);
1962 logindex++;
1963 }
1964 /* Should use SCRIPT but there is no access to this variable at that
1965 * point. */
1966 proc:::start /ppid == PID && execname == "Python"/ {
1967 trackedpid[pid] = 2;
1968 current_processes += 1;
1969 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
1970 logindex, pid, probeprov, probename,
1971 ppid,
1972 execname,
1973 current_processes);
1974 logindex++;
1975 }
1976 proc:::exit /trackedpid[pid] &&
1977 current_processes == 1 &&
1978 waiting_to_die == 1/ {
1979 trackedpid[pid] = 0;
1980 current_processes -= 1;
1981 printf("%d %d %s_%s(%d)\\n",
1982 logindex, pid, probeprov, probename,
1983 current_processes);
1984 logindex++;
1985 exit(0);
1986 }
1987 proc:::exit /trackedpid[pid]/ {
1988 trackedpid[pid] = 0;
1989 current_processes -= 1;
1990 printf("%d %d %s_%s(%d)\\n",
1991 logindex, pid, probeprov, probename,
1992 current_processes);
1993 logindex++;
1994 }
1995
1996 /* Use an arcane function to detect when we need to die */
1997 syscall::ftruncate:entry /pid == PID && arg0 == FILE_ID/ {
1998 waiting_to_die = 1;
1999 printf("%d %d %s()\\n", logindex, pid, probefunc);
2000 logindex++;
2001 }
2002 syscall::ftruncate:entry /
2003 pid == PID && arg0 == FILE_ID && current_processes == 0/ {
2004 exit(0);
2005 }
2006
2007 syscall::open*:entry /trackedpid[pid] == 1/ {
2008 self->open_arg0 = arg0;
2009 self->open_arg1 = arg1;
2010 self->open_arg2 = arg2;
2011 }
2012 syscall::open*:return /trackedpid[pid] == 1 && errno == 0/ {
2013 this->open_arg0 = copyinstr(self->open_arg0);
2014 printf("%d %d %s(\\"%s\\", 0x%x, 0x%x)\\n",
2015 logindex, pid, probefunc,
2016 this->open_arg0,
2017 self->open_arg1,
2018 self->open_arg2);
2019 logindex++;
2020 this->open_arg0 = 0;
2021 }
2022 syscall::open*:return /trackedpid[pid] == 1/ {
2023 self->open_arg0 = 0;
2024 self->open_arg1 = 0;
2025 self->open_arg2 = 0;
2026 }
2027
2028 syscall::rename:entry /trackedpid[pid] == 1/ {
2029 self->rename_arg0 = arg0;
2030 self->rename_arg1 = arg1;
2031 }
2032 syscall::rename:return /trackedpid[pid] == 1 && errno == 0/ {
2033 this->rename_arg0 = copyinstr(self->rename_arg0);
2034 this->rename_arg1 = copyinstr(self->rename_arg1);
2035 printf("%d %d %s(\\"%s\\", \\"%s\\")\\n",
2036 logindex, pid, probefunc,
2037 this->rename_arg0,
2038 this->rename_arg1);
2039 logindex++;
2040 this->rename_arg0 = 0;
2041 this->rename_arg1 = 0;
2042 }
2043 syscall::rename:return /trackedpid[pid] == 1/ {
2044 self->rename_arg0 = 0;
2045 self->rename_arg1 = 0;
2046 }
2047
2048 /* Track chdir, it's painful because it is only receiving relative path.
2049 */
2050 syscall::chdir:entry /trackedpid[pid] == 1/ {
2051 self->chdir_arg0 = arg0;
2052 }
2053 syscall::chdir:return /trackedpid[pid] == 1 && errno == 0/ {
2054 this->chdir_arg0 = copyinstr(self->chdir_arg0);
2055 printf("%d %d %s(\\"%s\\")\\n",
2056 logindex, pid, probefunc,
2057 this->chdir_arg0);
2058 logindex++;
2059 this->chdir_arg0 = 0;
2060 }
2061 syscall::chdir:return /trackedpid[pid] == 1/ {
2062 self->chdir_arg0 = 0;
2063 }
2064 """
2065
2066 # execve-specific code, tends to throw a lot of exceptions.
2067 D_CODE_EXECVE = """
2068 /* Finally what we care about! */
2069 syscall::exec*:entry /trackedpid[pid]/ {
2070 self->exec_arg0 = copyinstr(arg0);
2071 /* Incrementally probe for a NULL in the argv parameter of execve() to
2072 * figure out argc. */
2073 /* TODO(maruel): Skip the remaining copyin() when a NULL pointer was
2074 * found. */
2075 self->exec_argc = 0;
2076 /* Probe for argc==1 */
2077 this->exec_argv = (user_addr_t*)copyin(
2078 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2079 self->exec_argc = this->exec_argv[self->exec_argc] ?
2080 (self->exec_argc + 1) : self->exec_argc;
2081
2082 /* Probe for argc==2 */
2083 this->exec_argv = (user_addr_t*)copyin(
2084 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2085 self->exec_argc = this->exec_argv[self->exec_argc] ?
2086 (self->exec_argc + 1) : self->exec_argc;
2087
2088 /* Probe for argc==3 */
2089 this->exec_argv = (user_addr_t*)copyin(
2090 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2091 self->exec_argc = this->exec_argv[self->exec_argc] ?
2092 (self->exec_argc + 1) : self->exec_argc;
2093
2094 /* Probe for argc==4 */
2095 this->exec_argv = (user_addr_t*)copyin(
2096 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2097 self->exec_argc = this->exec_argv[self->exec_argc] ?
2098 (self->exec_argc + 1) : self->exec_argc;
2099
2100 /* Copy the inputs strings since there is no guarantee they'll be
2101 * present after the call completed. */
2102 self->exec_argv0 = (self->exec_argc > 0) ?
2103 copyinstr(this->exec_argv[0]) : "";
2104 self->exec_argv1 = (self->exec_argc > 1) ?
2105 copyinstr(this->exec_argv[1]) : "";
2106 self->exec_argv2 = (self->exec_argc > 2) ?
2107 copyinstr(this->exec_argv[2]) : "";
2108 self->exec_argv3 = (self->exec_argc > 3) ?
2109 copyinstr(this->exec_argv[3]) : "";
2110 this->exec_argv = 0;
2111 }
2112 syscall::exec*:return /trackedpid[pid] && errno == 0/ {
2113 /* We need to join strings here, as using multiple printf() would
2114 * cause tearing when multiple threads/processes are traced.
2115 * Since it is impossible to escape a string and join it to another one,
2116 * like sprintf("%s%S", previous, more), use hackery.
2117 * Each of the elements are split with a \\1. \\0 cannot be used because
2118 * it is simply ignored. This will conflict with any program putting a
2119 * \\1 in their execve() string but this should be "rare enough" */
2120 this->args = "";
2121 /* Process exec_argv[0] */
2122 this->args = strjoin(
2123 this->args, (self->exec_argc > 0) ? self->exec_argv0 : "");
2124
2125 /* Process exec_argv[1] */
2126 this->args = strjoin(
2127 this->args, (self->exec_argc > 1) ? "\\1" : "");
2128 this->args = strjoin(
2129 this->args, (self->exec_argc > 1) ? self->exec_argv1 : "");
2130
2131 /* Process exec_argv[2] */
2132 this->args = strjoin(
2133 this->args, (self->exec_argc > 2) ? "\\1" : "");
2134 this->args = strjoin(
2135 this->args, (self->exec_argc > 2) ? self->exec_argv2 : "");
2136
2137 /* Process exec_argv[3] */
2138 this->args = strjoin(
2139 this->args, (self->exec_argc > 3) ? "\\1" : "");
2140 this->args = strjoin(
2141 this->args, (self->exec_argc > 3) ? self->exec_argv3 : "");
2142
2143 /* Prints self->exec_argc to permits verifying the internal
2144 * consistency since this code is quite fishy. */
2145 printf("%d %d %s(\\"%s\\", [%d, %S])\\n",
2146 logindex, pid, probefunc,
2147 self->exec_arg0,
2148 self->exec_argc,
2149 this->args);
2150 logindex++;
2151 this->args = 0;
2152 }
2153 syscall::exec*:return /trackedpid[pid]/ {
2154 self->exec_arg0 = 0;
2155 self->exec_argc = 0;
2156 self->exec_argv0 = 0;
2157 self->exec_argv1 = 0;
2158 self->exec_argv2 = 0;
2159 self->exec_argv3 = 0;
2160 }
2161 """
2162
2163 # Code currently not used.
2164 D_EXTRANEOUS = """
2165 /* This is a good learning experience, since it traces a lot of things
2166 * related to the process and child processes.
2167 * Warning: it generates a gigantic log. For example, tracing
2168 * "data/trace_inputs/child1.py --child" generates a 2mb log and takes
2169 * several minutes to execute.
2170 */
2171 /*
2172 mach_trap::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2173 printf("%d %d %s_%s() = %d\\n",
2174 logindex, pid, probeprov, probefunc, errno);
2175 logindex++;
2176 }
2177 proc::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2178 printf("%d %d %s_%s() = %d\\n",
2179 logindex, pid, probeprov, probefunc, errno);
2180 logindex++;
2181 }
2182 sched::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2183 printf("%d %d %s_%s() = %d\\n",
2184 logindex, pid, probeprov, probefunc, errno);
2185 logindex++;
2186 }
2187 syscall::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2188 printf("%d %d %s_%s() = %d\\n",
2189 logindex, pid, probeprov, probefunc, errno);
2190 logindex++;
2191 }
2192 vminfo::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2193 printf("%d %d %s_%s() = %d\\n",
2194 logindex, pid, probeprov, probefunc, errno);
2195 logindex++;
2196 }
2197 */
2198 /* TODO(maruel): *stat* functions and friends
2199 syscall::access:return,
2200 syscall::chdir:return,
2201 syscall::chflags:return,
2202 syscall::chown:return,
2203 syscall::chroot:return,
2204 syscall::getattrlist:return,
2205 syscall::getxattr:return,
2206 syscall::lchown:return,
2207 syscall::lstat64:return,
2208 syscall::lstat:return,
2209 syscall::mkdir:return,
2210 syscall::pathconf:return,
2211 syscall::readlink:return,
2212 syscall::removexattr:return,
2213 syscall::setxattr:return,
2214 syscall::stat64:return,
2215 syscall::stat:return,
2216 syscall::truncate:return,
2217 syscall::unlink:return,
2218 syscall::utimes:return,
2219 */
2220 """
2221
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002222 def __init__(self, logname, use_sudo):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002223 """Starts the log collection with dtrace.
2224
2225 Requires root access or chmod 4555 on dtrace. dtrace is asynchronous so
2226 this needs to wait for dtrace to be "warmed up".
2227 """
2228 super(Dtrace.Tracer, self).__init__(logname)
2229 self._script = create_thunk()
2230 # This unique dummy temp file is used to signal the dtrace script that it
2231 # should stop as soon as all the child processes are done. A bit hackish
2232 # but works fine enough.
2233 self._dummy_file_id, self._dummy_file_name = tempfile.mkstemp(
2234 prefix='trace_signal_file')
2235
2236 # Note: do not use the -p flag. It's useless if the initial process quits
2237 # too fast, resulting in missing traces from the grand-children. The D
2238 # code manages the dtrace lifetime itself.
2239 trace_cmd = [
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002240 'dtrace',
2241 # Use a larger buffer if getting 'out of scratch space' errors.
2242 # Ref: https://wikis.oracle.com/display/DTrace/Options+and+Tunables
2243 '-b', '10m',
2244 '-x', 'dynvarsize=10m',
2245 #'-x', 'dtrace_global_maxsize=1m',
2246 '-x', 'evaltime=exec',
2247 '-o', '/dev/stderr',
2248 '-q',
2249 '-n', self._get_dtrace_code(),
2250 ]
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002251 if use_sudo is not False:
2252 trace_cmd.insert(0, 'sudo')
2253
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002254 with open(self._logname + '.log', 'wb') as logfile:
2255 self._dtrace = subprocess.Popen(
2256 trace_cmd, stdout=logfile, stderr=subprocess.STDOUT)
2257 logging.debug('Started dtrace pid: %d' % self._dtrace.pid)
2258
2259 # Reads until one line is printed, which signifies dtrace is up and ready.
2260 with open(self._logname + '.log', 'rb') as logfile:
2261 while 'dtrace_BEGIN' not in logfile.readline():
2262 if self._dtrace.poll() is not None:
2263 # Do a busy wait. :/
2264 break
2265 logging.debug('dtrace started')
2266
2267 def _get_dtrace_code(self):
2268 """Setups the D code to implement child process tracking.
2269
2270 Injects the cookie in the script so it knows when to stop.
2271
2272 The script will detect any instance of the script created with
2273 create_thunk() and will start tracing it.
2274 """
maruel@chromium.orgbe75f282012-11-02 19:34:37 +00002275 out = (
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002276 'inline int PID = %d;\n'
2277 'inline string SCRIPT = "%s";\n'
2278 'inline int FILE_ID = %d;\n'
2279 '\n'
2280 '%s') % (
2281 os.getpid(),
2282 self._script,
2283 self._dummy_file_id,
maruel@chromium.orgbe75f282012-11-02 19:34:37 +00002284 self.D_CODE)
2285 if os.environ.get('TRACE_INPUTS_DTRACE_ENABLE_EXECVE') == '1':
2286 # Do not enable by default since it tends to spew dtrace: error lines
2287 # because the execve() parameters are not in valid memory at the time of
2288 # logging.
2289 # TODO(maruel): Find a way to make this reliable since it's useful but
2290 # only works in limited/trivial uses cases for now.
2291 out += self.D_CODE_EXECVE
2292 return out
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002293
2294 def trace(self, cmd, cwd, tracename, output):
2295 """Runs dtrace on an executable.
2296
2297 This dtruss is broken when it starts the process itself or when tracing
2298 child processes, this code starts a wrapper process
2299 generated with create_thunk() which starts the executable to trace.
2300 """
2301 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
2302 assert os.path.isabs(cmd[0]), cmd[0]
2303 assert os.path.isabs(cwd), cwd
2304 assert os.path.normpath(cwd) == cwd, cwd
2305 with self._lock:
2306 if not self._initialized:
2307 raise TracingFailure(
2308 'Called Tracer.trace() on an unitialized object',
2309 None, None, None, tracename)
2310 assert tracename not in (i['trace'] for i in self._traces)
2311
2312 # Starts the script wrapper to start the child process. This signals the
2313 # dtrace script that this process is to be traced.
2314 stdout = stderr = None
2315 if output:
2316 stdout = subprocess.PIPE
2317 stderr = subprocess.STDOUT
2318 child_cmd = [
2319 sys.executable,
2320 self._script,
2321 tracename,
2322 ]
2323 # Call a dummy function so that dtrace knows I'm about to launch a process
2324 # that needs to be traced.
2325 # Yummy.
2326 child = subprocess.Popen(
2327 child_cmd + fix_python_path(cmd),
2328 stdin=subprocess.PIPE,
2329 stdout=stdout,
2330 stderr=stderr,
2331 cwd=cwd)
2332 logging.debug('Started child pid: %d' % child.pid)
2333
2334 out = child.communicate()[0]
2335 # This doesn't mean tracing is done, one of the grand-child process may
2336 # still be alive. It will be tracked with the dtrace script.
2337
2338 with self._lock:
2339 assert tracename not in (i['trace'] for i in self._traces)
2340 self._traces.append(
2341 {
2342 'cmd': cmd,
2343 'cwd': cwd,
2344 # The pid of strace process, not very useful.
2345 'pid': child.pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002346 'output': out,
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002347 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002348 })
2349 return child.returncode, out
2350
2351 def close(self, timeout=None):
2352 """Terminates dtrace."""
2353 logging.debug('close(%s)' % timeout)
2354 try:
2355 try:
2356 super(Dtrace.Tracer, self).close(timeout)
2357 # Signal dtrace that it should stop now.
maruel@chromium.orgf43e68b2012-10-15 20:23:10 +00002358 # ftruncate doesn't exist on Windows.
2359 os.ftruncate(self._dummy_file_id, 0) # pylint: disable=E1101
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002360 if timeout:
2361 start = time.time()
2362 # Use polling. :/
2363 while (self._dtrace.poll() is None and
2364 (time.time() - start) < timeout):
2365 time.sleep(0.1)
2366 self._dtrace.kill()
2367 self._dtrace.wait()
2368 finally:
2369 # Make sure to kill it in any case.
2370 if self._dtrace.poll() is None:
2371 try:
2372 self._dtrace.kill()
2373 self._dtrace.wait()
2374 except OSError:
2375 pass
2376
2377 if self._dtrace.returncode != 0:
2378 # Warn about any dtrace failure but basically ignore it.
2379 print 'dtrace failure: %s' % self._dtrace.returncode
2380 finally:
2381 os.close(self._dummy_file_id)
2382 os.remove(self._dummy_file_name)
2383 os.remove(self._script)
2384
2385 def post_process_log(self):
2386 """Sorts the log back in order when each call occured.
2387
2388 dtrace doesn't save the buffer in strict order since it keeps one buffer
2389 per CPU.
2390 """
2391 super(Dtrace.Tracer, self).post_process_log()
2392 logname = self._logname + '.log'
2393 with open(logname, 'rb') as logfile:
2394 lines = [l for l in logfile if l.strip()]
2395 errors = [l for l in lines if l.startswith('dtrace:')]
2396 if errors:
2397 raise TracingFailure(
2398 'Found errors in the trace: %s' % '\n'.join(errors),
2399 None, None, None, logname)
2400 try:
2401 lines = sorted(lines, key=lambda l: int(l.split(' ', 1)[0]))
2402 except ValueError:
2403 raise TracingFailure(
2404 'Found errors in the trace: %s' % '\n'.join(
2405 l for l in lines if l.split(' ', 1)[0].isdigit()),
2406 None, None, None, logname)
2407 with open(logname, 'wb') as logfile:
2408 logfile.write(''.join(lines))
2409
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002410 def __init__(self, use_sudo=None):
2411 super(Dtrace, self).__init__()
2412 self.use_sudo = use_sudo
2413
2414 def get_tracer(self, logname):
2415 return self.Tracer(logname, self.use_sudo)
2416
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002417 @staticmethod
2418 def clean_trace(logname):
2419 for ext in ('', '.log'):
2420 if os.path.isfile(logname + ext):
2421 os.remove(logname + ext)
2422
2423 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00002424 def parse_log(cls, logname, blacklist, trace_name):
2425 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00002426 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002427
2428 def blacklist_more(filepath):
2429 # All the HFS metadata is in the form /.vol/...
2430 return blacklist(filepath) or re.match(r'^\/\.vol\/.+$', filepath)
2431
2432 data = read_json(logname)
2433 out = []
2434 for item in data['traces']:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00002435 if trace_name and item['trace'] != trace_name:
2436 continue
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002437 result = {
2438 'output': item['output'],
2439 'trace': item['trace'],
2440 }
2441 try:
2442 context = cls.Context(blacklist_more, item['pid'], item['cwd'])
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002443 # It's fine to assume the file as UTF-8: OSX enforces the file names to
2444 # be valid UTF-8 and we control the log output.
2445 for line in codecs.open(logname + '.log', 'rb', encoding='utf-8'):
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002446 context.on_line(line)
2447 result['results'] = context.to_results()
2448 except TracingFailure:
2449 result['exception'] = sys.exc_info()
2450 out.append(result)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002451 return out
2452
2453
2454class LogmanTrace(ApiBase):
2455 """Uses the native Windows ETW based tracing functionality to trace a child
2456 process.
2457
2458 Caveat: this implementations doesn't track cwd or initial_cwd. It is because
2459 the Windows Kernel doesn't have a concept of 'current working directory' at
2460 all. A Win32 process has a map of current directories, one per drive letter
2461 and it is managed by the user mode kernel32.dll. In kernel, a file is always
2462 opened relative to another file_object or as an absolute path. All the current
2463 working directory logic is done in user mode.
2464 """
2465 class Context(ApiBase.Context):
2466 """Processes a ETW log line and keeps the list of existent and non
2467 existent files accessed.
2468
2469 Ignores directories.
2470 """
2471 # These indexes are for the stripped version in json.
2472 EVENT_NAME = 0
2473 TYPE = 1
2474 PID = 2
2475 TID = 3
2476 PROCESSOR_ID = 4
2477 TIMESTAMP = 5
2478 USER_DATA = 6
2479
2480 class Process(ApiBase.Context.Process):
2481 def __init__(self, *args):
2482 super(LogmanTrace.Context.Process, self).__init__(*args)
2483 # Handle file objects that succeeded.
2484 self.file_objects = {}
2485
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002486 def __init__(self, blacklist, thunk_pid, trace_name, thunk_cmd):
2487 logging.info(
2488 '%s(%d, %s, %s)', self.__class__.__name__, thunk_pid, trace_name,
2489 thunk_cmd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002490 super(LogmanTrace.Context, self).__init__(blacklist)
2491 self._drive_map = DosDriveMap()
2492 # Threads mapping to the corresponding process id.
2493 self._threads_active = {}
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002494 # Process ID of the tracer, e.g. the temporary script created by
2495 # create_thunk(). This is tricky because the process id may have been
2496 # reused.
2497 self._thunk_pid = thunk_pid
2498 self._thunk_cmd = thunk_cmd
2499 self._trace_name = trace_name
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002500 self._line_number = 0
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002501 self._thunk_process = None
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002502
2503 def on_line(self, line):
2504 """Processes a json Event line."""
2505 self._line_number += 1
2506 try:
2507 # By Opcode
2508 handler = getattr(
2509 self,
2510 'handle_%s_%s' % (line[self.EVENT_NAME], line[self.TYPE]),
2511 None)
2512 if not handler:
2513 raise TracingFailure(
2514 'Unexpected event %s_%s' % (
2515 line[self.EVENT_NAME], line[self.TYPE]),
2516 None, None, None)
2517 handler(line)
2518 except TracingFailure, e:
2519 # Hack in the values since the handler could be a static function.
2520 e.pid = line[self.PID]
2521 e.line = line
2522 e.line_number = self._line_number
2523 # Re-raise the modified exception.
2524 raise
2525 except (KeyError, NotImplementedError, ValueError), e:
2526 raise TracingFailure(
2527 'Trace generated a %s exception: %s' % (
2528 e.__class__.__name__, str(e)),
2529 line[self.PID],
2530 self._line_number,
2531 line,
2532 e)
2533
2534 def to_results(self):
2535 if not self.root_process:
2536 raise TracingFailure(
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002537 'Failed to detect the initial process %d' % self._thunk_pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002538 None, None, None)
2539 process = self.root_process.to_results_process()
2540 return Results(process)
2541
2542 def _thread_to_process(self, tid):
2543 """Finds the process from the thread id."""
2544 tid = int(tid, 16)
2545 pid = self._threads_active.get(tid)
2546 if not pid or not self._process_lookup.get(pid):
2547 return
2548 return self._process_lookup[pid]
2549
2550 @classmethod
2551 def handle_EventTrace_Header(cls, line):
2552 """Verifies no event was dropped, e.g. no buffer overrun occured."""
2553 BUFFER_SIZE = cls.USER_DATA
2554 #VERSION = cls.USER_DATA + 1
2555 #PROVIDER_VERSION = cls.USER_DATA + 2
2556 #NUMBER_OF_PROCESSORS = cls.USER_DATA + 3
2557 #END_TIME = cls.USER_DATA + 4
2558 #TIMER_RESOLUTION = cls.USER_DATA + 5
2559 #MAX_FILE_SIZE = cls.USER_DATA + 6
2560 #LOG_FILE_MODE = cls.USER_DATA + 7
2561 #BUFFERS_WRITTEN = cls.USER_DATA + 8
2562 #START_BUFFERS = cls.USER_DATA + 9
2563 #POINTER_SIZE = cls.USER_DATA + 10
2564 EVENTS_LOST = cls.USER_DATA + 11
2565 #CPU_SPEED = cls.USER_DATA + 12
2566 #LOGGER_NAME = cls.USER_DATA + 13
2567 #LOG_FILE_NAME = cls.USER_DATA + 14
2568 #BOOT_TIME = cls.USER_DATA + 15
2569 #PERF_FREQ = cls.USER_DATA + 16
2570 #START_TIME = cls.USER_DATA + 17
2571 #RESERVED_FLAGS = cls.USER_DATA + 18
2572 #BUFFERS_LOST = cls.USER_DATA + 19
2573 #SESSION_NAME_STRING = cls.USER_DATA + 20
2574 #LOG_FILE_NAME_STRING = cls.USER_DATA + 21
2575 if line[EVENTS_LOST] != '0':
2576 raise TracingFailure(
2577 ( '%s events were lost during trace, please increase the buffer '
2578 'size from %s') % (line[EVENTS_LOST], line[BUFFER_SIZE]),
2579 None, None, None)
2580
2581 def handle_FileIo_Cleanup(self, line):
2582 """General wisdom: if a file is closed, it's because it was opened.
2583
2584 Note that FileIo_Close is not used since if a file was opened properly but
2585 not closed before the process exits, only Cleanup will be logged.
2586 """
2587 #IRP = self.USER_DATA
2588 TTID = self.USER_DATA + 1 # Thread ID, that's what we want.
2589 FILE_OBJECT = self.USER_DATA + 2
2590 #FILE_KEY = self.USER_DATA + 3
2591 proc = self._thread_to_process(line[TTID])
2592 if not proc:
2593 # Not a process we care about.
2594 return
2595 file_object = line[FILE_OBJECT]
2596 if file_object in proc.file_objects:
2597 proc.add_file(proc.file_objects.pop(file_object), False)
2598
2599 def handle_FileIo_Create(self, line):
2600 """Handles a file open.
2601
2602 All FileIo events are described at
2603 http://msdn.microsoft.com/library/windows/desktop/aa363884.aspx
2604 for some value of 'description'.
2605
2606 " (..) process and thread id values of the IO events (..) are not valid "
2607 http://msdn.microsoft.com/magazine/ee358703.aspx
2608
2609 The FileIo.Create event doesn't return if the CreateFile() call
2610 succeeded, so keep track of the file_object and check that it is
2611 eventually closed with FileIo_Cleanup.
2612 """
2613 #IRP = self.USER_DATA
2614 TTID = self.USER_DATA + 1 # Thread ID, that's what we want.
2615 FILE_OBJECT = self.USER_DATA + 2
2616 #CREATE_OPTIONS = self.USER_DATA + 3
2617 #FILE_ATTRIBUTES = self.USER_DATA + 4
2618 #self.USER_DATA + SHARE_ACCESS = 5
2619 OPEN_PATH = self.USER_DATA + 6
2620
2621 proc = self._thread_to_process(line[TTID])
2622 if not proc:
2623 # Not a process we care about.
2624 return
2625
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002626 raw_path = line[OPEN_PATH]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002627 # Ignore directories and bare drive right away.
2628 if raw_path.endswith(os.path.sep):
2629 return
2630 filepath = self._drive_map.to_win32(raw_path)
2631 # Ignore bare drive right away. Some may still fall through with format
2632 # like '\\?\X:'
2633 if len(filepath) == 2:
2634 return
2635 file_object = line[FILE_OBJECT]
2636 if os.path.isdir(filepath):
2637 # There is no O_DIRECTORY equivalent on Windows. The closed is
2638 # FILE_FLAG_BACKUP_SEMANTICS but it's not exactly right either. So
2639 # simply discard directories are they are found.
2640 return
2641 # Override any stale file object
2642 proc.file_objects[file_object] = filepath
2643
2644 def handle_FileIo_Rename(self, line):
2645 # TODO(maruel): Handle?
2646 pass
2647
2648 def handle_Process_End(self, line):
2649 pid = line[self.PID]
2650 if self._process_lookup.get(pid):
2651 logging.info('Terminated: %d' % pid)
2652 self._process_lookup[pid] = None
2653 else:
2654 logging.debug('Terminated: %d' % pid)
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002655 if self._thunk_process and self._thunk_process.pid == pid:
2656 self._thunk_process = None
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002657
2658 def handle_Process_Start(self, line):
2659 """Handles a new child process started by PID."""
2660 #UNIQUE_PROCESS_KEY = self.USER_DATA
2661 PROCESS_ID = self.USER_DATA + 1
2662 #PARENT_PID = self.USER_DATA + 2
2663 #SESSION_ID = self.USER_DATA + 3
2664 #EXIT_STATUS = self.USER_DATA + 4
2665 #DIRECTORY_TABLE_BASE = self.USER_DATA + 5
2666 #USER_SID = self.USER_DATA + 6
2667 IMAGE_FILE_NAME = self.USER_DATA + 7
2668 COMMAND_LINE = self.USER_DATA + 8
2669
2670 ppid = line[self.PID]
2671 pid = int(line[PROCESS_ID], 16)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002672 command_line = CommandLineToArgvW(line[COMMAND_LINE])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002673 logging.debug(
2674 'New process %d->%d (%s) %s' %
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002675 (ppid, pid, line[IMAGE_FILE_NAME], command_line))
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002676
2677 if pid == self._thunk_pid:
2678 # Need to ignore processes we don't know about because the log is
2679 # system-wide. self._thunk_pid shall start only one process.
2680 # This is tricky though because Windows *loves* to reuse process id and
2681 # it happens often that the process ID of the thunk script created by
2682 # create_thunk() is reused. So just detecting the pid here is not
2683 # sufficient, we must confirm the command line.
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002684 if command_line[:len(self._thunk_cmd)] != self._thunk_cmd:
2685 logging.info(
2686 'Ignoring duplicate pid %d for %s: %s while searching for %s',
2687 pid, self._trace_name, command_line, self._thunk_cmd)
2688 return
2689
2690 # TODO(maruel): The check is quite weak. Add the thunk path.
2691 if self._thunk_process:
2692 raise TracingFailure(
2693 ( 'Parent process is _thunk_pid(%d) but thunk_process(%d) is '
2694 'already set') % (self._thunk_pid, self._thunk_process.pid),
2695 None, None, None)
2696 proc = self.Process(self.blacklist, pid, None)
2697 self._thunk_process = proc
2698 return
2699 elif ppid == self._thunk_pid and self._thunk_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002700 proc = self.Process(self.blacklist, pid, None)
2701 self.root_process = proc
2702 ppid = None
2703 elif self._process_lookup.get(ppid):
2704 proc = self.Process(self.blacklist, pid, None)
2705 self._process_lookup[ppid].children.append(proc)
2706 else:
2707 # Ignore
2708 return
2709 self._process_lookup[pid] = proc
2710
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002711 proc.command = command_line
2712 proc.executable = line[IMAGE_FILE_NAME]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002713 # proc.command[0] may be the absolute path of 'executable' but it may be
2714 # anything else too. If it happens that command[0] ends with executable,
2715 # use it, otherwise defaults to the base name.
2716 cmd0 = proc.command[0].lower()
2717 if not cmd0.endswith('.exe'):
2718 # TODO(maruel): That's not strictly true either.
2719 cmd0 += '.exe'
2720 if cmd0.endswith(proc.executable) and os.path.isfile(cmd0):
2721 # Fix the path.
2722 cmd0 = cmd0.replace('/', os.path.sep)
2723 cmd0 = os.path.normpath(cmd0)
2724 proc.executable = get_native_path_case(cmd0)
2725 logging.info(
2726 'New child: %s -> %d %s' % (ppid, pid, proc.executable))
2727
2728 def handle_Thread_End(self, line):
2729 """Has the same parameters as Thread_Start."""
2730 tid = int(line[self.TID], 16)
2731 self._threads_active.pop(tid, None)
2732
2733 def handle_Thread_Start(self, line):
2734 """Handles a new thread created.
2735
2736 Do not use self.PID here since a process' initial thread is created by
2737 the parent process.
2738 """
2739 PROCESS_ID = self.USER_DATA
2740 TTHREAD_ID = self.USER_DATA + 1
2741 #STACK_BASE = self.USER_DATA + 2
2742 #STACK_LIMIT = self.USER_DATA + 3
2743 #USER_STACK_BASE = self.USER_DATA + 4
2744 #USER_STACK_LIMIT = self.USER_DATA + 5
2745 #AFFINITY = self.USER_DATA + 6
2746 #WIN32_START_ADDR = self.USER_DATA + 7
2747 #TEB_BASE = self.USER_DATA + 8
2748 #SUB_PROCESS_TAG = self.USER_DATA + 9
2749 #BASE_PRIORITY = self.USER_DATA + 10
2750 #PAGE_PRIORITY = self.USER_DATA + 11
2751 #IO_PRIORITY = self.USER_DATA + 12
2752 #THREAD_FLAGS = self.USER_DATA + 13
2753 # Do not use self.PID here since a process' initial thread is created by
2754 # the parent process.
2755 pid = int(line[PROCESS_ID], 16)
2756 tid = int(line[TTHREAD_ID], 16)
2757 logging.debug('New thread pid:%d, tid:%d' % (pid, tid))
2758 self._threads_active[tid] = pid
2759
2760 @classmethod
2761 def supported_events(cls):
2762 """Returns all the procesed events."""
2763 out = []
2764 for member in dir(cls):
2765 match = re.match(r'^handle_([A-Za-z]+)_([A-Za-z]+)$', member)
2766 if match:
2767 out.append(match.groups())
2768 return out
2769
2770 class Tracer(ApiBase.Tracer):
2771 # The basic headers.
2772 EXPECTED_HEADER = [
2773 u'Event Name',
2774 u'Type',
2775 u'Event ID',
2776 u'Version',
2777 u'Channel',
2778 u'Level', # 5
2779 u'Opcode',
2780 u'Task',
2781 u'Keyword',
2782 u'PID',
2783 u'TID', # 10
2784 u'Processor Number',
2785 u'Instance ID',
2786 u'Parent Instance ID',
2787 u'Activity ID',
2788 u'Related Activity ID', # 15
2789 u'Clock-Time',
2790 u'Kernel(ms)', # Both have a resolution of ~15ms which makes them
2791 u'User(ms)', # pretty much useless.
2792 u'User Data', # Extra arguments that are event-specific.
2793 ]
2794 # Only the useful headers common to all entries are listed there. Any column
2795 # at 19 or higher is dependent on the specific event.
2796 EVENT_NAME = 0
2797 TYPE = 1
2798 PID = 9
2799 TID = 10
2800 PROCESSOR_ID = 11
2801 TIMESTAMP = 16
2802 NULL_GUID = '{00000000-0000-0000-0000-000000000000}'
2803 USER_DATA = 19
2804
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002805 class CsvReader(object):
2806 """CSV reader that reads files generated by tracerpt.exe.
2807
2808 csv.reader() fails to read them properly, it mangles file names quoted
2809 with "" with a comma in it.
2810 """
2811 # 0. Had a ',' or one of the following ' ' after a comma, next should
2812 # be ' ', '"' or string or ',' for an empty field.
2813 ( HAD_DELIMITER,
2814 # 1. Processing an unquoted field up to ','.
2815 IN_STR,
2816 # 2. Processing a new field starting with '"'.
2817 STARTING_STR_QUOTED,
2818 # 3. Second quote in a row at the start of a field. It could be either
2819 # '""foo""' or '""'. Who the hell thought it was a great idea to use
2820 # the same character for delimiting and escaping?
2821 STARTING_SECOND_QUOTE,
2822 # 4. A quote inside a quoted string where the previous character was
2823 # not a quote, so the string is not empty. Can be either: end of a
2824 # quoted string (a delimiter) or a quote escape. The next char must be
2825 # either '"' or ','.
2826 HAD_QUOTE_IN_QUOTED,
2827 # 5. Second quote inside a quoted string.
2828 HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED,
2829 # 6. Processing a field that started with '"'.
2830 IN_STR_QUOTED) = range(7)
2831
2832 def __init__(self, f):
2833 self.f = f
2834
2835 def __iter__(self):
2836 return self
2837
2838 def next(self):
2839 """Splits the line in fields."""
2840 line = self.f.readline()
2841 if not line:
2842 raise StopIteration()
2843 line = line.strip()
2844 fields = []
2845 state = self.HAD_DELIMITER
2846 for i, c in enumerate(line):
2847 if state == self.HAD_DELIMITER:
2848 if c == ',':
2849 # Empty field.
2850 fields.append('')
2851 elif c == ' ':
2852 # Ignore initial whitespaces
2853 pass
2854 elif c == '"':
2855 state = self.STARTING_STR_QUOTED
2856 fields.append('')
2857 else:
2858 # Start of a new field.
2859 state = self.IN_STR
2860 fields.append(c)
2861
2862 elif state == self.IN_STR:
2863 # Do not accept quote inside unquoted field.
2864 assert c != '"', (i, c, line, fields)
2865 if c == ',':
2866 fields[-1] = fields[-1].strip()
2867 state = self.HAD_DELIMITER
2868 else:
2869 fields[-1] = fields[-1] + c
2870
2871 elif state == self.STARTING_STR_QUOTED:
2872 if c == '"':
2873 # Do not store the character yet.
2874 state = self.STARTING_SECOND_QUOTE
2875 else:
2876 state = self.IN_STR_QUOTED
2877 fields[-1] = fields[-1] + c
2878
2879 elif state == self.STARTING_SECOND_QUOTE:
2880 if c == ',':
2881 # It was an empty field. '""' == ''.
2882 state = self.HAD_DELIMITER
2883 else:
2884 fields[-1] = fields[-1] + '"' + c
2885 state = self.IN_STR_QUOTED
2886
2887 elif state == self.HAD_QUOTE_IN_QUOTED:
2888 if c == ',':
2889 # End of the string.
2890 state = self.HAD_DELIMITER
2891 elif c == '"':
2892 state = self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED
2893 else:
2894 # The previous double-quote was just an unescaped quote.
2895 fields[-1] = fields[-1] + '"' + c
2896 state = self.IN_STR_QUOTED
2897
2898 elif state == self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED:
2899 if c == ',':
2900 # End of the string.
2901 state = self.HAD_DELIMITER
2902 fields[-1] = fields[-1] + '"'
2903 else:
2904 assert False, (i, c, line, fields)
2905
2906 elif state == self.IN_STR_QUOTED:
2907 if c == '"':
2908 # Could be a delimiter or an escape.
2909 state = self.HAD_QUOTE_IN_QUOTED
2910 else:
2911 fields[-1] = fields[-1] + c
2912
2913 if state == self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED:
2914 fields[-1] = fields[-1] + '"'
2915 else:
2916 assert state in (
2917 # Terminated with a normal field.
2918 self.IN_STR,
2919 # Terminated with an empty field.
2920 self.STARTING_SECOND_QUOTE,
2921 # Terminated with a normal quoted field.
2922 self.HAD_QUOTE_IN_QUOTED), (
2923 line, state, fields)
2924 return fields
2925
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002926 def __init__(self, logname):
2927 """Starts the log collection.
2928
2929 Requires administrative access. logman.exe is synchronous so no need for a
2930 "warmup" call. 'Windows Kernel Trace' is *localized* so use its GUID
2931 instead. The GUID constant name is SystemTraceControlGuid. Lovely.
2932
2933 One can get the list of potentially interesting providers with:
2934 "logman query providers | findstr /i file"
2935 """
2936 super(LogmanTrace.Tracer, self).__init__(logname)
2937 self._script = create_thunk()
2938 cmd_start = [
2939 'logman.exe',
2940 'start',
2941 'NT Kernel Logger',
2942 '-p', '{9e814aad-3204-11d2-9a82-006008a86939}',
2943 # splitio,fileiocompletion,syscall,file,cswitch,img
2944 '(process,fileio,thread)',
2945 '-o', self._logname + '.etl',
2946 '-ets', # Send directly to kernel
2947 # Values extracted out of thin air.
2948 # Event Trace Session buffer size in kb.
2949 '-bs', '10240',
2950 # Number of Event Trace Session buffers.
2951 '-nb', '16', '256',
2952 ]
2953 logging.debug('Running: %s' % cmd_start)
2954 try:
2955 subprocess.check_call(
2956 cmd_start,
2957 stdin=subprocess.PIPE,
2958 stdout=subprocess.PIPE,
2959 stderr=subprocess.STDOUT)
2960 except subprocess.CalledProcessError, e:
2961 if e.returncode == -2147024891:
2962 print >> sys.stderr, 'Please restart with an elevated admin prompt'
2963 elif e.returncode == -2144337737:
2964 print >> sys.stderr, (
2965 'A kernel trace was already running, stop it and try again')
2966 raise
2967
2968 def trace(self, cmd, cwd, tracename, output):
2969 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
2970 assert os.path.isabs(cmd[0]), cmd[0]
2971 assert os.path.isabs(cwd), cwd
2972 assert os.path.normpath(cwd) == cwd, cwd
2973 with self._lock:
2974 if not self._initialized:
2975 raise TracingFailure(
2976 'Called Tracer.trace() on an unitialized object',
2977 None, None, None, tracename)
2978 assert tracename not in (i['trace'] for i in self._traces)
2979
2980 # Use "logman -?" for help.
2981
2982 stdout = stderr = None
2983 if output:
2984 stdout = subprocess.PIPE
2985 stderr = subprocess.STDOUT
2986
2987 # Run the child process.
2988 logging.debug('Running: %s' % cmd)
2989 # Use the temporary script generated with create_thunk() so we have a
2990 # clear pid owner. Since trace_inputs.py can be used as a library and
2991 # could trace multiple processes simultaneously, it makes it more complex
2992 # if the executable to be traced is executed directly here. It also solves
2993 # issues related to logman.exe that needs to be executed to control the
2994 # kernel trace.
2995 child_cmd = [
2996 sys.executable,
2997 self._script,
2998 tracename,
2999 ]
3000 child = subprocess.Popen(
3001 child_cmd + fix_python_path(cmd),
3002 cwd=cwd,
3003 stdin=subprocess.PIPE,
3004 stdout=stdout,
3005 stderr=stderr)
3006 logging.debug('Started child pid: %d' % child.pid)
3007 out = child.communicate()[0]
3008 # This doesn't mean all the grand-children are done. Sadly, we don't have
3009 # a good way to determine that.
3010
3011 with self._lock:
3012 assert tracename not in (i['trace'] for i in self._traces)
3013 self._traces.append({
maruel@chromium.orgf43e68b2012-10-15 20:23:10 +00003014 'cmd': cmd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003015 'cwd': cwd,
maruel@chromium.org964ddde2012-10-29 18:13:46 +00003016 'output': out,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003017 'pid': child.pid,
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00003018 # Used to figure out the real process when process ids are reused.
3019 'thunk_cmd': child_cmd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003020 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003021 })
3022
3023 return child.returncode, out
3024
3025 def close(self, _timeout=None):
3026 """Stops the kernel log collection and converts the traces to text
3027 representation.
3028 """
3029 with self._lock:
3030 if not self._initialized:
3031 raise TracingFailure(
3032 'Called Tracer.close() on an unitialized object',
3033 None, None, None)
3034 os.remove(self._script)
3035 # Save metadata, add 'format' key..
3036 data = {
3037 'format': 'csv',
3038 'traces': self._traces,
3039 }
3040 write_json(self._logname, data, False)
3041
3042 cmd_stop = [
3043 'logman.exe',
3044 'stop',
3045 'NT Kernel Logger',
3046 '-ets', # Sends the command directly to the kernel.
3047 ]
3048 logging.debug('Running: %s' % cmd_stop)
3049 subprocess.check_call(
3050 cmd_stop,
3051 stdin=subprocess.PIPE,
3052 stdout=subprocess.PIPE,
3053 stderr=subprocess.STDOUT)
3054 self._initialized = False
3055
3056 def post_process_log(self):
3057 """Converts the .etl file into .csv then into .json."""
3058 super(LogmanTrace.Tracer, self).post_process_log()
3059 logformat = 'csv'
3060 self._convert_log(logformat)
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003061 self._trim_log(logformat)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003062
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003063 def _trim_log(self, logformat):
3064 """Reduces the amount of data in original log by generating a 'reduced'
3065 log.
3066 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003067 if logformat == 'csv_utf16':
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003068 file_handle = codecs.open(
3069 self._logname + '.' + logformat, 'r', encoding='utf-16')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003070
3071 elif logformat == 'csv':
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003072 assert sys.getfilesystemencoding() == 'mbcs'
3073 file_handle = codecs.open(
3074 self._logname + '.' + logformat, 'r',
3075 encoding=get_current_encoding())
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003076
3077 supported_events = LogmanTrace.Context.supported_events()
3078
3079 def trim(generator):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003080 """Loads items from the generator and returns the interesting data.
3081
3082 It filters out any uninteresting line and reduce the amount of data in
3083 the trace.
3084 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003085 for index, line in enumerate(generator):
3086 if not index:
3087 if line != self.EXPECTED_HEADER:
3088 raise TracingFailure(
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003089 'Found malformed header: %s' % line,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003090 None, None, None)
3091 continue
3092 # As you can see, the CSV is full of useful non-redundant information:
3093 if (line[2] != '0' or # Event ID
3094 line[3] not in ('2', '3') or # Version
3095 line[4] != '0' or # Channel
3096 line[5] != '0' or # Level
3097 line[7] != '0' or # Task
3098 line[8] != '0x0000000000000000' or # Keyword
3099 line[12] != '' or # Instance ID
3100 line[13] != '' or # Parent Instance ID
3101 line[14] != self.NULL_GUID or # Activity ID
3102 line[15] != ''): # Related Activity ID
3103 raise TracingFailure(
3104 'Found unexpected values in line: %s' % ' '.join(line),
3105 None, None, None)
3106
3107 if (line[self.EVENT_NAME], line[self.TYPE]) not in supported_events:
3108 continue
3109
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003110 yield [
3111 line[self.EVENT_NAME],
3112 line[self.TYPE],
3113 line[self.PID],
3114 line[self.TID],
3115 line[self.PROCESSOR_ID],
3116 line[self.TIMESTAMP],
3117 ] + line[self.USER_DATA:]
3118
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003119 # must not convert the trim() call into a list, since it will use too much
3120 # memory for large trace. use a csv file as a workaround since the json
3121 # parser requires a complete in-memory file.
3122 with open('%s.preprocessed' % self._logname, 'wb') as f:
3123 # $ and * can't be used in file name on windows, reducing the likelihood
3124 # of having to escape a string.
3125 out = csv.writer(
3126 f, delimiter='$', quotechar='*', quoting=csv.QUOTE_MINIMAL)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003127 for line in trim(self.CsvReader(file_handle)):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003128 out.writerow([s.encode('utf-8') for s in line])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003129
3130 def _convert_log(self, logformat):
3131 """Converts the ETL trace to text representation.
3132
3133 Normally, 'csv' is sufficient. If complex scripts are used (like eastern
3134 languages), use 'csv_utf16'. If localization gets in the way, use 'xml'.
3135
3136 Arguments:
3137 - logformat: Text format to be generated, csv, csv_utf16 or xml.
3138
3139 Use "tracerpt -?" for help.
3140 """
3141 LOCALE_INVARIANT = 0x7F
3142 windll.kernel32.SetThreadLocale(LOCALE_INVARIANT)
3143 cmd_convert = [
3144 'tracerpt.exe',
3145 '-l', self._logname + '.etl',
3146 '-o', self._logname + '.' + logformat,
3147 '-gmt', # Use UTC
3148 '-y', # No prompt
3149 # Use -of XML to get the header of each items after column 19, e.g. all
3150 # the actual headers of 'User Data'.
3151 ]
3152
3153 if logformat == 'csv':
3154 # tracerpt localizes the 'Type' column, for major brainfuck
3155 # entertainment. I can't imagine any sane reason to do that.
3156 cmd_convert.extend(['-of', 'CSV'])
3157 elif logformat == 'csv_utf16':
3158 # This causes it to use UTF-16, which doubles the log size but ensures
3159 # the log is readable for non-ASCII characters.
3160 cmd_convert.extend(['-of', 'CSV', '-en', 'Unicode'])
3161 elif logformat == 'xml':
3162 cmd_convert.extend(['-of', 'XML'])
3163 else:
3164 raise ValueError('Unexpected log format \'%s\'' % logformat)
3165 logging.debug('Running: %s' % cmd_convert)
3166 # This can takes tens of minutes for large logs.
3167 # Redirects all output to stderr.
3168 subprocess.check_call(
3169 cmd_convert,
3170 stdin=subprocess.PIPE,
3171 stdout=sys.stderr,
3172 stderr=sys.stderr)
3173
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003174 def __init__(self, use_sudo=False): # pylint: disable=W0613
3175 super(LogmanTrace, self).__init__()
3176 # Ignore use_sudo. It's irrelevant on Windows but kept to simplify the API.
3177
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003178 @staticmethod
3179 def clean_trace(logname):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003180 for ext in ('', '.csv', '.etl', '.json', '.xml', '.preprocessed'):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003181 if os.path.isfile(logname + ext):
3182 os.remove(logname + ext)
3183
3184 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003185 def parse_log(cls, logname, blacklist, trace_name):
3186 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00003187 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003188
3189 def blacklist_more(filepath):
3190 # All the NTFS metadata is in the form x:\$EXTEND or stuff like that.
3191 return blacklist(filepath) or re.match(r'[A-Z]\:\\\$EXTEND', filepath)
3192
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003193 # Create a list of (Context, result_dict) tuples. This is necessary because
3194 # the csv file may be larger than the amount of available memory.
3195 contexes = [
3196 (
3197 cls.Context(
3198 blacklist_more, item['pid'], item['trace'], item['thunk_cmd']),
3199 {
3200 'output': item['output'],
3201 'trace': item['trace'],
3202 },
3203 )
3204 for item in read_json(logname)['traces']
3205 if not trace_name or item['trace'] == trace_name
3206 ]
3207
3208 # The log may be too large to fit in memory and it is not efficient to read
3209 # it multiple times, so multiplex the contexes instead, which is slightly
3210 # more awkward.
3211 with open('%s.preprocessed' % logname, 'rb') as f:
3212 lines = csv.reader(
3213 f, delimiter='$', quotechar='*', quoting=csv.QUOTE_MINIMAL)
3214 for encoded in lines:
3215 line = [s.decode('utf-8') for s in encoded]
3216 # Convert the PID in-place from hex.
3217 line[cls.Context.PID] = int(line[cls.Context.PID], 16)
3218 for context in contexes:
3219 if 'exception' in context[1]:
3220 continue
3221 try:
3222 context[0].on_line(line)
3223 except TracingFailure:
3224 context[1]['exception'] = sys.exc_info()
3225
3226 for context in contexes:
3227 if 'exception' in context[1]:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003228 continue
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003229 context[1]['results'] = context[0].to_results()
3230
3231 return [context[1] for context in contexes]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003232
3233
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003234def get_api(**kwargs):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003235 """Returns the correct implementation for the current OS."""
3236 if sys.platform == 'cygwin':
3237 raise NotImplementedError(
3238 'Not implemented for cygwin, start the script from Win32 python')
3239 flavors = {
3240 'win32': LogmanTrace,
3241 'darwin': Dtrace,
3242 'sunos5': Dtrace,
3243 'freebsd7': Dtrace,
3244 'freebsd8': Dtrace,
3245 }
3246 # Defaults to strace.
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003247 return flavors.get(sys.platform, Strace)(**kwargs)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003248
3249
3250def extract_directories(root_dir, files, blacklist):
3251 """Detects if all the files in a directory are in |files| and if so, replace
3252 the individual files by a Results.Directory instance.
3253
3254 Takes a list of Results.File instances and returns a shorter list of
3255 Results.File and Results.Directory instances.
3256
3257 Arguments:
3258 - root_dir: Optional base directory that shouldn't be search further.
3259 - files: list of Results.File instances.
3260 - blacklist: regexp of files to ignore, for example r'.+\.pyc'.
3261 """
3262 logging.info(
3263 'extract_directories(%s, %d files, ...)' % (root_dir, len(files)))
3264 assert not (root_dir or '').endswith(os.path.sep), root_dir
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003265 # It is important for root_dir to not be a symlinked path, make sure to call
3266 # os.path.realpath() as needed.
3267 assert not root_dir or (
3268 os.path.realpath(get_native_path_case(root_dir)) == root_dir)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003269 assert not any(isinstance(f, Results.Directory) for f in files)
3270 # Remove non existent files.
3271 files = [f for f in files if f.existent]
3272 if not files:
3273 return files
3274 # All files must share the same root, which can be None.
3275 assert len(set(f.root for f in files)) == 1, set(f.root for f in files)
3276
3277 # Creates a {directory: {filename: File}} mapping, up to root.
3278 buckets = {}
3279 if root_dir:
3280 buckets[root_dir] = {}
3281 for fileobj in files:
3282 path = fileobj.full_path
3283 directory = os.path.dirname(path)
3284 assert directory
3285 # Do not use os.path.basename() so trailing os.path.sep is kept.
3286 basename = path[len(directory)+1:]
3287 files_in_directory = buckets.setdefault(directory, {})
3288 files_in_directory[basename] = fileobj
3289 # Add all the directories recursively up to root.
3290 while True:
3291 old_d = directory
3292 directory = os.path.dirname(directory)
3293 if directory + os.path.sep == root_dir or directory == old_d:
3294 break
3295 buckets.setdefault(directory, {})
3296
3297 root_prefix = len(root_dir) + 1 if root_dir else 0
3298 for directory in sorted(buckets, reverse=True):
3299 actual = set(f for f in os.listdir(directory) if not blacklist(f))
3300 expected = set(buckets[directory])
3301 if not (actual - expected):
3302 parent = os.path.dirname(directory)
3303 buckets[parent][os.path.basename(directory)] = Results.Directory(
3304 root_dir,
3305 directory[root_prefix:],
3306 False,
3307 sum(f.size for f in buckets[directory].itervalues()),
3308 sum(f.nb_files for f in buckets[directory].itervalues()))
3309 # Remove the whole bucket.
3310 del buckets[directory]
3311
3312 # Reverse the mapping with what remains. The original instances are returned,
3313 # so the cached meta data is kept.
3314 files = sum((x.values() for x in buckets.itervalues()), [])
3315 return sorted(files, key=lambda x: x.path)
3316
3317
3318def trace(logfile, cmd, cwd, api, output):
3319 """Traces an executable. Returns (returncode, output) from api.
3320
3321 Arguments:
3322 - logfile: file to write to.
3323 - cmd: command to run.
3324 - cwd: current directory to start the process in.
3325 - api: a tracing api instance.
3326 - output: if True, returns output, otherwise prints it at the console.
3327 """
3328 cmd = fix_python_path(cmd)
3329 api.clean_trace(logfile)
3330 with api.get_tracer(logfile) as tracer:
3331 return tracer.trace(cmd, cwd, 'default', output)
3332
3333
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003334def CMDclean(args):
3335 """Cleans up traces."""
3336 parser = OptionParserTraceInputs(command='clean')
3337 options, args = parser.parse_args(args)
3338 api = get_api()
3339 api.clean_trace(options.log)
3340 return 0
3341
3342
3343def CMDtrace(args):
3344 """Traces an executable."""
3345 parser = OptionParserTraceInputs(command='trace')
3346 parser.allow_interspersed_args = False
3347 parser.add_option(
3348 '-q', '--quiet', action='store_true',
3349 help='Redirects traced executable output to /dev/null')
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003350 parser.add_option(
3351 '-s', '--sudo', action='store_true',
3352 help='Use sudo when shelling out the tracer tool (ignored on Windows)')
3353 parser.add_option(
3354 '-n', '--no-sudo', action='store_false',
3355 help='Don\'t use sudo')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003356 options, args = parser.parse_args(args)
3357
3358 if not args:
3359 parser.error('Please provide a command to run')
3360
3361 if not os.path.isabs(args[0]) and os.access(args[0], os.X_OK):
3362 args[0] = os.path.abspath(args[0])
3363
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003364 # options.sudo default value is None, which is to do whatever tracer defaults
3365 # do.
3366 api = get_api(use_sudo=options.sudo)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003367 return trace(options.log, args, os.getcwd(), api, options.quiet)[0]
3368
3369
3370def CMDread(args):
3371 """Reads the logs and prints the result."""
3372 parser = OptionParserTraceInputs(command='read')
3373 parser.add_option(
3374 '-V', '--variable',
3375 nargs=2,
3376 action='append',
3377 dest='variables',
3378 metavar='VAR_NAME directory',
3379 default=[],
3380 help=('Variables to replace relative directories against. Example: '
3381 '"-v \'$HOME\' \'/home/%s\'" will replace all occurence of your '
3382 'home dir with $HOME') % getpass.getuser())
3383 parser.add_option(
3384 '--root-dir',
3385 help='Root directory to base everything off it. Anything outside of this '
3386 'this directory will not be reported')
3387 parser.add_option(
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003388 '--trace-name',
3389 help='Only reads one of the trace. Defaults to reading all traces')
3390 parser.add_option(
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003391 '-j', '--json', action='store_true',
3392 help='Outputs raw result data as json')
3393 parser.add_option(
3394 '-b', '--blacklist', action='append', default=[],
3395 help='List of regexp to use as blacklist filter')
3396 options, args = parser.parse_args(args)
3397
3398 if options.root_dir:
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003399 options.root_dir = unicode(os.path.abspath(options.root_dir))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003400
3401 variables = dict(options.variables)
3402 api = get_api()
3403 def blacklist(f):
3404 return any(re.match(b, f) for b in options.blacklist)
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003405 data = api.parse_log(options.log, blacklist, options.trace_name)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003406 # Process each trace.
3407 output_as_json = []
3408 for item in data:
3409 if 'exception' in item:
maruel@chromium.org424f5692012-10-15 15:42:38 +00003410 # Do not abort the other traces.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003411 print >> sys.stderr, (
maruel@chromium.org424f5692012-10-15 15:42:38 +00003412 'Trace %s: Got an exception: %s' % (
3413 item['trace'], item['exception'][1]))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003414 continue
3415 results = item['results']
3416 if options.root_dir:
3417 results = results.strip_root(options.root_dir)
3418
3419 if options.json:
3420 output_as_json.append(results.flatten())
3421 else:
3422 simplified = extract_directories(
3423 options.root_dir, results.files, blacklist)
3424 simplified = [f.replace_variables(variables) for f in simplified]
3425 if len(data) > 1:
3426 print('Trace: %s' % item['trace'])
3427 print('Total: %d' % len(results.files))
3428 print('Non existent: %d' % len(results.non_existent))
3429 for f in results.non_existent:
3430 print(' %s' % f.path)
3431 print(
3432 'Interesting: %d reduced to %d' % (
3433 len(results.existent), len(simplified)))
3434 for f in simplified:
3435 print(' %s' % f.path)
3436
3437 if options.json:
3438 write_json(sys.stdout, output_as_json, False)
3439 return 0
3440
3441
3442class OptionParserWithLogging(optparse.OptionParser):
3443 """Adds --verbose option."""
3444 def __init__(self, verbose=0, **kwargs):
3445 optparse.OptionParser.__init__(self, **kwargs)
3446 self.add_option(
3447 '-v', '--verbose',
3448 action='count',
3449 default=verbose,
3450 help='Use multiple times to increase verbosity')
3451
3452 def parse_args(self, *args, **kwargs):
3453 options, args = optparse.OptionParser.parse_args(self, *args, **kwargs)
3454 levels = [logging.ERROR, logging.INFO, logging.DEBUG]
3455 logging.basicConfig(
3456 level=levels[min(len(levels)-1, options.verbose)],
3457 format='%(levelname)5s %(module)15s(%(lineno)3d): %(message)s')
3458 return options, args
3459
3460
3461class OptionParserWithNiceDescription(OptionParserWithLogging):
3462 """Generates the description with the command's docstring."""
3463 def __init__(self, **kwargs):
3464 """Sets 'description' and 'usage' if not already specified."""
3465 command = kwargs.pop('command', 'help')
3466 kwargs.setdefault(
3467 'description',
3468 re.sub('[\r\n ]{2,}', ' ', get_command_handler(command).__doc__))
3469 kwargs.setdefault('usage', '%%prog %s [options]' % command)
3470 OptionParserWithLogging.__init__(self, **kwargs)
3471
3472
3473class OptionParserTraceInputs(OptionParserWithNiceDescription):
3474 """Adds automatic --log handling."""
3475 def __init__(self, **kwargs):
3476 OptionParserWithNiceDescription.__init__(self, **kwargs)
3477 self.add_option(
3478 '-l', '--log', help='Log file to generate or read, required')
3479
3480 def parse_args(self, *args, **kwargs):
3481 """Makes sure the paths make sense.
3482
3483 On Windows, / and \ are often mixed together in a path.
3484 """
3485 options, args = OptionParserWithNiceDescription.parse_args(
3486 self, *args, **kwargs)
3487 if not options.log:
3488 self.error('Must supply a log file with -l')
3489 options.log = os.path.abspath(options.log)
3490 return options, args
3491
3492
3493def extract_documentation():
3494 """Returns a dict {command: description} for each of documented command."""
3495 commands = (
3496 fn[3:]
3497 for fn in dir(sys.modules['__main__'])
3498 if fn.startswith('CMD') and get_command_handler(fn[3:]).__doc__)
3499 return dict((fn, get_command_handler(fn).__doc__) for fn in commands)
3500
3501
3502def CMDhelp(args):
3503 """Prints list of commands or help for a specific command."""
3504 doc = extract_documentation()
3505 # Calculates the optimal offset.
3506 offset = max(len(cmd) for cmd in doc)
3507 format_str = ' %-' + str(offset + 2) + 's %s'
3508 # Generate a one-liner documentation of each commands.
3509 commands_description = '\n'.join(
3510 format_str % (cmd, doc[cmd].split('\n')[0]) for cmd in sorted(doc))
3511
3512 parser = OptionParserWithNiceDescription(
3513 usage='%prog <command> [options]',
3514 description='Commands are:\n%s\n' % commands_description)
3515 parser.format_description = lambda _: parser.description
3516
3517 # Strip out any -h or --help argument.
3518 _, args = parser.parse_args([i for i in args if not i in ('-h', '--help')])
3519 if len(args) == 1:
3520 if not get_command_handler(args[0]):
3521 parser.error('Unknown command %s' % args[0])
3522 # The command was "%prog help command", replaces ourself with
3523 # "%prog command --help" so help is correctly printed out.
3524 return main(args + ['--help'])
3525 elif args:
3526 parser.error('Unknown argument "%s"' % ' '.join(args))
3527 parser.print_help()
3528 return 0
3529
3530
3531def get_command_handler(name):
3532 """Returns the command handler or CMDhelp if it doesn't exist."""
3533 return getattr(sys.modules['__main__'], 'CMD%s' % name, None)
3534
3535
3536def main_impl(argv):
3537 command = get_command_handler(argv[0] if argv else 'help')
3538 if not command:
3539 return CMDhelp(argv)
3540 return command(argv[1:])
3541
3542def main(argv):
3543 try:
3544 main_impl(argv)
3545 except TracingFailure, e:
3546 sys.stderr.write('\nError: ')
3547 sys.stderr.write(str(e))
3548 sys.stderr.write('\n')
3549 return 1
3550
3551
3552if __name__ == '__main__':
3553 sys.exit(main(sys.argv[1:]))