1#!/usr/bin/env python
2# coding=utf-8
3# Copyright (c) 2012 The Chromium Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""Traces an executable and its child processes and extract the files accessed
8by them.
9
10The implementation uses OS-specific APIs. The native kernel logger and the ETL
11interface are used on Windows. DTrace is used on OSX. strace is used otherwise.
12The OS-specific implementation is hidden in an 'API' interface.
13
14The results are embedded in a Results instance. The tracing is done in two
15phases: the first does the actual trace and generates an
16implementation-specific log file. Then the log file is parsed to extract the
17information, including the individual child processes and the files accessed
18from the log.
19"""
20
21import codecs
22import csv
23import getpass
24import glob
25import json
26import logging
27import optparse
28import os
29import re
30import subprocess
31import sys
32import tempfile
33import threading
34import time
35import weakref
36
37## OS-specific imports
38
39if sys.platform == 'win32':
40 from ctypes.wintypes import byref, create_unicode_buffer, c_int, c_wchar_p
41 from ctypes.wintypes import windll, FormatError # pylint: disable=E0611
42 from ctypes.wintypes import GetLastError # pylint: disable=E0611
43elif sys.platform == 'darwin':
44 import Carbon.File # pylint: disable=F0401
45 import MacOS # pylint: disable=F0401
46
47
48BASE_DIR = os.path.dirname(os.path.abspath(__file__))
49ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR))
50
51
52class TracingFailure(Exception):
53 """An exception occured during tracing."""
54 def __init__(self, description, pid, line_number, line, *args):
55 super(TracingFailure, self).__init__(
56 description, pid, line_number, line, *args)
57 self.description = description
58 self.pid = pid
59 self.line_number = line_number
60 self.line = line
61 self.extra = args
62
63 def __str__(self):
64 out = self.description
65 if self.pid:
66 out += '\npid: %d' % self.pid
67 if self.line_number:
68 out += '\nline: %d' % self.line_number
69 if self.line:
70 out += '\n%s' % self.line
71 if self.extra:
72 out += '\n' + ', '.join(map(str, filter(None, self.extra)))
73 return out
74
75
76## OS-specific functions
77
78if sys.platform == 'win32':
79 def QueryDosDevice(drive_letter):
80 """Returns the Windows 'native' path for a DOS drive letter."""
81 assert re.match(r'^[a-zA-Z]:$', drive_letter), drive_letter
82 # Guesswork. QueryDosDeviceW never returns the required number of bytes.
83 chars = 1024
84 drive_letter = unicode(drive_letter)
85 p = create_unicode_buffer(chars)
86 if 0 == windll.kernel32.QueryDosDeviceW(drive_letter, p, chars):
87 err = GetLastError()
88 if err:
89 # pylint: disable=E0602
90 raise WindowsError(
91 err,
92 'QueryDosDevice(%s): %s (%d)' % (
93 str(drive_letter), FormatError(err), err))
94 return p.value
95
96
97 def GetShortPathName(long_path):
98 """Returns the Windows short path equivalent for a 'long' path."""
99 long_path = unicode(long_path)
100 # Adds '\\\\?\\' when given an absolute path so the MAX_PATH (260) limit is
101 # not enforced.
102 if os.path.isabs(long_path) and not long_path.startswith('\\\\?\\'):
103 long_path = '\\\\?\\' + long_path
104 chars = windll.kernel32.GetShortPathNameW(long_path, None, 0)
105 if chars:
106 p = create_unicode_buffer(chars)
107 if windll.kernel32.GetShortPathNameW(long_path, p, chars):
108 return p.value
109
110 err = GetLastError()
111 if err:
112 # pylint: disable=E0602
113 raise WindowsError(
114 err,
115 'GetShortPathName(%s): %s (%d)' % (
116 str(long_path), FormatError(err), err))
117
118
119 def GetLongPathName(short_path):
120 """Returns the Windows long path equivalent for a 'short' path."""
121 short_path = unicode(short_path)
122 # Adds '\\\\?\\' when given an absolute path so the MAX_PATH (260) limit is
123 # not enforced.
124 if os.path.isabs(short_path) and not short_path.startswith('\\\\?\\'):
125 short_path = '\\\\?\\' + short_path
126 chars = windll.kernel32.GetLongPathNameW(short_path, None, 0)
127 if chars:
128 p = create_unicode_buffer(chars)
129 if windll.kernel32.GetLongPathNameW(short_path, p, chars):
130 return p.value
131
132 err = GetLastError()
133 if err:
134 # pylint: disable=E0602
135 raise WindowsError(
136 err,
137 'GetLongPathName(%s): %s (%d)' % (
138 str(short_path), FormatError(err), err))
139
140
141 def get_current_encoding():
142 """Returns the 'ANSI' code page associated to the process."""
143 return 'cp%d' % int(windll.kernel32.GetACP())
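  # Example (assuming an en-US install of Windows): GetACP() returns 1252, so
  # get_current_encoding() returns 'cp1252', which can be passed directly to
  # str.decode().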
144
145
146 class DosDriveMap(object):
147 """Maps \Device\HarddiskVolumeN to N: on Windows."""
148 # Keep one global cache.
149 _MAPPING = {}
150
151 def __init__(self):
152 """Lazy loads the cache."""
153 if not self._MAPPING:
154 # This is related to UNC resolver on windows. Ignore that.
155 self._MAPPING['\\Device\\Mup'] = None
156 self._MAPPING['\\SystemRoot'] = os.environ['SystemRoot']
157
158 for letter in (chr(l) for l in xrange(ord('C'), ord('Z')+1)):
159 try:
160 letter = '%s:' % letter
161 mapped = QueryDosDevice(letter)
162 if mapped in self._MAPPING:
163 logging.warn(
164 ('Two drives: \'%s\' and \'%s\', are mapped to the same disk'
165 '. Drive letters are a user-mode concept and the kernel '
166 'traces only have NT path, so all accesses will be '
167 'associated with the first drive letter, independent of the '
168 'actual letter used by the code') % (
169 self._MAPPING[mapped], letter))
170 else:
171 self._MAPPING[mapped] = letter
172 except WindowsError: # pylint: disable=E0602
173 pass
174
175 def to_win32(self, path):
176 """Converts a native NT path to Win32/DOS compatible path."""
177 match = re.match(r'(^\\Device\\[a-zA-Z0-9]+)(\\.*)?$', path)
178 if not match:
179 raise ValueError(
180 'Can\'t convert %s into a Win32 compatible path' % path,
181 path)
182 if not match.group(1) in self._MAPPING:
183 # Unmapped partitions may be accessed by windows for the
184 # fun of it while the test is running. Discard these.
185 return None
186 drive = self._MAPPING[match.group(1)]
187 if not drive or not match.group(2):
188 return drive
189 return drive + match.group(2)
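    # Example (assuming \Device\HarddiskVolume1 is mapped to C: on this
    # machine):
    #   DosDriveMap().to_win32('\\Device\\HarddiskVolume1\\Windows\\notepad.exe')
    #   == 'C:\\Windows\\notepad.exe'
    # Devices not present in the mapping return None so spurious kernel
    # accesses can be discarded.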
190
191
192 def isabs(path):
193 """Accepts X: as an absolute path, unlike python's os.path.isabs()."""
194 return os.path.isabs(path) or len(path) == 2 and path[1] == ':'
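  # Example: isabs('X:') and isabs(u'X:\\foo') are both True here, whereas
  # os.path.isabs('X:') is False because it only looks at what follows the
  # drive letter.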
195
196
197 def get_native_path_case(p):
198 """Returns the native path case for an existing file.
199
200 On Windows, removes any leading '\\?\'.
201 """
202 if not isabs(p):
203 raise ValueError(
204 'Can\'t get native path case for a non-absolute path: %s' % p,
205 p)
206 # Windows used to have an option to turn on case sensitivity on non Win32
207 # subsystem but that's out of scope here and isn't supported anymore.
208 # Go figure why GetShortPathName() is needed.
209 try:
210 out = GetLongPathName(GetShortPathName(p))
211 except OSError, e:
212 if e.args[0] in (2, 3, 5):
213 # The path does not exist. Try to recurse and reconstruct the path.
214 base = os.path.dirname(p)
215 rest = os.path.basename(p)
216 return os.path.join(get_native_path_case(base), rest)
217 raise
218 if out.startswith('\\\\?\\'):
219 out = out[4:]
220 # Always upper case the first letter since GetLongPathName() will return the
221 # drive letter in the case it was given.
222 return out[0].upper() + out[1:]
223
224
225 def CommandLineToArgvW(command_line):
226 """Splits a commandline into argv using CommandLineToArgvW()."""
227 # http://msdn.microsoft.com/library/windows/desktop/bb776391.aspx
228 size = c_int()
229 ptr = windll.shell32.CommandLineToArgvW(unicode(command_line), byref(size))
230 try:
231 return [arg for arg in (c_wchar_p * size.value).from_address(ptr)]
232 finally:
233 windll.kernel32.LocalFree(ptr)
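  # Example: CommandLineToArgvW(u'child.exe "C:\\Program Files\\foo" -v')
  # should yield [u'child.exe', u'C:\\Program Files\\foo', u'-v'], following
  # the usual Win32 argument quoting rules.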
234
235
236elif sys.platform == 'darwin':
237
238
239 # On non-windows, keep the stdlib behavior.
240 isabs = os.path.isabs
241
242
243 def _find_item_native_case(root_path, item):
244 """Gets the native path case of a single item based at root_path.
245
246 There is no API to get the native path case of symlinks on OSX. So it
247 needs to be done the slow way.
248 """
249 item = item.lower()
250 for element in os.listdir(root_path):
251 if element.lower() == item:
252 return element
253
254
255 def _native_case(p):
256 """Gets the native path case. Warning: this function resolves symlinks."""
257 logging.debug('native_case(%s)' % p)
258 try:
259 rel_ref, _ = Carbon.File.FSPathMakeRef(p)
260 out = rel_ref.FSRefMakePath()
261 if p.endswith(os.path.sep) and not out.endswith(os.path.sep):
262 return out + os.path.sep
263 return out
264 except MacOS.Error, e:
265 if e.args[0] in (-43, -120):
266 # The path does not exist. Try to recurse and reconstruct the path.
267 # -43 means file not found.
268 # -120 means directory not found.
269 base = os.path.dirname(p)
270 rest = os.path.basename(p)
271 return os.path.join(_native_case(base), rest)
272 raise OSError(
273 e.args[0], 'Failed to get native path for %s' % p, p, e.args[1])
274
275
276 def _split_at_symlink_native(base_path, rest):
277 """Returns the native path for a symlink."""
278 base, symlink, rest = split_at_symlink(base_path, rest)
279 if symlink:
280 if not base_path:
281 base_path = base
282 else:
283 base_path = safe_join(base_path, base)
284 symlink = _find_item_native_case(base_path, symlink)
285 return base, symlink, rest
286
287
288 def get_native_path_case(path):
289 """Returns the native path case for an existing file.
290
291 Technically, it's only HFS+ on OSX that is case preserving and
292 insensitive. It's the default setting on HFS+ but can be changed.
293 """
294 if not isabs(path):
295 raise ValueError(
296 'Can\'t get native path case for a non-absolute path: %s' % path,
297 path)
298 if path.startswith('/dev'):
299 # /dev is not visible from Carbon, causing an exception.
300 return path
301
302 # Starts assuming there is no symlink along the path.
303 resolved = _native_case(path)
304 if resolved.lower() == path.lower():
305 # This code path is much faster.
306 return resolved
307
308 # There was a symlink, process it.
309 base, symlink, rest = _split_at_symlink_native(None, path)
310 assert symlink, (path, base, symlink, rest, resolved)
311 prev = base
312 base = safe_join(_native_case(base), symlink)
313 assert len(base) > len(prev)
314 while rest:
315 prev = base
316 relbase, symlink, rest = _split_at_symlink_native(base, rest)
317 base = safe_join(base, relbase)
318 assert len(base) > len(prev), (prev, base, symlink)
319 if symlink:
320 base = safe_join(base, symlink)
321 assert len(base) > len(prev), (prev, base, symlink)
322 # Make sure no symlink was resolved.
323 assert base.lower() == path.lower(), (base, path)
324 return base
325
326
327else: # OSes other than Windows and OSX.
328
329
330 # On non-windows, keep the stdlib behavior.
331 isabs = os.path.isabs
332
333
334 def get_native_path_case(path):
335 """Returns the native path case for an existing file.
336
337 On OSes other than OSX and Windows, assume the file system is
338 case-sensitive.
339
340 TODO(maruel): This is not strictly true. Implement if necessary.
341 """
342 if not isabs(path):
343 raise ValueError(
344 'Can\'t get native path case for a non-absolute path: %s' % path,
345 path)
346 # Give up on cygwin, as GetLongPathName() can't be called.
347 # Linux traces tend not to be normalized, so take this opportunity to
348 # normalize them. The implementations for the other OSes already normalize
349 # the path, so it needs to be done here too to stay coherent across OSes.
350 out = os.path.normpath(path)
351 if path.endswith(os.path.sep) and not out.endswith(os.path.sep):
352 return out + os.path.sep
353 return out
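  # Example: on Linux this reduces to normalization, e.g.
  # get_native_path_case('/usr/local/../bin/') == '/usr/bin/'; a trailing
  # separator is preserved.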
354
355
356if sys.platform != 'win32': # All non-Windows OSes.
357
358
359 def safe_join(*args):
360 """Joins path elements like os.path.join() but doesn't abort on absolute
361 path.
362
363 os.path.join('foo', '/bar') == '/bar'
364 but safe_join('foo', '/bar') == 'foo/bar'.
365 """
366 out = ''
367 for element in args:
368 if element.startswith(os.path.sep):
369 if out.endswith(os.path.sep):
370 out += element[1:]
371 else:
372 out += element
373 else:
374 if out.endswith(os.path.sep):
375 out += element
376 else:
377 out += os.path.sep + element
378 return out
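  # Example: safe_join('/foo', '/bar', 'baz') == '/foo/bar/baz', while
  # os.path.join('/foo', '/bar', 'baz') == '/bar/baz' because os.path.join()
  # restarts at the absolute component.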
379
380
381 def split_at_symlink(base_dir, relfile):
382 """Scans each component of relfile and cut the string at the symlink if
383 there is any.
384
385 Returns a tuple (base_path, symlink, rest), with symlink == rest == None if
386 no symlink was found.
387 """
388 if base_dir:
389 assert relfile
390 assert os.path.isabs(base_dir)
391 index = 0
392 else:
393 assert os.path.isabs(relfile)
394 index = 1
395
396 def at_root(rest):
397 if base_dir:
398 return safe_join(base_dir, rest)
399 return rest
400
401 while True:
402 try:
403 index = relfile.index(os.path.sep, index)
404 except ValueError:
405 index = len(relfile)
406 full = at_root(relfile[:index])
407 if os.path.islink(full):
408 # A symlink!
409 base = os.path.dirname(relfile[:index])
410 symlink = os.path.basename(relfile[:index])
411 rest = relfile[index:]
412 logging.debug(
413 'split_at_symlink(%s, %s) -> (%s, %s, %s)' %
414 (base_dir, relfile, base, symlink, rest))
415 return base, symlink, rest
416 if index == len(relfile):
417 break
418 index += 1
419 return relfile, None, None
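  # Examples (assuming /src/out is a symlink and no other component is one):
  #   split_at_symlink('/src', 'out/Release/foo.txt')
  #   == ('', 'out', '/Release/foo.txt')
  #   split_at_symlink(None, '/src/Release/foo.txt')
  #   == ('/src/Release/foo.txt', None, None)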
420
421
422def fix_python_path(cmd):
423 """Returns the fixed command line to call the right python executable."""
424 out = cmd[:]
425 if out[0] == 'python':
426 out[0] = sys.executable
427 elif out[0].endswith('.py'):
428 out.insert(0, sys.executable)
429 return out
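# Example: with sys.executable == '/usr/bin/python2.7':
#   fix_python_path(['python', 'foo.py']) == ['/usr/bin/python2.7', 'foo.py']
#   fix_python_path(['foo.py', '-v']) == ['/usr/bin/python2.7', 'foo.py', '-v']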
430
431
432def create_thunk():
433 handle, name = tempfile.mkstemp(prefix='trace_inputs_thunk', suffix='.py')
434 os.write(
435 handle,
436 (
437 'import subprocess\n'
438 'import sys\n'
439 'sys.exit(subprocess.call(sys.argv[2:]))\n'
440 ))
441 os.close(handle)
442 return name
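# The generated thunk ignores sys.argv[1] (presumably a slot reserved for the
# tracer's own use) and executes sys.argv[2:], so a caller is expected to
# invoke it roughly as (illustrative sketch):
#   subprocess.call([sys.executable, thunk_path, marker] + actual_cmd)
# where thunk_path is the value returned by create_thunk().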
443
444
445def strace_process_quoted_arguments(text):
446 """Extracts quoted arguments on a string and return the arguments as a list.
447
448 Implemented as an automaton. Supports incomplete strings in the form
449 '"foo"...'.
450
451 Example:
452 With text = '"foo", "bar"', the function will return ['foo', 'bar']
453
454 TODO(maruel): Implement escaping.
455 """
456 # All the possible states of the DFA.
457 ( NEED_QUOTE, # Beginning of a new argument.
458 INSIDE_STRING, # Inside an argument.
459 ESCAPED, # Found a '\' inside a quote. Treat the next char as-is.
460 NEED_COMMA_OR_DOT, # Right after the closing quote of an argument. Could be
461 # a series of 3 dots or a comma.
462 NEED_SPACE, # Right after a comma.
463 NEED_DOT_2, # Found a dot, need a second one.
464 NEED_DOT_3, # Found second dot, need a third one.
465 NEED_COMMA, # Found third dot, need a comma.
466 ) = range(8)
467
468 state = NEED_QUOTE
469 out = []
470 for index, char in enumerate(text):
471 if char == '"':
472 if state == NEED_QUOTE:
473 state = INSIDE_STRING
474 # A new argument was found.
475 out.append('')
476 elif state == INSIDE_STRING:
477 # The argument is now closed.
478 state = NEED_COMMA_OR_DOT
479 elif state == ESCAPED:
480 out[-1] += char
481 state = INSIDE_STRING
482 else:
483 raise ValueError(
484 'Can\'t process char at column %d for: %r' % (index, text),
485 index,
486 text)
487 elif char == ',':
488 if state in (NEED_COMMA_OR_DOT, NEED_COMMA):
489 state = NEED_SPACE
490 elif state == INSIDE_STRING:
491 out[-1] += char
492 elif state == ESCAPED:
493 out[-1] += char
494 state = INSIDE_STRING
495 else:
496 raise ValueError(
497 'Can\'t process char at column %d for: %r' % (index, text),
498 index,
499 text)
500 elif char == ' ':
501 if state == NEED_SPACE:
502 state = NEED_QUOTE
503 elif state == INSIDE_STRING:
504 out[-1] += char
505 elif state == ESCAPED:
506 out[-1] += char
507 state = INSIDE_STRING
508 else:
509 raise ValueError(
510 'Can\'t process char at column %d for: %r' % (index, text),
511 index,
512 text)
513 elif char == '.':
514 if state == NEED_COMMA_OR_DOT:
515 # The string is incomplete; this means the strace -s flag should be
516 # increased.
517 state = NEED_DOT_2
518 elif state == NEED_DOT_2:
519 state = NEED_DOT_3
520 elif state == NEED_DOT_3:
521 state = NEED_COMMA
522 elif state == INSIDE_STRING:
523 out[-1] += char
524 elif state == ESCAPED:
525 out[-1] += char
526 state = INSIDE_STRING
527 else:
528 raise ValueError(
529 'Can\'t process char at column %d for: %r' % (index, text),
530 index,
531 text)
532 elif char == '\\':
533 if state == ESCAPED:
534 out[-1] += char
535 state = INSIDE_STRING
536 elif state == INSIDE_STRING:
537 state = ESCAPED
538 else:
539 raise ValueError(
540 'Can\'t process char at column %d for: %r' % (index, text),
541 index,
542 text)
543 else:
544 if state == INSIDE_STRING:
545 out[-1] += char
546 else:
547 raise ValueError(
548 'Can\'t process char at column %d for: %r' % (index, text),
549 index,
550 text)
551 if state not in (NEED_COMMA, NEED_COMMA_OR_DOT):
552 raise ValueError(
553 'String is incorrectly terminated: %r' % text,
554 text)
555 return out
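# Examples:
#   strace_process_quoted_arguments('"/bin/echo", "hello world"')
#   == ['/bin/echo', 'hello world']
#   strace_process_quoted_arguments('"/bin/ech"..., "hello"')
#   == ['/bin/ech', 'hello']   # the first argument was truncated by strace -s
# A malformed string raises ValueError.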
556
557
558def read_json(filepath):
559 with open(filepath, 'r') as f:
560 return json.load(f)
561
562
563def write_json(filepath_or_handle, data, dense):
564 """Writes data into filepath or file handle encoded as json.
565
566 If dense is True, the json is packed. Otherwise, it is human readable.
567 """
568 if hasattr(filepath_or_handle, 'write'):
569 if dense:
570 filepath_or_handle.write(
571 json.dumps(data, sort_keys=True, separators=(',',':')))
572 else:
573 filepath_or_handle.write(json.dumps(data, sort_keys=True, indent=2))
574 else:
575 with open(filepath_or_handle, 'wb') as f:
576 if dense:
577 json.dump(data, f, sort_keys=True, separators=(',',':'))
578 else:
579 json.dump(data, f, sort_keys=True, indent=2)
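# Example: write_json('out.json', {'b': 1, 'a': [2]}, True) writes the packed
# form '{"a":[2],"b":1}', while passing dense=False writes the same data
# pretty-printed with a 2-space indent.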
580
581
582class Results(object):
583 """Results of a trace session."""
584
585 class _TouchedObject(object):
586 """Something, a file or a directory, that was accessed."""
587 def __init__(self, root, path, tainted, size, nb_files):
588 logging.debug(
589 '%s(%s, %s, %s, %s, %s)' %
590 (self.__class__.__name__, root, path, tainted, size, nb_files))
591 self.root = root
592 self.path = path
593 self.tainted = tainted
594 self.nb_files = nb_files
595 # Can be used as a cache or a default value, depending on context.
596 self._size = size
597 # These are cache only.
598 self._real_path = None
599
600 # Check internal consistency.
601 assert path, path
602 assert tainted or bool(root) != bool(isabs(path)), (root, path)
603 assert tainted or (
604 not os.path.exists(self.full_path) or
605 (self.full_path == get_native_path_case(self.full_path))), (
606 tainted, self.full_path, get_native_path_case(self.full_path))
607
608 @property
609 def existent(self):
610 return self.size != -1
611
612 @property
613 def full_path(self):
614 if self.root:
615 return os.path.join(self.root, self.path)
616 return self.path
617
618 @property
619 def real_path(self):
620 """Returns the path with symlinks resolved."""
621 if not self._real_path:
622 self._real_path = os.path.realpath(self.full_path)
623 return self._real_path
624
625 @property
626 def size(self):
627 """File's size. -1 is not existent."""
628 if self._size is None and not self.tainted:
629 try:
630 self._size = os.stat(self.full_path).st_size
631 except OSError:
632 self._size = -1
633 return self._size
634
635 def flatten(self):
636 """Returns a dict representing this object.
637
638 A 'size' of 0 means the file was only touched and not read.
639 """
640 return {
641 'path': self.path,
642 'size': self.size,
643 }
644
645 def replace_variables(self, variables):
646 """Replaces the root of this File with one of the variables if it matches.
647
648 If a variable replacement occurs, the cloned object becomes tainted.
649 """
650 for variable, root_path in variables.iteritems():
651 if self.path.startswith(root_path):
652 return self._clone(
653 self.root, variable + self.path[len(root_path):], True)
654 # No need to clone, returns ourself.
655 return self
656
657 def strip_root(self, root):
658 """Returns a clone of itself with 'root' stripped off."""
659 # Check internal consistency.
660 assert self.tainted or (isabs(root) and root.endswith(os.path.sep)), root
661 if not self.full_path.startswith(root):
662 # Now try to resolve the symlinks to see if it can be reached this way.
663 # Only try *after* trying without resolving symlink.
664 if not self.real_path.startswith(root):
665 return None
666 path = self.real_path
667 else:
668 path = self.full_path
669 return self._clone(root, path[len(root):], self.tainted)
670
671 def _clone(self, new_root, new_path, tainted):
672 raise NotImplementedError(self.__class__.__name__)
673
674 class File(_TouchedObject):
675 """A file that was accessed. May not be present anymore.
676
677 If tainted is true, it means it is not a real path anymore as a variable
678 replacement occurred.
679
680 If touched_only is True, the file was only probed for existence and was
681 never _opened_; since the existence check succeeded, the file must have
682 existed.
683 """
684 def __init__(self, root, path, tainted, size):
685 super(Results.File, self).__init__(root, path, tainted, size, 1)
686
687 def _clone(self, new_root, new_path, tainted):
688 """Clones itself keeping meta-data."""
689 # Keep the self.size and self._real_path caches for performance reason. It
690 # is also important when the file becomes tainted (with a variable instead
691 # of the real path) since self.path is not an on-disk path anymore so
692 # out._size cannot be updated.
693 out = self.__class__(new_root, new_path, tainted, self.size)
694 out._real_path = self._real_path
695 return out
696
697 class Directory(_TouchedObject):
698 """A directory of files. Must exist."""
699 def __init__(self, root, path, tainted, size, nb_files):
700 """path='.' is a valid value and must be handled appropriately."""
701 assert not path.endswith(os.path.sep), path
702 super(Results.Directory, self).__init__(
703 root, path + os.path.sep, tainted, size, nb_files)
704 # For a Directory instance, self.size is not a cache, it's an actual value
705 # that is never modified and represents the total size of the files
706 # contained in this directory. It is possible that the directory is empty
707 # so that size == 0; this happens if there's only an invalid symlink in
708 # it.
709
710 def flatten(self):
711 out = super(Results.Directory, self).flatten()
712 out['nb_files'] = self.nb_files
713 return out
714
715 def _clone(self, new_root, new_path, tainted):
716 """Clones itself keeping meta-data."""
717 out = self.__class__(
718 new_root,
719 new_path.rstrip(os.path.sep),
720 tainted,
721 self.size,
722 self.nb_files)
723 out._real_path = self._real_path
724 return out
725
726 class Process(object):
727 """A process that was traced.
728
729 Contains references to the files accessed by this process and its children.
730 """
731 def __init__(self, pid, files, executable, command, initial_cwd, children):
732 logging.debug('Process(%s, %d, ...)' % (pid, len(files)))
733 self.pid = pid
734 self.files = sorted(files, key=lambda x: x.path)
735 self.children = children
736 self.executable = executable
737 self.command = command
738 self.initial_cwd = initial_cwd
739
740 # Check internal consistency.
741 assert len(set(f.path for f in self.files)) == len(self.files), sorted(
742 f.path for f in self.files)
743 assert isinstance(self.children, list)
744 assert isinstance(self.files, list)
745
746 @property
747 def all(self):
748 for child in self.children:
749 for i in child.all:
750 yield i
751 yield self
752
753 def flatten(self):
754 return {
755 'children': [c.flatten() for c in self.children],
756 'command': self.command,
757 'executable': self.executable,
758 'files': [f.flatten() for f in self.files],
759 'initial_cwd': self.initial_cwd,
760 'pid': self.pid,
761 }
762
763 def strip_root(self, root):
764 assert isabs(root) and root.endswith(os.path.sep), root
765 # Loads the files after since they are constructed as objects.
766 out = self.__class__(
767 self.pid,
768 filter(None, (f.strip_root(root) for f in self.files)),
769 self.executable,
770 self.command,
771 self.initial_cwd,
772 [c.strip_root(root) for c in self.children])
773 logging.debug(
774 'strip_root(%s) %d -> %d' % (root, len(self.files), len(out.files)))
775 return out
776
777 def __init__(self, process):
778 self.process = process
779 # Cache.
780 self._files = None
781
782 def flatten(self):
783 return {
784 'root': self.process.flatten(),
785 }
786
787 @property
788 def files(self):
789 if self._files is None:
790 self._files = sorted(
791 sum((p.files for p in self.process.all), []),
792 key=lambda x: x.path)
793 return self._files
794
795 @property
796 def existent(self):
797 return [f for f in self.files if f.existent]
798
799 @property
800 def non_existent(self):
801 return [f for f in self.files if not f.existent]
802
803 def strip_root(self, root):
804 """Returns a clone with all the files outside the directory |root| removed
805 and converts all the paths to relative paths.
806 """
807 # Resolve any symlink
808 root = os.path.realpath(root)
809 root = get_native_path_case(root).rstrip(os.path.sep) + os.path.sep
810 logging.debug('strip_root(%s)' % root)
811 return Results(self.process.strip_root(root))
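  # Illustrative usage, assuming `results` came out of ApiBase.parse_log():
  #   rel = results.strip_root(u'/home/user/src/project/')
  #   for f in rel.existent:
  #     print '%s (%d bytes)' % (f.path, f.size)
  # Files outside the root (even after symlink resolution) are silently
  # dropped.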
812
813
814class ApiBase(object):
815 """OS-agnostic API to trace a process and its children."""
816 class Context(object):
817 """Processes one log line at a time and keeps the list of traced processes.
818
819 The parsing is complicated by the fact that logs are traced out of order for
820 strace but in-order for dtrace and logman. In addition, on Windows it is
821 very frequent that process IDs are reused, so a flat list cannot be used.
822 But at the same time, it is impossible to faithfully construct a graph when
823 the logs are processed out of order. So both a tree and a flat mapping are
824 used: the tree is the real process tree, while the flat mapping stores the
825 last valid process for the corresponding process ID. For the strace case, the
826 tree's head is guessed at the last moment.
827 """
828 class Process(object):
829 """Keeps context for one traced child process.
830
831 Logs all the files this process touched. Ignores directories.
832 """
833 def __init__(self, blacklist, pid, initial_cwd):
834 # Check internal consistency.
835 assert isinstance(pid, int), repr(pid)
836 self.pid = pid
837 # children are Process instances.
838 self.children = []
839 self.initial_cwd = initial_cwd
840 self.cwd = None
841 self.files = set()
842 self.only_touched = set()
843 self.executable = None
844 self.command = None
845 self._blacklist = blacklist
846
847 def to_results_process(self):
848 """Resolves file case sensitivity and or late-bound strings."""
849 # When resolving files, it's normal to get duplicates because a file could
850 # be opened multiple times with different casing. Perform the deduplication
851 # here.
852 def fix_path(x):
853 """Returns the native file path case.
854
855 Converts late-bound strings.
856 """
857 if not x:
858 # Do not convert None instance to 'None'.
859 return x
860 # TODO(maruel): Do not upconvert to unicode here, on linux we don't
861 # know the file path encoding so they must be treated as bytes.
862 x = unicode(x)
863 if os.path.isabs(x):
864 # If the path is not absolute, which tends to happen occasionally on
865 # Windows, it is not possible to get the native path case so ignore
866 # that trace. It mostly happens for 'executable' value.
867 x = get_native_path_case(x)
868 return x
869
870 def fix_and_blacklist_path(x):
871 x = fix_path(x)
872 if not x:
873 return
874 # The blacklist needs to be reapplied, since path casing could
875 # influence blacklisting.
876 if self._blacklist(x):
877 return
878 return x
879
880 # Filters out directories. Some may have passed through.
881 files = set(f for f in map(fix_and_blacklist_path, self.files) if f)
882 only_touched = set(
883 f for f in map(fix_and_blacklist_path, self.only_touched) if f)
884 only_touched -= files
885
886 files = [
887 Results.File(None, f, False, None) for f in files
888 if not os.path.isdir(f)
889 ]
890 # Using 0 as size means the file's content is ignored since the file was
891 # never opened for I/O.
892 files.extend(
893 Results.File(None, f, False, 0) for f in only_touched
894 if not os.path.isdir(f)
895 )
896 return Results.Process(
897 self.pid,
898 files,
899 fix_path(self.executable),
900 self.command,
901 fix_path(self.initial_cwd),
902 [c.to_results_process() for c in self.children])
903
904 def add_file(self, filepath, touch_only):
905 """Adds a file if it passes the blacklist."""
906 if self._blacklist(unicode(filepath)):
907 return
908 logging.debug('add_file(%d, %s, %s)' % (self.pid, filepath, touch_only))
909 # Note that filepath and not unicode(filepath) is added. It is because
910 # filepath could be something other than a string, like a RelativePath
911 # instance for dtrace logs.
912 if touch_only:
913 self.only_touched.add(filepath)
914 else:
915 self.files.add(filepath)
916
917 def __init__(self, blacklist):
918 self.blacklist = blacklist
919 # Initial process.
920 self.root_process = None
921 # dict to accelerate process lookup, so the whole graph doesn't have to be
922 # walked each time.
923 self._process_lookup = {}
924
925 class Tracer(object):
926 """During it's lifetime, the tracing subsystem is enabled."""
927 def __init__(self, logname):
928 self._logname = logname
929 self._lock = threading.Lock()
930 self._traces = []
931 self._initialized = True
932
933 def trace(self, cmd, cwd, tracename, output):
934 """Runs the OS-specific trace program on an executable.
935
936 Arguments:
937 - cmd: The command (a list) to run.
938 - cwd: Current directory to start the child process in.
939 - tracename: Name of the trace in the logname file.
940 - output: If True, the child's output is captured via pipes and returned.
941
942 Returns a tuple (resultcode, output) and updates the internal trace
943 entries.
944 """
945 # The implementation adds an item to self._traces.
946 raise NotImplementedError(self.__class__.__name__)
947
948 def close(self, _timeout=None):
949 """Saves the meta-data in the logname file.
950
951 For kernel-based tracing, stops the tracing subsystem.
952
953 Must not be used manually when using 'with' construct.
954 """
955 with self._lock:
956 assert self._initialized
957 try:
958 data = {
959 'traces': self._traces,
960 }
961 write_json(self._logname, data, False)
962 finally:
963 self._initialized = False
964
965 def post_process_log(self):
966 """Post-processes the log so it becomes faster to load afterward.
967
968 Must not be used manually when using 'with' construct.
969 """
970 assert not self._initialized, 'Must stop tracing first.'
971
972 def __enter__(self):
973 """Enables 'with' statement."""
974 return self
975
976 def __exit__(self, exc_type, exc_value, traceback):
977 """Enables 'with' statement."""
978 self.close()
979 # If an exception was thrown, do not process logs.
980 if not exc_type:
981 self.post_process_log()
982
983 def get_tracer(self, logname):
984 """Returns an ApiBase.Tracer instance.
985
986 Initializes the tracing subsystem, which is a requirement for kernel-based
987 tracers. Only one tracer instance should be live at a time!
988
989 logname is the filepath to the json file that will contain the meta-data
990 about the logs.
991 """
992 return self.Tracer(logname)
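  # Sketch of the expected calling sequence (hypothetical paths and names):
  #   api = Strace()
  #   with api.get_tracer('/abs/path/to/logs.json') as tracer:
  #     returncode, output = tracer.trace(cmd, cwd, 'trace1', True)
  #   for item in Strace.parse_log('/abs/path/to/logs.json', lambda _: False):
  #     results = item['results']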
993
994 @staticmethod
995 def clean_trace(logname):
996 """Deletes an old log."""
997 raise NotImplementedError()
998
999 @classmethod
1000 def parse_log(cls, logname, blacklist):
1001 """Processes trace logs and returns the files opened and the files that do
1002 not exist.
1003
1004 It does not track directories.
1005
1006 |logname| must be an absolute path.
1007
1008 Most of the time, files that do not exist are temporary test files that
1009 should be put in /tmp instead. See http://crbug.com/116251.
1010
1011 Returns a list of dict with keys:
1012 - results: A Results instance.
1013 - trace: The corresponding tracename parameter provided to
1014 get_tracer().trace().
1015 - output: Output gathered during execution, if get_tracer().trace(...,
1016 output=True) was used.
1017 """
1018 raise NotImplementedError(cls.__name__)
1019
1020
1021class Strace(ApiBase):
1022 """strace implies linux."""
1023 class Context(ApiBase.Context):
1024 """Processes a strace log line and keeps the list of existent and non
1025 existent files accessed.
1026
1027 Ignores directories.
1028
1029 Uses late-binding to process the cwd of each process. The problem is that
1030 strace generates one log file per process it traced but doesn't give any
1031 information about which process was started when and by whom. So we don't
1032 even know which process is the initial one. So process the logs out of
1033 order and use late binding with RelativePath to be able to deduce the
1034 initial directory of each process once all the logs are parsed.
1035 """
1036 class Process(ApiBase.Context.Process):
1037 """Represents the state of a process.
1038
1039 Contains all the information retrieved from the pid-specific log.
1040 """
1041 # Function names are using ([a-z_0-9]+)
1042 # This is the most common format. function(args) = result
1043 RE_HEADER = re.compile(r'^([a-z_0-9]+)\((.+?)\)\s+= (.+)$')
1044 # An interrupted function call, only grab the minimal header.
1045 RE_UNFINISHED = re.compile(r'^([^\(]+)(.*) \<unfinished \.\.\.\>$')
1046 # An interrupted function call, with the process exiting. It must be the
1047 # last line in the log.
1048 RE_UNFINISHED_EXIT = re.compile(
1049 r'^([^\(]+)(.*) \<unfinished \.\.\.\ exit status \d+>$')
1050 # An interrupted function call the hard way. Usually observed with futex()
1051 # on ubuntu 12.04.
1052 RE_INTERRUPTED_HARD = re.compile(r'^([^\(]+)\([A-Z0-9a-fx\,\_\|\{\} ]*$')
1053 # A resumed function call.
1054 RE_RESUMED = re.compile(r'^<\.\.\. ([^ ]+) resumed> (.+)$')
1055 # A process received a signal.
1056 RE_SIGNAL = re.compile(r'^--- SIG[A-Z]+ .+ ---')
1057 # A process didn't handle a signal. Ignore any junk appearing before,
1058 # because the process was forcibly killed so it won't open any new file.
1059 RE_KILLED = re.compile(
1060 r'^.*\+\+\+ killed by ([A-Z]+)( \(core dumped\))? \+\+\+$')
1061 # The process has exited.
1062 RE_PROCESS_EXITED = re.compile(r'^\+\+\+ exited with (\d+) \+\+\+')
1063 # A call was canceled. Ignore any prefix.
1064 RE_UNAVAILABLE = re.compile(r'^.*\)\s*= \? <unavailable>$')
1065 # Happens when strace fails to even get the function name.
1066 UNNAMED_FUNCTION = '????'
1067
1068 # Corner case in Python: a class member function decorator must not be
1069 # @staticmethod.
1070 def parse_args(regexp, expect_zero): # pylint: disable=E0213
1071 """Automatically convert the str 'args' into a list of processed
1072 arguments.
1073
1074 Arguments:
1075 - regexp is used to parse args.
1076 - expect_zero: one of True, False or None.
1077 - True: will check for result.startswith('0') first and will ignore
1078 the trace line completely otherwise. This is important because for
1079 many functions, the regexp will not process if the call failed.
1080 - False: will check for not result.startswith(('?', '-1')) for the
1081 same reason as with True.
1082 - None: ignore result.
1083 """
1084 def meta_hook(function):
1085 assert function.__name__.startswith('handle_')
1086 def hook(self, args, result):
1087 if expect_zero is True and not result.startswith('0'):
1088 return
1089 if expect_zero is False and result.startswith(('?', '-1')):
1090 return
1091 match = re.match(regexp, args)
1092 if not match:
1093 raise TracingFailure(
1094 'Failed to parse %s(%s) = %s' %
1095 (function.__name__[len('handle_'):], args, result),
1096 None, None, None)
1097 return function(self, match.groups(), result)
1098 return hook
1099 return meta_hook
1100
1101 class RelativePath(object):
1102 """A late-bound relative path."""
1103 def __init__(self, parent, value):
1104 self.parent = parent
1105 self.value = value
1106
1107 def render(self):
1108 """Returns the current directory this instance is representing.
1109
1110 This function is used to return the late-bound value.
1111 """
1112 if self.value and self.value.startswith(u'/'):
1113 # An absolute path.
1114 return self.value
1115 parent = self.parent.render() if self.parent else u'<None>'
1116 if self.value:
1117 return os.path.normpath(os.path.join(parent, self.value))
1118 return parent
1119
1120 def __unicode__(self):
1121 """Acts as a string whenever needed."""
1122 return unicode(self.render())
1123
1124 def __str__(self):
1125 """Acts as a string whenever needed."""
1126 return str(self.render())
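      # Example of the late binding: if the root context's cwd only turns out
      # to be, say, '/build/slave' once every log has been parsed, then
      #   RelativePath(RelativePath(root, None), 'out/Release')
      # renders as '/build/slave/out/Release' at that point, not before.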
1127
1128 def __init__(self, root, pid):
1129 """Keeps enough information to be able to guess the original process
1130 root.
1131
1132 strace doesn't store which process was the initial process. So more
1133 information needs to be kept so the graph can be reconstructed from the
1134 flat map.
1135 """
1136 logging.info('%s(%d)' % (self.__class__.__name__, pid))
1137 super(Strace.Context.Process, self).__init__(root.blacklist, pid, None)
1138 assert isinstance(root, ApiBase.Context)
1139 self._root = weakref.ref(root)
1140 # The dict key is the function name of the pending call, like 'open'
1141 # or 'execve'.
1142 self._pending_calls = {}
1143 self._line_number = 0
1144 # Current directory when the process started.
1145 self.initial_cwd = self.RelativePath(self._root(), None)
1146 self.parentid = None
1147 self._done = False
1148
1149 def get_cwd(self):
1150 """Returns the best known value of cwd."""
1151 return self.cwd or self.initial_cwd
1152
1153 def render(self):
1154 """Returns the string value of the RelativePath() object.
1155
1156 Used by RelativePath. Returns the initial directory and not the
1157 current one since the current directory 'cwd' validity is time-limited.
1158
1159 The validity is only guaranteed once all the logs are processed.
1160 """
1161 return self.initial_cwd.render()
1162
1163 def on_line(self, line):
1164 self._line_number += 1
1165 if self._done:
1166 raise TracingFailure(
1167 'Found a trace for a terminated process',
1168 None, None, None)
1169
1170 if self.RE_SIGNAL.match(line):
1171 # Ignore signals.
1172 return
1173
1174 try:
1175 match = self.RE_KILLED.match(line)
1176 if match:
1177 # Converts a '+++ killed by Foo +++' trace into an exit_group().
1178 self.handle_exit_group(match.group(1), None)
1179 return
1180
1181 match = self.RE_PROCESS_EXITED.match(line)
1182 if match:
1183 # Converts a '+++ exited with 1 +++' trace into an exit_group()
1184 self.handle_exit_group(match.group(1), None)
1185 return
1186
1187 match = self.RE_UNFINISHED.match(line)
1188 if match:
1189 if match.group(1) in self._pending_calls:
1190 raise TracingFailure(
1191 'Found two unfinished calls for the same function',
1192 None, None, None,
1193 self._pending_calls)
1194 self._pending_calls[match.group(1)] = (
1195 match.group(1) + match.group(2))
1196 return
1197
1198 match = (
1199 self.RE_UNFINISHED_EXIT.match(line) or
1200 self.RE_INTERRUPTED_HARD.match(line))
1201 if match:
1202 # The process died. No other line can be processed afterward.
1203 self._done = True
1204 return
1205
1206 match = self.RE_UNAVAILABLE.match(line)
1207 if match:
1208 # This usually means a process was killed and a pending call was
1209 # canceled.
1210 # TODO(maruel): Look up the last exit_group() trace just above and
1211 # make sure any self._pending_calls[anything] is properly flushed.
1212 return
1213
1214 match = self.RE_RESUMED.match(line)
1215 if match:
1216 if match.group(1) not in self._pending_calls:
1217 raise TracingFailure(
1218 'Found a resumed call that was not logged as unfinished',
1219 None, None, None,
1220 self._pending_calls)
1221 pending = self._pending_calls.pop(match.group(1))
1222 # Reconstruct the line.
1223 line = pending + match.group(2)
1224
1225 match = self.RE_HEADER.match(line)
1226 if not match:
1227 raise TracingFailure(
1228 'Found an invalid line: %s' % line,
1229 None, None, None)
1230 if match.group(1) == self.UNNAMED_FUNCTION:
1231 return
1232
1233 # It's a valid line, handle it.
1234 handler = getattr(self, 'handle_%s' % match.group(1), None)
1235 if not handler:
1236 self._handle_unknown(match.group(1), match.group(2), match.group(3))
1237 return handler(match.group(2), match.group(3))
1238 except TracingFailure, e:
1239 # Hack in the values since the handler could be a static function.
1240 e.pid = self.pid
1241 e.line = line
1242 e.line_number = self._line_number
1243 # Re-raise the modified exception.
1244 raise
1245 except (KeyError, NotImplementedError, ValueError), e:
1246 raise TracingFailure(
1247 'Trace generated a %s exception: %s' % (
1248 e.__class__.__name__, str(e)),
1249 self.pid,
1250 self._line_number,
1251 line,
1252 e)
1253
1254 @parse_args(r'^\"(.+?)\", [FKORWX_|]+$', True)
1255 def handle_access(self, args, _result):
1256 self._handle_file(args[0], True)
1257
1258 @parse_args(r'^\"(.+?)\"$', True)
1259 def handle_chdir(self, args, _result):
1260 """Updates cwd."""
1261 self.cwd = self.RelativePath(self, args[0])
1262 logging.debug('handle_chdir(%d, %s)' % (self.pid, self.cwd))
1263
1264 def handle_clone(self, _args, result):
1265 """Transfers cwd."""
1266 if result.startswith(('?', '-1')):
1267 # The call failed.
1268 return
1269 # Update the other process right away.
1270 childpid = int(result)
1271 child = self._root().get_or_set_proc(childpid)
1272 if child.parentid is not None or childpid in self.children:
1273 raise TracingFailure(
1274 'Found internal inconsistency in process lifetime detection '
1275 'during a clone() call',
1276 None, None, None)
1277
1278 # Copy the cwd object.
1279 child.initial_cwd = self.get_cwd()
1280 child.parentid = self.pid
1281 # It is necessary because the logs are processed out of order.
1282 self.children.append(child)
1283
1284 def handle_close(self, _args, _result):
1285 pass
1286
1287 def handle_chmod(self, _args, _result):
1288 pass
1289
1290 def handle_creat(self, _args, _result):
1291 # Ignore files created, since they didn't need to exist.
1292 pass
1293
1294 @parse_args(r'^\"(.+?)\", \[(.+)\], \[\/\* \d+ vars? \*\/\]$', True)
1295 def handle_execve(self, args, _result):
1296 # Even if in practice execve() doesn't return when it succeeds, strace
1297 # still prints '0' as the result.
1298 filepath = args[0]
1299 self._handle_file(filepath, False)
1300 self.executable = self.RelativePath(self.get_cwd(), filepath)
1301 self.command = strace_process_quoted_arguments(args[1])
1302
1303 def handle_exit_group(self, _args, _result):
1304 """Removes cwd."""
1305 self.cwd = None
1306
1307 def handle_fork(self, args, result):
1308 self._handle_unknown('fork', args, result)
1309
1310 def handle_futex(self, _args, _result):
1311 pass
1312
1313 def handle_getcwd(self, _args, _result):
1314 pass
1315
1316 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1317 def handle_link(self, args, _result):
1318 self._handle_file(args[0], False)
1319 self._handle_file(args[1], False)
1320
1321 @parse_args(r'\"(.+?)\", \{.+?, \.\.\.\}', True)
1322 def handle_lstat(self, args, _result):
1323 self._handle_file(args[0], True)
1324
1325 def handle_mkdir(self, _args, _result):
1326 pass
1327
1328 @parse_args(r'^\"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', False)
1329 def handle_open(self, args, _result):
1330 if 'O_DIRECTORY' in args[1]:
1331 return
1332 self._handle_file(args[0], False)
1333
1334 @parse_args(r'^(\d+|AT_FDCWD), \"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', False)
1335 def handle_openat(self, args, _result):
1336 if 'O_DIRECTORY' in args[2]:
1337 return
1338 if args[0] == 'AT_FDCWD':
1339 self._handle_file(args[1], False)
1340 else:
1341 # TODO(maruel): Implement relative open if necessary instead of the
1342 # AT_FDCWD flag, let's hope not since this means tracking all active
1343 # directory handles.
1344 raise Exception('Relative open via openat not implemented.')
1345
1346 @parse_args(r'^\"(.+?)\", \".+?\"(\.\.\.)?, \d+$', False)
1347 def handle_readlink(self, args, _result):
1348 self._handle_file(args[0], False)
1349
1350 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1351 def handle_rename(self, args, _result):
1352 self._handle_file(args[0], False)
1353 self._handle_file(args[1], False)
1354
1355 def handle_rmdir(self, _args, _result):
1356 pass
1357
1358 def handle_setxattr(self, _args, _result):
1359 pass
1360
1361 @parse_args(r'\"(.+?)\", \{.+?, \.\.\.\}', True)
1362 def handle_stat(self, args, _result):
1363 self._handle_file(args[0], True)
1364
1365 def handle_symlink(self, _args, _result):
1366 pass
1367
1368 def handle_unlink(self, _args, _result):
1369 # In theory, the file had to be created anyway.
1370 pass
1371
1372 def handle_statfs(self, _args, _result):
1373 pass
1374
1375 def handle_vfork(self, args, result):
1376 self._handle_unknown('vfork', args, result)
1377
1378 @staticmethod
1379 def _handle_unknown(function, args, result):
1380 raise TracingFailure(
1381 'Unexpected/unimplemented trace %s(%s)= %s' %
1382 (function, args, result),
1383 None, None, None)
1384
1385 def _handle_file(self, filepath, touch_only):
1386 filepath = self.RelativePath(self.get_cwd(), filepath)
1387 #assert not touch_only, unicode(filepath)
1388 self.add_file(filepath, touch_only)
1389
1390 def __init__(self, blacklist, initial_cwd):
1391 super(Strace.Context, self).__init__(blacklist)
1392 self.initial_cwd = initial_cwd
1393
1394 def render(self):
1395 """Returns the string value of the initial cwd of the root process.
1396
1397 Used by RelativePath.
1398 """
1399 return self.initial_cwd
1400
1401 def on_line(self, pid, line):
1402 """Transfers control into the Process.on_line() function."""
1403 self.get_or_set_proc(pid).on_line(line.strip())
1404
1405 def to_results(self):
1406 """Finds back the root process and verify consistency."""
1407 # TODO(maruel): Absolutely unnecessary, fix me.
1408 root = [p for p in self._process_lookup.itervalues() if not p.parentid]
1409 if len(root) != 1:
1410 raise TracingFailure(
1411 'Found internal inconsistency in process lifetime detection '
1412 'while finding the root process',
1413 None,
1414 None,
1415 None,
1416 sorted(p.pid for p in root))
1417 self.root_process = root[0]
1418 process = self.root_process.to_results_process()
1419 if sorted(self._process_lookup) != sorted(p.pid for p in process.all):
1420 raise TracingFailure(
1421 'Found internal inconsistency in process lifetime detection '
1422 'while looking for len(tree) == len(list)',
1423 None,
1424 None,
1425 None,
1426 sorted(self._process_lookup),
1427 sorted(p.pid for p in process.all))
1428 return Results(process)
1429
1430 def get_or_set_proc(self, pid):
1431 """Returns the Context.Process instance for this pid or creates a new one.
1432 """
1433 if not pid or not isinstance(pid, int):
1434 raise TracingFailure(
1435 'Unexpected value for pid: %r' % pid,
1436 pid,
1437 None,
1438 None,
1439 pid)
1440 if pid not in self._process_lookup:
1441 self._process_lookup[pid] = self.Process(self, pid)
1442 return self._process_lookup[pid]
1443
1444 @classmethod
1445 def traces(cls):
1446 """Returns the list of all handled traces to pass this as an argument to
1447 strace.
1448 """
1449 prefix = 'handle_'
1450 return [i[len(prefix):] for i in dir(cls.Process) if i.startswith(prefix)]
1451
1452 class Tracer(ApiBase.Tracer):
1453 MAX_LEN = 256
1454
1455 def trace(self, cmd, cwd, tracename, output):
1456 """Runs strace on an executable."""
1457 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
1458 assert os.path.isabs(cmd[0]), cmd[0]
1459 assert os.path.isabs(cwd), cwd
1460 assert os.path.normpath(cwd) == cwd, cwd
1461 with self._lock:
1462 if not self._initialized:
1463 raise TracingFailure(
1464 'Called Tracer.trace() on an uninitialized object',
1465 None, None, None, tracename)
1466 assert tracename not in (i['trace'] for i in self._traces)
1467 stdout = stderr = None
1468 if output:
1469 stdout = subprocess.PIPE
1470 stderr = subprocess.STDOUT
1471 # Ensure all file related APIs are hooked.
1472 traces = ','.join(Strace.Context.traces() + ['file'])
1473 trace_cmd = [
1474 'strace',
1475 '-ff',
1476 '-s', '%d' % self.MAX_LEN,
1477 '-e', 'trace=%s' % traces,
1478 '-o', self._logname + '.' + tracename,
1479 ]
1480 child = subprocess.Popen(
1481 trace_cmd + cmd,
1482 cwd=cwd,
1483 stdin=subprocess.PIPE,
1484 stdout=stdout,
1485 stderr=stderr)
1486 out = child.communicate()[0]
1487 # TODO(maruel): Walking the logs to figure out the root process would
1488 # simplify parsing the logs a *lot*.
1489 with self._lock:
1490 assert tracename not in (i['trace'] for i in self._traces)
1491 self._traces.append(
1492 {
1493 'cmd': cmd,
1494 'cwd': cwd,
1495 'output': out,
1496 # The pid of the strace process, not very useful.
1497 'pid': child.pid,
1498 'trace': tracename,
1499 })
1500 return child.returncode, out
1501
1502 @staticmethod
1503 def clean_trace(logname):
1504 if os.path.isfile(logname):
1505 os.remove(logname)
1506 # Also delete any pid specific file from previous traces.
1507 for i in glob.iglob(logname + '.*'):
1508 if i.rsplit('.', 1)[1].isdigit():
1509 os.remove(i)
1510
1511 @classmethod
1512 def parse_log(cls, logname, blacklist):
1513 logging.info('parse_log(%s, %s)' % (logname, blacklist))
1514 assert os.path.isabs(logname)
1515 data = read_json(logname)
1516 out = []
1517 for item in data['traces']:
1518 result = {
1519 'output': item['output'],
1520 'trace': item['trace'],
1521 }
1522 try:
1523 context = cls.Context(blacklist, item['cwd'])
1524 for pidfile in glob.iglob('%s.%s.*' % (logname, item['trace'])):
1525 pid = pidfile.rsplit('.', 1)[1]
1526 if pid.isdigit():
1527 pid = int(pid)
1528 # TODO(maruel): Load as utf-8
1529 for line in open(pidfile, 'rb'):
1530 context.on_line(pid, line)
1531 result['results'] = context.to_results()
1532 except TracingFailure:
1533 result['exception'] = sys.exc_info()
1534 out.append(result)
1535 return out
1536
1537
1538class Dtrace(ApiBase):
1539 """Uses DTrace framework through dtrace. Requires root access.
1540
1541 Implies Mac OSX.
1542
1543 dtruss can't be used because it has compatibility issues with python.
1544
1545 Also, the pid->cwd handling needs to be done manually since OSX has no way to
1546 get the absolute path of the 'cwd' dtrace variable from the probe.
1547
1548 Also, OSX doesn't populate curpsinfo->pr_psargs properly, see
1549 https://discussions.apple.com/thread/1980539. So resort to handling execve()
1550 manually.
1551
1552 errno is not printed in the log since this implementation currently only cares
1553 about files that were successfully opened.
1554 """
1555 class Context(ApiBase.Context):
1556 # Format: index pid function(args)
1557 RE_HEADER = re.compile(r'^\d+ (\d+) ([a-zA-Z_\-]+)\((.*?)\)$')
1558
1559 # Arguments parsing.
1560 RE_DTRACE_BEGIN = re.compile(r'^\"(.+?)\"$')
1561 RE_CHDIR = re.compile(r'^\"(.+?)\"$')
1562 RE_EXECVE = re.compile(r'^\"(.+?)\", \[(\d+), (.+)\]$')
1563 RE_OPEN = re.compile(r'^\"(.+?)\", (0x[0-9a-z]+), (0x[0-9a-z]+)$')
1564 RE_PROC_START = re.compile(r'^(\d+), \"(.+?)\", (\d+)$')
1565 RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$')
1566
1567 O_DIRECTORY = 0x100000
1568
1569 class Process(ApiBase.Context.Process):
1570 def __init__(self, *args):
1571 super(Dtrace.Context.Process, self).__init__(*args)
1572 self.cwd = self.initial_cwd
1573
1574 def __init__(self, blacklist, tracer_pid, initial_cwd):
1575 logging.info(
1576 '%s(%d, %s)' % (self.__class__.__name__, tracer_pid, initial_cwd))
1577 super(Dtrace.Context, self).__init__(blacklist)
1578 # Process ID of the temporary script created by create_thunk().
1579 self._tracer_pid = tracer_pid
1580 self._initial_cwd = initial_cwd
1581 self._line_number = 0
1582
1583 def on_line(self, line):
1584 self._line_number += 1
1585 match = self.RE_HEADER.match(line)
1586 if not match:
1587 raise TracingFailure(
1588 'Found malformed line: %s' % line,
1589 None,
1590 self._line_number,
1591 line)
1592 fn = getattr(
1593 self,
1594 'handle_%s' % match.group(2).replace('-', '_'),
1595 self._handle_ignored)
1596 # It is guaranteed to succeed because of the regexp. Or at least I thought
1597 # it would.
1598 pid = int(match.group(1))
1599 try:
1600 return fn(pid, match.group(3))
1601 except TracingFailure, e:
1602 # Hack in the values since the handler could be a static function.
1603 e.pid = pid
1604 e.line = line
1605 e.line_number = self._line_number
1606 # Re-raise the modified exception.
1607 raise
1608 except (KeyError, NotImplementedError, ValueError), e:
1609 raise TracingFailure(
1610 'Trace generated a %s exception: %s' % (
1611 e.__class__.__name__, str(e)),
1612 pid,
1613 self._line_number,
1614 line,
1615 e)
1616
1617 def to_results(self):
1618 process = self.root_process.to_results_process()
1619 # Internal consistency check.
1620 if sorted(self._process_lookup) != sorted(p.pid for p in process.all):
1621 raise TracingFailure(
1622 'Found internal inconsistency in process lifetime detection '
1623 'while looking for len(tree) == len(list)',
1624 None,
1625 None,
1626 None,
1627 sorted(self._process_lookup),
1628 sorted(p.pid for p in process.all))
1629 return Results(process)
1630
1631 def handle_dtrace_BEGIN(self, _pid, args):
1632 if not self.RE_DTRACE_BEGIN.match(args):
1633 raise TracingFailure(
1634 'Found internal inconsistency in dtrace_BEGIN log line',
1635 None, None, None)
1636
1637 def handle_proc_start(self, pid, args):
1638 """Transfers cwd.
1639
1640 The dtrace script already takes care of only tracing the processes that
1641 are child of the traced processes so there is no need to verify the
1642 process hierarchy.
1643 """
1644 if pid in self._process_lookup:
1645 raise TracingFailure(
1646 'Found internal inconsistency in proc_start: %d started twice' %
1647 pid,
1648 None, None, None)
1649 match = self.RE_PROC_START.match(args)
1650 if not match:
1651 raise TracingFailure(
1652 'Failed to parse arguments: %s' % args,
1653 None, None, None)
1654 ppid = int(match.group(1))
1655 if ppid == self._tracer_pid and not self.root_process:
1656 proc = self.root_process = self.Process(
1657 self.blacklist, pid, self._initial_cwd)
1658 elif ppid in self._process_lookup:
1659 proc = self.Process(self.blacklist, pid, self._process_lookup[ppid].cwd)
1660 self._process_lookup[ppid].children.append(proc)
1661 else:
1662 # Another process tree, ignore.
1663 return
1664 self._process_lookup[pid] = proc
1665 logging.debug(
1666 'New child: %s -> %d cwd:%s' %
1667 (ppid, pid, unicode(proc.initial_cwd)))
1668
1669 def handle_proc_exit(self, pid, _args):
1670 """Removes cwd."""
1671 if pid in self._process_lookup:
1672 # self._tracer_pid is not traced itself and other traces run neither.
1673 self._process_lookup[pid].cwd = None
1674
1675 def handle_execve(self, pid, args):
1676 """Sets the process' executable.
1677
1678 TODO(maruel): Read command line arguments. See
1679 https://discussions.apple.com/thread/1980539 for an example.
1680 https://gist.github.com/1242279
1681
1682 Will have to put the answer at http://stackoverflow.com/questions/7556249.
1683 :)
1684 """
1685 if not pid in self._process_lookup:
1686 # Another process tree, ignore.
1687 return
1688 match = self.RE_EXECVE.match(args)
1689 if not match:
1690 raise TracingFailure(
1691 'Failed to parse arguments: %r' % args,
1692 None, None, None)
1693 proc = self._process_lookup[pid]
1694 proc.executable = match.group(1)
1695 proc.command = self.process_escaped_arguments(match.group(3))
1696 if int(match.group(2)) != len(proc.command):
1697 raise TracingFailure(
1698 'Failed to parse execve() arguments: %s' % args,
1699 None, None, None)
1700
1701 def handle_chdir(self, pid, args):
1702 """Updates cwd."""
1703 if pid not in self._process_lookup:
1704 # Another process tree, ignore.
1705 return
1706 cwd = self.RE_CHDIR.match(args).group(1)
1707 if not cwd.startswith('/'):
1708 cwd2 = os.path.join(self._process_lookup[pid].cwd, cwd)
1709 logging.debug('handle_chdir(%d, %s) -> %s' % (pid, cwd, cwd2))
1710 else:
1711 logging.debug('handle_chdir(%d, %s)' % (pid, cwd))
1712 cwd2 = cwd
1713 self._process_lookup[pid].cwd = cwd2
1714
1715 def handle_open_nocancel(self, pid, args):
1716 """Redirects to handle_open()."""
1717 return self.handle_open(pid, args)
1718
1719 def handle_open(self, pid, args):
1720 if pid not in self._process_lookup:
1721 # Another process tree, ignore.
1722 return
1723 match = self.RE_OPEN.match(args)
1724 if not match:
1725 raise TracingFailure(
1726 'Failed to parse arguments: %s' % args,
1727 None, None, None)
1728 flag = int(match.group(2), 16)
1729 if self.O_DIRECTORY & flag == self.O_DIRECTORY:
1730 # Ignore directories.
1731 return
1732 self._handle_file(pid, match.group(1))
1733
1734 def handle_rename(self, pid, args):
1735 if pid not in self._process_lookup:
1736 # Another process tree, ignore.
1737 return
1738 match = self.RE_RENAME.match(args)
1739 if not match:
1740 raise TracingFailure(
1741 'Failed to parse arguments: %s' % args,
1742 None, None, None)
1743 self._handle_file(pid, match.group(1))
1744 self._handle_file(pid, match.group(2))
1745
1746 def _handle_file(self, pid, filepath):
1747 if not filepath.startswith('/'):
1748 filepath = os.path.join(self._process_lookup[pid].cwd, filepath)
1749 # We can get '..' in the path.
1750 filepath = os.path.normpath(filepath)
1751 # Sadly, still need to filter out directories here;
1752 # saw open_nocancel(".", 0, 0) = 0 lines.
1753 if os.path.isdir(filepath):
1754 return
1755 self._process_lookup[pid].add_file(filepath, False)
1756
1757 def handle_ftruncate(self, pid, args):
1758 """Just used as a signal to kill dtrace, ignoring."""
1759 pass
1760
1761 @staticmethod
1762 def _handle_ignored(pid, args):
1763      """Called for all trace events that are not handled."""
1764 raise NotImplementedError('Please implement me')
1765
1766 @staticmethod
1767 def process_escaped_arguments(text):
1768      """Extracts escaped arguments from a string and returns the arguments as a
1769 list.
1770
1771 Implemented as an automaton.
1772
1773 Example:
1774        With text = 'python2.7\\001-c\\001print(\\"hi\\")\\0', the
1775        function will return ['python2.7', '-c', 'print("hi")']
1776 """
1777 if not text.endswith('\\0'):
1778 raise ValueError('String is not null terminated: %r' % text, text)
1779 text = text[:-2]
1780
1781 def unescape(x):
1782 """Replaces '\\' with '\' and '\?' (where ? is anything) with ?."""
1783 out = []
1784 escaped = False
1785 for i in x:
1786 if i == '\\' and not escaped:
1787 escaped = True
1788 continue
1789 escaped = False
1790 out.append(i)
1791 return ''.join(out)
1792
1793 return [unescape(i) for i in text.split('\\001')]
1794
1795 class Tracer(ApiBase.Tracer):
1796 # pylint: disable=C0301
1797 #
1798 # To understand the following code, you'll want to take a look at:
1799 # http://developers.sun.com/solaris/articles/dtrace_quickref/dtrace_quickref.html
1800 # https://wikis.oracle.com/display/DTrace/Variables
1801 # http://docs.oracle.com/cd/E19205-01/820-4221/
1802 #
1803 # 0. Dump all the valid probes into a text file. It is important, you
1804 # want to redirect into a file and you don't want to constantly 'sudo'.
1805 # $ sudo dtrace -l > probes.txt
1806 #
1807 # 1. Count the number of probes:
1808 # $ wc -l probes.txt
1809 # 81823 # On OSX 10.7, including 1 header line.
1810 #
1811 # 2. List providers, intentionally skipping all the 'syspolicy10925' and the
1812 # likes and skipping the header with NR>1:
1813 # $ awk 'NR>1 { print $2 }' probes.txt | sort | uniq | grep -v '[[:digit:]]'
1814 # dtrace
1815 # fbt
1816 # io
1817 # ip
1818 # lockstat
1819 # mach_trap
1820 # proc
1821 # profile
1822 # sched
1823 # syscall
1824 # tcp
1825 # vminfo
1826 #
1827 # 3. List of valid probes:
1828 # $ grep syscall probes.txt | less
1829 # or use dtrace directly:
1830 # $ sudo dtrace -l -P syscall | less
1831 #
1832 # trackedpid is an associative array where its value can be 0, 1 or 2.
1833 # 0 is for untracked processes and is the default value for items not
1834 # in the associative array.
1835 # 1 is for tracked processes.
1836 # 2 is for the script created by create_thunk() only. It is not tracked
1837      # itself but all its descendants are.
1838 #
1839 # The script will kill itself only once waiting_to_die == 1 and
1840 # current_processes == 0, so that both getlogin() was called and that
1841      # current_processes == 0, so that both getlogin() was called and
1842 #
1843 # TODO(maruel): Use cacheable predicates. See
1844 # https://wikis.oracle.com/display/DTrace/Performance+Considerations
1845 D_CODE = """
1846 dtrace:::BEGIN {
1847 waiting_to_die = 0;
1848 current_processes = 0;
1849 logindex = 0;
1850 printf("%d %d %s_%s(\\"%s\\")\\n",
1851 logindex, PID, probeprov, probename, SCRIPT);
1852 logindex++;
1853 }
1854
1855 proc:::start /trackedpid[ppid]/ {
1856 trackedpid[pid] = 1;
1857 current_processes += 1;
1858 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
1859 logindex, pid, probeprov, probename,
1860 ppid,
1861 execname,
1862 current_processes);
1863 logindex++;
1864 }
1865 /* Should use SCRIPT but there is no access to this variable at that
1866 * point. */
1867 proc:::start /ppid == PID && execname == "Python"/ {
1868 trackedpid[pid] = 2;
1869 current_processes += 1;
1870 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
1871 logindex, pid, probeprov, probename,
1872 ppid,
1873 execname,
1874 current_processes);
1875 logindex++;
1876 }
1877 proc:::exit /trackedpid[pid] &&
1878 current_processes == 1 &&
1879 waiting_to_die == 1/ {
1880 trackedpid[pid] = 0;
1881 current_processes -= 1;
1882 printf("%d %d %s_%s(%d)\\n",
1883 logindex, pid, probeprov, probename,
1884 current_processes);
1885 logindex++;
1886 exit(0);
1887 }
1888 proc:::exit /trackedpid[pid]/ {
1889 trackedpid[pid] = 0;
1890 current_processes -= 1;
1891 printf("%d %d %s_%s(%d)\\n",
1892 logindex, pid, probeprov, probename,
1893 current_processes);
1894 logindex++;
1895 }
1896
1897 /* Use an arcane function to detect when we need to die */
1898 syscall::ftruncate:entry /pid == PID && arg0 == FILE_ID/ {
1899 waiting_to_die = 1;
1900 printf("%d %d %s()\\n", logindex, pid, probefunc);
1901 logindex++;
1902 }
1903 syscall::ftruncate:entry /
1904 pid == PID && arg0 == FILE_ID && current_processes == 0/ {
1905 exit(0);
1906 }
1907
1908 syscall::open*:entry /trackedpid[pid] == 1/ {
1909 self->open_arg0 = arg0;
1910 self->open_arg1 = arg1;
1911 self->open_arg2 = arg2;
1912 }
1913 syscall::open*:return /trackedpid[pid] == 1 && errno == 0/ {
1914 this->open_arg0 = copyinstr(self->open_arg0);
1915 printf("%d %d %s(\\"%s\\", 0x%x, 0x%x)\\n",
1916 logindex, pid, probefunc,
1917 this->open_arg0,
1918 self->open_arg1,
1919 self->open_arg2);
1920 logindex++;
1921 this->open_arg0 = 0;
1922 }
1923 syscall::open*:return /trackedpid[pid] == 1/ {
1924 self->open_arg0 = 0;
1925 self->open_arg1 = 0;
1926 self->open_arg2 = 0;
1927 }
1928
1929 syscall::rename:entry /trackedpid[pid] == 1/ {
1930 self->rename_arg0 = arg0;
1931 self->rename_arg1 = arg1;
1932 }
1933 syscall::rename:return /trackedpid[pid] == 1 && errno == 0/ {
1934 this->rename_arg0 = copyinstr(self->rename_arg0);
1935 this->rename_arg1 = copyinstr(self->rename_arg1);
1936 printf("%d %d %s(\\"%s\\", \\"%s\\")\\n",
1937 logindex, pid, probefunc,
1938 this->rename_arg0,
1939 this->rename_arg1);
1940 logindex++;
1941 this->rename_arg0 = 0;
1942 this->rename_arg1 = 0;
1943 }
1944 syscall::rename:return /trackedpid[pid] == 1/ {
1945 self->rename_arg0 = 0;
1946 self->rename_arg1 = 0;
1947 }
1948
1949      /* Track chdir, it's painful because it only receives relative paths.
1950 */
1951 syscall::chdir:entry /trackedpid[pid] == 1/ {
1952 self->chdir_arg0 = arg0;
1953 }
1954 syscall::chdir:return /trackedpid[pid] == 1 && errno == 0/ {
1955 this->chdir_arg0 = copyinstr(self->chdir_arg0);
1956 printf("%d %d %s(\\"%s\\")\\n",
1957 logindex, pid, probefunc,
1958 this->chdir_arg0);
1959 logindex++;
1960 this->chdir_arg0 = 0;
1961 }
1962 syscall::chdir:return /trackedpid[pid] == 1/ {
1963 self->chdir_arg0 = 0;
1964 }
1965 """
1966
1967 # execve-specific code, tends to throw a lot of exceptions.
1968 D_CODE_EXECVE = """
1969 /* Finally what we care about! */
1970 syscall::exec*:entry /trackedpid[pid]/ {
1971 self->exec_arg0 = copyinstr(arg0);
1972 /* Incrementally probe for a NULL in the argv parameter of execve() to
1973 * figure out argc. */
1974 /* TODO(maruel): Skip the remaining copyin() when a NULL pointer was
1975 * found. */
1976 self->exec_argc = 0;
1977 /* Probe for argc==1 */
1978 this->exec_argv = (user_addr_t*)copyin(
1979 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
1980 self->exec_argc = this->exec_argv[self->exec_argc] ?
1981 (self->exec_argc + 1) : self->exec_argc;
1982
1983 /* Probe for argc==2 */
1984 this->exec_argv = (user_addr_t*)copyin(
1985 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
1986 self->exec_argc = this->exec_argv[self->exec_argc] ?
1987 (self->exec_argc + 1) : self->exec_argc;
1988
1989 /* Probe for argc==3 */
1990 this->exec_argv = (user_addr_t*)copyin(
1991 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
1992 self->exec_argc = this->exec_argv[self->exec_argc] ?
1993 (self->exec_argc + 1) : self->exec_argc;
1994
1995 /* Probe for argc==4 */
1996 this->exec_argv = (user_addr_t*)copyin(
1997 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
1998 self->exec_argc = this->exec_argv[self->exec_argc] ?
1999 (self->exec_argc + 1) : self->exec_argc;
2000
2001        /* Copy the input strings since there is no guarantee they'll be
2002         * present after the call completes. */
2003 self->exec_argv0 = (self->exec_argc > 0) ?
2004 copyinstr(this->exec_argv[0]) : "";
2005 self->exec_argv1 = (self->exec_argc > 1) ?
2006 copyinstr(this->exec_argv[1]) : "";
2007 self->exec_argv2 = (self->exec_argc > 2) ?
2008 copyinstr(this->exec_argv[2]) : "";
2009 self->exec_argv3 = (self->exec_argc > 3) ?
2010 copyinstr(this->exec_argv[3]) : "";
2011 this->exec_argv = 0;
2012 }
2013 syscall::exec*:return /trackedpid[pid] && errno == 0/ {
2014 /* We need to join strings here, as using multiple printf() would
2015 * cause tearing when multiple threads/processes are traced.
2016 * Since it is impossible to escape a string and join it to another one,
2017 * like sprintf("%s%S", previous, more), use hackery.
2018         * Each element is separated with a \\1. \\0 cannot be used because
2019 * it is simply ignored. This will conflict with any program putting a
2020 * \\1 in their execve() string but this should be "rare enough" */
2021 this->args = "";
2022 /* Process exec_argv[0] */
2023 this->args = strjoin(
2024 this->args, (self->exec_argc > 0) ? self->exec_argv0 : "");
2025
2026 /* Process exec_argv[1] */
2027 this->args = strjoin(
2028 this->args, (self->exec_argc > 1) ? "\\1" : "");
2029 this->args = strjoin(
2030 this->args, (self->exec_argc > 1) ? self->exec_argv1 : "");
2031
2032 /* Process exec_argv[2] */
2033 this->args = strjoin(
2034 this->args, (self->exec_argc > 2) ? "\\1" : "");
2035 this->args = strjoin(
2036 this->args, (self->exec_argc > 2) ? self->exec_argv2 : "");
2037
2038 /* Process exec_argv[3] */
2039 this->args = strjoin(
2040 this->args, (self->exec_argc > 3) ? "\\1" : "");
2041 this->args = strjoin(
2042 this->args, (self->exec_argc > 3) ? self->exec_argv3 : "");
2043
2044        /* Prints self->exec_argc to permit verifying the internal
2045 * consistency since this code is quite fishy. */
2046 printf("%d %d %s(\\"%s\\", [%d, %S])\\n",
2047 logindex, pid, probefunc,
2048 self->exec_arg0,
2049 self->exec_argc,
2050 this->args);
2051 logindex++;
2052 this->args = 0;
2053 }
2054 syscall::exec*:return /trackedpid[pid]/ {
2055 self->exec_arg0 = 0;
2056 self->exec_argc = 0;
2057 self->exec_argv0 = 0;
2058 self->exec_argv1 = 0;
2059 self->exec_argv2 = 0;
2060 self->exec_argv3 = 0;
2061 }
2062 """
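    # Note: the \1 separators built with strjoin() above show up in the log as
    # the literal text '\001' (the %S format escapes the raw byte), which is
    # what Context.process_escaped_arguments() splits on when handle_execve()
    # rebuilds the argument list.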
2063
2064 # Code currently not used.
2065 D_EXTRANEOUS = """
2066 /* This is a good learning experience, since it traces a lot of things
2067 * related to the process and child processes.
2068 * Warning: it generates a gigantic log. For example, tracing
2069 * "data/trace_inputs/child1.py --child" generates a 2mb log and takes
2070 * several minutes to execute.
2071 */
2072 /*
2073 mach_trap::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2074 printf("%d %d %s_%s() = %d\\n",
2075 logindex, pid, probeprov, probefunc, errno);
2076 logindex++;
2077 }
2078 proc::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2079 printf("%d %d %s_%s() = %d\\n",
2080 logindex, pid, probeprov, probefunc, errno);
2081 logindex++;
2082 }
2083 sched::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2084 printf("%d %d %s_%s() = %d\\n",
2085 logindex, pid, probeprov, probefunc, errno);
2086 logindex++;
2087 }
2088 syscall::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2089 printf("%d %d %s_%s() = %d\\n",
2090 logindex, pid, probeprov, probefunc, errno);
2091 logindex++;
2092 }
2093 vminfo::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2094 printf("%d %d %s_%s() = %d\\n",
2095 logindex, pid, probeprov, probefunc, errno);
2096 logindex++;
2097 }
2098 */
2099 /* TODO(maruel): *stat* functions and friends
2100 syscall::access:return,
2101 syscall::chdir:return,
2102 syscall::chflags:return,
2103 syscall::chown:return,
2104 syscall::chroot:return,
2105 syscall::getattrlist:return,
2106 syscall::getxattr:return,
2107 syscall::lchown:return,
2108 syscall::lstat64:return,
2109 syscall::lstat:return,
2110 syscall::mkdir:return,
2111 syscall::pathconf:return,
2112 syscall::readlink:return,
2113 syscall::removexattr:return,
2114 syscall::setxattr:return,
2115 syscall::stat64:return,
2116 syscall::stat:return,
2117 syscall::truncate:return,
2118 syscall::unlink:return,
2119 syscall::utimes:return,
2120 */
2121 """
2122
2123 def __init__(self, logname):
2124 """Starts the log collection with dtrace.
2125
2126 Requires root access or chmod 4555 on dtrace. dtrace is asynchronous so
2127 this needs to wait for dtrace to be "warmed up".
2128 """
2129 super(Dtrace.Tracer, self).__init__(logname)
2130 self._script = create_thunk()
2131 # This unique dummy temp file is used to signal the dtrace script that it
2132 # should stop as soon as all the child processes are done. A bit hackish
2133      # but works well enough.
2134 self._dummy_file_id, self._dummy_file_name = tempfile.mkstemp(
2135 prefix='trace_signal_file')
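      # close() below signals the D script by calling
      # os.ftruncate(self._dummy_file_id, 0); the syscall::ftruncate:entry
      # probe in D_CODE matches on FILE_ID and treats it as the cue that no
      # more child processes will be started.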
2136
2137 # Note: do not use the -p flag. It's useless if the initial process quits
2138 # too fast, resulting in missing traces from the grand-children. The D
2139 # code manages the dtrace lifetime itself.
2140 trace_cmd = [
2141 'sudo',
2142 'dtrace',
2143 # Use a larger buffer if getting 'out of scratch space' errors.
2144 # Ref: https://wikis.oracle.com/display/DTrace/Options+and+Tunables
2145 '-b', '10m',
2146 '-x', 'dynvarsize=10m',
2147 #'-x', 'dtrace_global_maxsize=1m',
2148 '-x', 'evaltime=exec',
2149 '-o', '/dev/stderr',
2150 '-q',
2151 '-n', self._get_dtrace_code(),
2152 ]
2153 with open(self._logname + '.log', 'wb') as logfile:
2154 self._dtrace = subprocess.Popen(
2155 trace_cmd, stdout=logfile, stderr=subprocess.STDOUT)
2156 logging.debug('Started dtrace pid: %d' % self._dtrace.pid)
2157
2158 # Reads until one line is printed, which signifies dtrace is up and ready.
2159 with open(self._logname + '.log', 'rb') as logfile:
2160 while 'dtrace_BEGIN' not in logfile.readline():
2161 if self._dtrace.poll() is not None:
2162 # Do a busy wait. :/
2163 break
2164 logging.debug('dtrace started')
2165
2166 def _get_dtrace_code(self):
2167      """Sets up the D code to implement child process tracking.
2168
2169 Injects the cookie in the script so it knows when to stop.
2170
2171 The script will detect any instance of the script created with
2172 create_thunk() and will start tracing it.
2173 """
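      # For illustration only, with hypothetical values (pid 1234, a thunk at
      # /tmp/trace_thunk.py and dummy file id 5), the generated prologue
      # prepended to D_CODE would look like:
      #   inline int PID = 1234;
      #   inline string SCRIPT = "/tmp/trace_thunk.py";
      #   inline int FILE_ID = 5;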
2174 return (
2175 'inline int PID = %d;\n'
2176 'inline string SCRIPT = "%s";\n'
2177 'inline int FILE_ID = %d;\n'
2178 '\n'
2179 '%s') % (
2180 os.getpid(),
2181 self._script,
2182 self._dummy_file_id,
2183 self.D_CODE) + self.D_CODE_EXECVE
2184
2185 def trace(self, cmd, cwd, tracename, output):
2186 """Runs dtrace on an executable.
2187
2188      dtruss is broken when it starts the process itself or when tracing
2189      child processes, so this code starts a wrapper process generated with
2190      create_thunk(), which starts the executable to trace.
2191 """
2192 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
2193 assert os.path.isabs(cmd[0]), cmd[0]
2194 assert os.path.isabs(cwd), cwd
2195 assert os.path.normpath(cwd) == cwd, cwd
2196 with self._lock:
2197 if not self._initialized:
2198 raise TracingFailure(
2199            'Called Tracer.trace() on an uninitialized object',
2200 None, None, None, tracename)
2201 assert tracename not in (i['trace'] for i in self._traces)
2202
2203 # Starts the script wrapper to start the child process. This signals the
2204 # dtrace script that this process is to be traced.
2205 stdout = stderr = None
2206 if output:
2207 stdout = subprocess.PIPE
2208 stderr = subprocess.STDOUT
2209 child_cmd = [
2210 sys.executable,
2211 self._script,
2212 tracename,
2213 ]
2214 # Call a dummy function so that dtrace knows I'm about to launch a process
2215 # that needs to be traced.
2216 # Yummy.
2217 child = subprocess.Popen(
2218 child_cmd + fix_python_path(cmd),
2219 stdin=subprocess.PIPE,
2220 stdout=stdout,
2221 stderr=stderr,
2222 cwd=cwd)
2223 logging.debug('Started child pid: %d' % child.pid)
2224
2225 out = child.communicate()[0]
2226 # This doesn't mean tracing is done, one of the grand-child process may
2227 # still be alive. It will be tracked with the dtrace script.
2228
2229 with self._lock:
2230 assert tracename not in (i['trace'] for i in self._traces)
2231 self._traces.append(
2232 {
2233 'cmd': cmd,
2234 'cwd': cwd,
2235            # The pid of the wrapper script, not very useful.
2236 'pid': child.pid,
2237            'output': out,
2238            'trace': tracename,
2239          })
2240 return child.returncode, out
2241
2242 def close(self, timeout=None):
2243 """Terminates dtrace."""
2244 logging.debug('close(%s)' % timeout)
2245 try:
2246 try:
2247 super(Dtrace.Tracer, self).close(timeout)
2248 # Signal dtrace that it should stop now.
2249          # ftruncate doesn't exist on Windows.
2250 os.ftruncate(self._dummy_file_id, 0) # pylint: disable=E1101
2251          if timeout:
2252 start = time.time()
2253 # Use polling. :/
2254 while (self._dtrace.poll() is None and
2255 (time.time() - start) < timeout):
2256 time.sleep(0.1)
2257 self._dtrace.kill()
2258 self._dtrace.wait()
2259 finally:
2260 # Make sure to kill it in any case.
2261 if self._dtrace.poll() is None:
2262 try:
2263 self._dtrace.kill()
2264 self._dtrace.wait()
2265 except OSError:
2266 pass
2267
2268 if self._dtrace.returncode != 0:
2269 # Warn about any dtrace failure but basically ignore it.
2270 print 'dtrace failure: %s' % self._dtrace.returncode
2271 finally:
2272 os.close(self._dummy_file_id)
2273 os.remove(self._dummy_file_name)
2274 os.remove(self._script)
2275
2276 def post_process_log(self):
2277      """Sorts the log back into the order in which each call occurred.
2278
2279 dtrace doesn't save the buffer in strict order since it keeps one buffer
2280 per CPU.
2281 """
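      # Each line written by the D script starts with the monotonically
      # increasing logindex, then the pid, then the call, e.g. with
      # hypothetical values:
      #   12 345 open("/usr/lib/foo.dylib", 0x0, 0x1b6)
      # so sorting on the first space-separated token restores a global order
      # across the per-CPU buffers.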
2282 super(Dtrace.Tracer, self).post_process_log()
2283 logname = self._logname + '.log'
2284 with open(logname, 'rb') as logfile:
2285 lines = [l for l in logfile if l.strip()]
2286 errors = [l for l in lines if l.startswith('dtrace:')]
2287 if errors:
2288 raise TracingFailure(
2289 'Found errors in the trace: %s' % '\n'.join(errors),
2290 None, None, None, logname)
2291 try:
2292 lines = sorted(lines, key=lambda l: int(l.split(' ', 1)[0]))
2293 except ValueError:
2294 raise TracingFailure(
2295 'Found errors in the trace: %s' % '\n'.join(
2296 l for l in lines if l.split(' ', 1)[0].isdigit()),
2297 None, None, None, logname)
2298 with open(logname, 'wb') as logfile:
2299 logfile.write(''.join(lines))
2300
2301 @staticmethod
2302 def clean_trace(logname):
2303 for ext in ('', '.log'):
2304 if os.path.isfile(logname + ext):
2305 os.remove(logname + ext)
2306
2307 @classmethod
2308 def parse_log(cls, logname, blacklist):
2309 logging.info('parse_log(%s, ...)' % logname)
2310      assert os.path.isabs(logname)
2311
2312 def blacklist_more(filepath):
2313 # All the HFS metadata is in the form /.vol/...
2314 return blacklist(filepath) or re.match(r'^\/\.vol\/.+$', filepath)
2315
2316 data = read_json(logname)
2317 out = []
2318 for item in data['traces']:
2319        result = {
2320 'output': item['output'],
2321 'trace': item['trace'],
2322 }
2323 try:
2324 context = cls.Context(blacklist_more, item['pid'], item['cwd'])
2325 for line in open(logname + '.log', 'rb'):
2326 context.on_line(line)
2327 result['results'] = context.to_results()
2328 except TracingFailure:
2329 result['exception'] = sys.exc_info()
2330 out.append(result)
2331      return out
2332
2333
2334class LogmanTrace(ApiBase):
2335 """Uses the native Windows ETW based tracing functionality to trace a child
2336 process.
2337
2338  Caveat: this implementation doesn't track cwd or initial_cwd, because
2339 the Windows Kernel doesn't have a concept of 'current working directory' at
2340  all. A Win32 process has a map of current directories, one per drive letter,
2341  and it is managed by the user-mode kernel32.dll. In the kernel, a file is always
2342 opened relative to another file_object or as an absolute path. All the current
2343 working directory logic is done in user mode.
2344 """
2345 class Context(ApiBase.Context):
2346    """Processes an ETW log line and keeps the list of existent and non
2347 existent files accessed.
2348
2349 Ignores directories.
2350 """
2351 # These indexes are for the stripped version in json.
2352 EVENT_NAME = 0
2353 TYPE = 1
2354 PID = 2
2355 TID = 3
2356 PROCESSOR_ID = 4
2357 TIMESTAMP = 5
2358 USER_DATA = 6
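    # A stripped event therefore looks like (hypothetical values):
    #   ['FileIo', 'Create', 1234, '0xd58', '2', '12345.6789', ...]
    # where index 6 onward is the event-specific 'User Data'.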
2359
2360 class Process(ApiBase.Context.Process):
2361 def __init__(self, *args):
2362 super(LogmanTrace.Context.Process, self).__init__(*args)
2363 # Handle file objects that succeeded.
2364 self.file_objects = {}
2365
2366 def __init__(self, blacklist, tracer_pid):
2367 logging.info('%s(%d)' % (self.__class__.__name__, tracer_pid))
2368 super(LogmanTrace.Context, self).__init__(blacklist)
2369 self._drive_map = DosDriveMap()
2370      # Maps thread ids to the corresponding process id.
2371 self._threads_active = {}
2372      # Process ID of the tracer, e.g. trace_inputs.py
2373 self._tracer_pid = tracer_pid
2374 self._line_number = 0
2375
2376 def on_line(self, line):
2377 """Processes a json Event line."""
2378 self._line_number += 1
2379 try:
2380 # By Opcode
2381 handler = getattr(
2382 self,
2383 'handle_%s_%s' % (line[self.EVENT_NAME], line[self.TYPE]),
2384 None)
2385 if not handler:
2386 raise TracingFailure(
2387 'Unexpected event %s_%s' % (
2388 line[self.EVENT_NAME], line[self.TYPE]),
2389 None, None, None)
2390 handler(line)
2391 except TracingFailure, e:
2392 # Hack in the values since the handler could be a static function.
2393 e.pid = line[self.PID]
2394 e.line = line
2395 e.line_number = self._line_number
2396 # Re-raise the modified exception.
2397 raise
2398 except (KeyError, NotImplementedError, ValueError), e:
2399 raise TracingFailure(
2400 'Trace generated a %s exception: %s' % (
2401 e.__class__.__name__, str(e)),
2402 line[self.PID],
2403 self._line_number,
2404 line,
2405 e)
2406
2407 def to_results(self):
2408 if not self.root_process:
2409 raise TracingFailure(
2410 'Failed to detect the initial process',
2411 None, None, None)
2412 process = self.root_process.to_results_process()
2413 return Results(process)
2414
2415 def _thread_to_process(self, tid):
2416 """Finds the process from the thread id."""
2417 tid = int(tid, 16)
2418 pid = self._threads_active.get(tid)
2419 if not pid or not self._process_lookup.get(pid):
2420 return
2421 return self._process_lookup[pid]
2422
2423 @classmethod
2424 def handle_EventTrace_Header(cls, line):
2425      """Verifies no event was dropped, e.g. no buffer overrun occurred."""
2426 BUFFER_SIZE = cls.USER_DATA
2427 #VERSION = cls.USER_DATA + 1
2428 #PROVIDER_VERSION = cls.USER_DATA + 2
2429 #NUMBER_OF_PROCESSORS = cls.USER_DATA + 3
2430 #END_TIME = cls.USER_DATA + 4
2431 #TIMER_RESOLUTION = cls.USER_DATA + 5
2432 #MAX_FILE_SIZE = cls.USER_DATA + 6
2433 #LOG_FILE_MODE = cls.USER_DATA + 7
2434 #BUFFERS_WRITTEN = cls.USER_DATA + 8
2435 #START_BUFFERS = cls.USER_DATA + 9
2436 #POINTER_SIZE = cls.USER_DATA + 10
2437 EVENTS_LOST = cls.USER_DATA + 11
2438 #CPU_SPEED = cls.USER_DATA + 12
2439 #LOGGER_NAME = cls.USER_DATA + 13
2440 #LOG_FILE_NAME = cls.USER_DATA + 14
2441 #BOOT_TIME = cls.USER_DATA + 15
2442 #PERF_FREQ = cls.USER_DATA + 16
2443 #START_TIME = cls.USER_DATA + 17
2444 #RESERVED_FLAGS = cls.USER_DATA + 18
2445 #BUFFERS_LOST = cls.USER_DATA + 19
2446 #SESSION_NAME_STRING = cls.USER_DATA + 20
2447 #LOG_FILE_NAME_STRING = cls.USER_DATA + 21
2448 if line[EVENTS_LOST] != '0':
2449 raise TracingFailure(
2450 ( '%s events were lost during trace, please increase the buffer '
2451 'size from %s') % (line[EVENTS_LOST], line[BUFFER_SIZE]),
2452 None, None, None)
2453
2454 def handle_FileIo_Cleanup(self, line):
2455 """General wisdom: if a file is closed, it's because it was opened.
2456
2457 Note that FileIo_Close is not used since if a file was opened properly but
2458 not closed before the process exits, only Cleanup will be logged.
2459 """
2460 #IRP = self.USER_DATA
2461 TTID = self.USER_DATA + 1 # Thread ID, that's what we want.
2462 FILE_OBJECT = self.USER_DATA + 2
2463 #FILE_KEY = self.USER_DATA + 3
2464 proc = self._thread_to_process(line[TTID])
2465 if not proc:
2466 # Not a process we care about.
2467 return
2468 file_object = line[FILE_OBJECT]
2469 if file_object in proc.file_objects:
2470 proc.add_file(proc.file_objects.pop(file_object), False)
2471
2472 def handle_FileIo_Create(self, line):
2473 """Handles a file open.
2474
2475 All FileIo events are described at
2476 http://msdn.microsoft.com/library/windows/desktop/aa363884.aspx
2477 for some value of 'description'.
2478
2479 " (..) process and thread id values of the IO events (..) are not valid "
2480 http://msdn.microsoft.com/magazine/ee358703.aspx
2481
2482      The FileIo.Create event doesn't indicate whether the CreateFile() call
2483 succeeded, so keep track of the file_object and check that it is
2484 eventually closed with FileIo_Cleanup.
2485 """
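      # Sketch of the bookkeeping with hypothetical values: this handler stores
      #   proc.file_objects['0xfffffa80deadbeef'] = u'c:\\foo\\bar.txt'
      # and handle_FileIo_Cleanup() later pops the same file object and only
      # then records the path via proc.add_file().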
2486 #IRP = self.USER_DATA
2487 TTID = self.USER_DATA + 1 # Thread ID, that's what we want.
2488 FILE_OBJECT = self.USER_DATA + 2
2489 #CREATE_OPTIONS = self.USER_DATA + 3
2490 #FILE_ATTRIBUTES = self.USER_DATA + 4
2491      #SHARE_ACCESS = self.USER_DATA + 5
2492 OPEN_PATH = self.USER_DATA + 6
2493
2494 proc = self._thread_to_process(line[TTID])
2495 if not proc:
2496 # Not a process we care about.
2497 return
2498
2499 match = re.match(r'^\"(.+)\"$', line[OPEN_PATH])
2500 raw_path = match.group(1)
2501 # Ignore directories and bare drive right away.
2502 if raw_path.endswith(os.path.sep):
2503 return
2504 filepath = self._drive_map.to_win32(raw_path)
2505 # Ignore bare drive right away. Some may still fall through with format
2506 # like '\\?\X:'
2507 if len(filepath) == 2:
2508 return
2509 file_object = line[FILE_OBJECT]
2510 if os.path.isdir(filepath):
2511        # There is no O_DIRECTORY equivalent on Windows. The closest is
2512        # FILE_FLAG_BACKUP_SEMANTICS but it's not exactly right either. So
2513        # simply discard directories as they are found.
2514 return
2515 # Override any stale file object
2516 proc.file_objects[file_object] = filepath
2517
2518 def handle_FileIo_Rename(self, line):
2519 # TODO(maruel): Handle?
2520 pass
2521
2522 def handle_Process_End(self, line):
2523 pid = line[self.PID]
2524 if self._process_lookup.get(pid):
2525 logging.info('Terminated: %d' % pid)
2526 self._process_lookup[pid] = None
2527 else:
2528 logging.debug('Terminated: %d' % pid)
2529
2530 def handle_Process_Start(self, line):
2531 """Handles a new child process started by PID."""
2532 #UNIQUE_PROCESS_KEY = self.USER_DATA
2533 PROCESS_ID = self.USER_DATA + 1
2534 #PARENT_PID = self.USER_DATA + 2
2535 #SESSION_ID = self.USER_DATA + 3
2536 #EXIT_STATUS = self.USER_DATA + 4
2537 #DIRECTORY_TABLE_BASE = self.USER_DATA + 5
2538 #USER_SID = self.USER_DATA + 6
2539 IMAGE_FILE_NAME = self.USER_DATA + 7
2540 COMMAND_LINE = self.USER_DATA + 8
2541
2542 ppid = line[self.PID]
2543 pid = int(line[PROCESS_ID], 16)
2544 logging.debug(
2545 'New process %d->%d (%s) %s' %
2546 (ppid, pid, line[IMAGE_FILE_NAME], line[COMMAND_LINE]))
2547
2548 if ppid == self._tracer_pid:
2549 # Need to ignore processes we don't know about because the log is
2550 # system-wide. self._tracer_pid shall start only one process.
2551 if self.root_process:
2552 raise TracingFailure(
2553 ( 'Parent process is _tracer_pid(%d) but root_process(%d) is '
2554 'already set') % (self._tracer_pid, self.root_process.pid),
2555 None, None, None)
2556 proc = self.Process(self.blacklist, pid, None)
2557 self.root_process = proc
2558 ppid = None
2559 elif self._process_lookup.get(ppid):
2560 proc = self.Process(self.blacklist, pid, None)
2561 self._process_lookup[ppid].children.append(proc)
2562 else:
2563 # Ignore
2564 return
2565 self._process_lookup[pid] = proc
2566
2567 if (not line[IMAGE_FILE_NAME].startswith('"') or
2568 not line[IMAGE_FILE_NAME].endswith('"')):
2569 raise TracingFailure(
2570 'Command line is not properly quoted: %s' % line[IMAGE_FILE_NAME],
2571 None, None, None)
2572
2573 # TODO(maruel): Process escapes.
2574 if (not line[COMMAND_LINE].startswith('"') or
2575 not line[COMMAND_LINE].endswith('"')):
2576 raise TracingFailure(
2577 'Command line is not properly quoted: %s' % line[COMMAND_LINE],
2578 None, None, None)
2579 proc.command = CommandLineToArgvW(line[COMMAND_LINE][1:-1])
2580 proc.executable = line[IMAGE_FILE_NAME][1:-1]
2581 # proc.command[0] may be the absolute path of 'executable' but it may be
2582 # anything else too. If it happens that command[0] ends with executable,
2583 # use it, otherwise defaults to the base name.
2584 cmd0 = proc.command[0].lower()
2585 if not cmd0.endswith('.exe'):
2586 # TODO(maruel): That's not strictly true either.
2587 cmd0 += '.exe'
2588 if cmd0.endswith(proc.executable) and os.path.isfile(cmd0):
2589 # Fix the path.
2590 cmd0 = cmd0.replace('/', os.path.sep)
2591 cmd0 = os.path.normpath(cmd0)
2592 proc.executable = get_native_path_case(cmd0)
2593 logging.info(
2594 'New child: %s -> %d %s' % (ppid, pid, proc.executable))
2595
2596 def handle_Thread_End(self, line):
2597 """Has the same parameters as Thread_Start."""
2598 tid = int(line[self.TID], 16)
2599 self._threads_active.pop(tid, None)
2600
2601 def handle_Thread_Start(self, line):
2602      """Handles a newly created thread.
2603
2604 Do not use self.PID here since a process' initial thread is created by
2605 the parent process.
2606 """
2607 PROCESS_ID = self.USER_DATA
2608 TTHREAD_ID = self.USER_DATA + 1
2609 #STACK_BASE = self.USER_DATA + 2
2610 #STACK_LIMIT = self.USER_DATA + 3
2611 #USER_STACK_BASE = self.USER_DATA + 4
2612 #USER_STACK_LIMIT = self.USER_DATA + 5
2613 #AFFINITY = self.USER_DATA + 6
2614 #WIN32_START_ADDR = self.USER_DATA + 7
2615 #TEB_BASE = self.USER_DATA + 8
2616 #SUB_PROCESS_TAG = self.USER_DATA + 9
2617 #BASE_PRIORITY = self.USER_DATA + 10
2618 #PAGE_PRIORITY = self.USER_DATA + 11
2619 #IO_PRIORITY = self.USER_DATA + 12
2620 #THREAD_FLAGS = self.USER_DATA + 13
2621 # Do not use self.PID here since a process' initial thread is created by
2622 # the parent process.
2623 pid = int(line[PROCESS_ID], 16)
2624 tid = int(line[TTHREAD_ID], 16)
2625 logging.debug('New thread pid:%d, tid:%d' % (pid, tid))
2626 self._threads_active[tid] = pid
2627
2628 @classmethod
2629 def supported_events(cls):
2630      """Returns all the processed events."""
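      # For this class the pairs include ('EventTrace', 'Header'),
      # ('FileIo', 'Create'), ('Process', 'Start'), ('Thread', 'Start'), etc.,
      # derived from the handle_<EventName>_<Type>() methods defined above.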
2631 out = []
2632 for member in dir(cls):
2633 match = re.match(r'^handle_([A-Za-z]+)_([A-Za-z]+)$', member)
2634 if match:
2635 out.append(match.groups())
2636 return out
2637
2638 class Tracer(ApiBase.Tracer):
2639 # The basic headers.
2640 EXPECTED_HEADER = [
2641 u'Event Name',
2642 u'Type',
2643 u'Event ID',
2644 u'Version',
2645 u'Channel',
2646 u'Level', # 5
2647 u'Opcode',
2648 u'Task',
2649 u'Keyword',
2650 u'PID',
2651 u'TID', # 10
2652 u'Processor Number',
2653 u'Instance ID',
2654 u'Parent Instance ID',
2655 u'Activity ID',
2656 u'Related Activity ID', # 15
2657 u'Clock-Time',
2658 u'Kernel(ms)', # Both have a resolution of ~15ms which makes them
2659 u'User(ms)', # pretty much useless.
2660 u'User Data', # Extra arguments that are event-specific.
2661 ]
2662 # Only the useful headers common to all entries are listed there. Any column
2663 # at 19 or higher is dependent on the specific event.
2664 EVENT_NAME = 0
2665 TYPE = 1
2666 PID = 9
2667 TID = 10
2668 PROCESSOR_ID = 11
2669 TIMESTAMP = 16
2670 NULL_GUID = '{00000000-0000-0000-0000-000000000000}'
2671 USER_DATA = 19
2672
2673 def __init__(self, logname):
2674 """Starts the log collection.
2675
2676 Requires administrative access. logman.exe is synchronous so no need for a
2677 "warmup" call. 'Windows Kernel Trace' is *localized* so use its GUID
2678 instead. The GUID constant name is SystemTraceControlGuid. Lovely.
2679
2680 One can get the list of potentially interesting providers with:
2681 "logman query providers | findstr /i file"
2682 """
2683 super(LogmanTrace.Tracer, self).__init__(logname)
2684 self._script = create_thunk()
2685 cmd_start = [
2686 'logman.exe',
2687 'start',
2688 'NT Kernel Logger',
2689 '-p', '{9e814aad-3204-11d2-9a82-006008a86939}',
2690 # splitio,fileiocompletion,syscall,file,cswitch,img
2691 '(process,fileio,thread)',
2692 '-o', self._logname + '.etl',
2693 '-ets', # Send directly to kernel
2694 # Values extracted out of thin air.
2695 # Event Trace Session buffer size in kb.
2696 '-bs', '10240',
2697 # Number of Event Trace Session buffers.
2698 '-nb', '16', '256',
2699 ]
2700 logging.debug('Running: %s' % cmd_start)
2701 try:
2702 subprocess.check_call(
2703 cmd_start,
2704 stdin=subprocess.PIPE,
2705 stdout=subprocess.PIPE,
2706 stderr=subprocess.STDOUT)
2707 except subprocess.CalledProcessError, e:
2708 if e.returncode == -2147024891:
2709 print >> sys.stderr, 'Please restart with an elevated admin prompt'
2710 elif e.returncode == -2144337737:
2711 print >> sys.stderr, (
2712 'A kernel trace was already running, stop it and try again')
2713 raise
2714
2715 def trace(self, cmd, cwd, tracename, output):
2716 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
2717 assert os.path.isabs(cmd[0]), cmd[0]
2718 assert os.path.isabs(cwd), cwd
2719 assert os.path.normpath(cwd) == cwd, cwd
2720 with self._lock:
2721 if not self._initialized:
2722 raise TracingFailure(
2723            'Called Tracer.trace() on an uninitialized object',
2724 None, None, None, tracename)
2725 assert tracename not in (i['trace'] for i in self._traces)
2726
2727 # Use "logman -?" for help.
2728
2729 stdout = stderr = None
2730 if output:
2731 stdout = subprocess.PIPE
2732 stderr = subprocess.STDOUT
2733
2734 # Run the child process.
2735 logging.debug('Running: %s' % cmd)
2736 # Use the temporary script generated with create_thunk() so we have a
2737 # clear pid owner. Since trace_inputs.py can be used as a library and
2738 # could trace multiple processes simultaneously, it makes it more complex
2739 # if the executable to be traced is executed directly here. It also solves
2740 # issues related to logman.exe that needs to be executed to control the
2741 # kernel trace.
2742 child_cmd = [
2743 sys.executable,
2744 self._script,
2745 tracename,
2746 ]
2747 child = subprocess.Popen(
2748 child_cmd + fix_python_path(cmd),
2749 cwd=cwd,
2750 stdin=subprocess.PIPE,
2751 stdout=stdout,
2752 stderr=stderr)
2753 logging.debug('Started child pid: %d' % child.pid)
2754 out = child.communicate()[0]
2755 # This doesn't mean all the grand-children are done. Sadly, we don't have
2756 # a good way to determine that.
2757
2758 with self._lock:
2759 assert tracename not in (i['trace'] for i in self._traces)
2760 self._traces.append({
2761          'cmd': cmd,
2762          'cwd': cwd,
2763          'output': out,
2764          'pid': child.pid,
2765 'trace': tracename,
2766        })
2767
2768 return child.returncode, out
2769
2770 def close(self, _timeout=None):
2771 """Stops the kernel log collection and converts the traces to text
2772 representation.
2773 """
2774 with self._lock:
2775 if not self._initialized:
2776 raise TracingFailure(
2777            'Called Tracer.close() on an uninitialized object',
2778 None, None, None)
2779 os.remove(self._script)
2780      # Save metadata, add 'format' key.
2781 data = {
2782 'format': 'csv',
2783 'traces': self._traces,
2784 }
2785 write_json(self._logname, data, False)
2786
2787 cmd_stop = [
2788 'logman.exe',
2789 'stop',
2790 'NT Kernel Logger',
2791 '-ets', # Sends the command directly to the kernel.
2792 ]
2793 logging.debug('Running: %s' % cmd_stop)
2794 subprocess.check_call(
2795 cmd_stop,
2796 stdin=subprocess.PIPE,
2797 stdout=subprocess.PIPE,
2798 stderr=subprocess.STDOUT)
2799 self._initialized = False
2800
2801 def post_process_log(self):
2802 """Converts the .etl file into .csv then into .json."""
2803 super(LogmanTrace.Tracer, self).post_process_log()
2804 logformat = 'csv'
2805 self._convert_log(logformat)
2806
2807 if logformat == 'csv_utf16':
2808 def load_file():
2809 def utf_8_encoder(unicode_csv_data):
2810 """Encodes the unicode object as utf-8 encoded str instance"""
2811 for line in unicode_csv_data:
2812 yield line.encode('utf-8')
2813
2814 def unicode_csv_reader(unicode_csv_data, **kwargs):
2815 """Encodes temporarily as UTF-8 since csv module doesn't do unicode.
2816 """
2817 csv_reader = csv.reader(utf_8_encoder(unicode_csv_data), **kwargs)
2818 for row in csv_reader:
2819 # Decode str utf-8 instances back to unicode instances, cell by
2820 # cell:
2821 yield [cell.decode('utf-8') for cell in row]
2822
2823 # The CSV file is UTF-16 so use codecs.open() to load the file into
2824 # the python internal unicode format (utf-8). Then explicitly
2825 # re-encode as utf8 as str instances so csv can parse it fine. Then
2826 # decode the utf-8 str back into python unicode instances. This
2827 # sounds about right.
2828 for line in unicode_csv_reader(
2829 codecs.open(self._logname + '.' + logformat, 'r', 'utf-16')):
2830 # line is a list of unicode objects
2831 # So much white space!
2832 yield [i.strip() for i in line]
2833
2834 elif logformat == 'csv':
2835 def load_file():
2836 def ansi_csv_reader(ansi_csv_data, **kwargs):
2837 """Loads an 'ANSI' code page and returns unicode() objects."""
2838 assert sys.getfilesystemencoding() == 'mbcs'
2839 encoding = get_current_encoding()
2840 for row in csv.reader(ansi_csv_data, **kwargs):
2841 # Decode str 'ansi' instances to unicode instances, cell by cell:
2842 yield [cell.decode(encoding) for cell in row]
2843
2844 # The fastest and smallest format but only supports 'ANSI' file paths.
2845          # E.g. the filenames are encoded in the 'current' encoding.
2846 for line in ansi_csv_reader(open(self._logname + '.' + logformat)):
2847 # line is a list of unicode objects.
2848 yield [i.strip() for i in line]
2849
2850 supported_events = LogmanTrace.Context.supported_events()
2851
2852 def trim(generator):
2853 for index, line in enumerate(generator):
2854 if not index:
2855 if line != self.EXPECTED_HEADER:
2856 raise TracingFailure(
2857 'Found malformed header: %s' % ' '.join(line),
2858 None, None, None)
2859 continue
2860 # As you can see, the CSV is full of useful non-redundant information:
2861 if (line[2] != '0' or # Event ID
2862 line[3] not in ('2', '3') or # Version
2863 line[4] != '0' or # Channel
2864 line[5] != '0' or # Level
2865 line[7] != '0' or # Task
2866 line[8] != '0x0000000000000000' or # Keyword
2867 line[12] != '' or # Instance ID
2868 line[13] != '' or # Parent Instance ID
2869 line[14] != self.NULL_GUID or # Activity ID
2870 line[15] != ''): # Related Activity ID
2871 raise TracingFailure(
2872 'Found unexpected values in line: %s' % ' '.join(line),
2873 None, None, None)
2874
2875 if (line[self.EVENT_NAME], line[self.TYPE]) not in supported_events:
2876 continue
2877
2878 # Convert the PID in-place from hex.
2879 line[self.PID] = int(line[self.PID], 16)
2880
2881 yield [
2882 line[self.EVENT_NAME],
2883 line[self.TYPE],
2884 line[self.PID],
2885 line[self.TID],
2886 line[self.PROCESSOR_ID],
2887 line[self.TIMESTAMP],
2888 ] + line[self.USER_DATA:]
2889
2890 write_json('%s.json' % self._logname, list(trim(load_file())), True)
2891
2892 def _convert_log(self, logformat):
2893 """Converts the ETL trace to text representation.
2894
2895 Normally, 'csv' is sufficient. If complex scripts are used (like eastern
2896 languages), use 'csv_utf16'. If localization gets in the way, use 'xml'.
2897
2898 Arguments:
2899 - logformat: Text format to be generated, csv, csv_utf16 or xml.
2900
2901 Use "tracerpt -?" for help.
2902 """
2903 LOCALE_INVARIANT = 0x7F
2904 windll.kernel32.SetThreadLocale(LOCALE_INVARIANT)
2905 cmd_convert = [
2906 'tracerpt.exe',
2907 '-l', self._logname + '.etl',
2908 '-o', self._logname + '.' + logformat,
2909 '-gmt', # Use UTC
2910 '-y', # No prompt
2911 # Use -of XML to get the header of each items after column 19, e.g. all
2912 # the actual headers of 'User Data'.
2913 ]
2914
2915 if logformat == 'csv':
2916 # tracerpt localizes the 'Type' column, for major brainfuck
2917 # entertainment. I can't imagine any sane reason to do that.
2918 cmd_convert.extend(['-of', 'CSV'])
2919 elif logformat == 'csv_utf16':
2920 # This causes it to use UTF-16, which doubles the log size but ensures
2921 # the log is readable for non-ASCII characters.
2922 cmd_convert.extend(['-of', 'CSV', '-en', 'Unicode'])
2923 elif logformat == 'xml':
2924 cmd_convert.extend(['-of', 'XML'])
2925 else:
2926 raise ValueError('Unexpected log format \'%s\'' % logformat)
2927 logging.debug('Running: %s' % cmd_convert)
2928      # This can take tens of minutes for large logs.
2929 # Redirects all output to stderr.
2930 subprocess.check_call(
2931 cmd_convert,
2932 stdin=subprocess.PIPE,
2933 stdout=sys.stderr,
2934 stderr=sys.stderr)
2935
2936 @staticmethod
2937 def clean_trace(logname):
2938 for ext in ('', '.csv', '.etl', '.json', '.xml'):
2939 if os.path.isfile(logname + ext):
2940 os.remove(logname + ext)
2941
2942 @classmethod
2943 def parse_log(cls, logname, blacklist):
2944 logging.info('parse_log(%s, %s)' % (logname, blacklist))
2945      assert os.path.isabs(logname)
2946
2947 def blacklist_more(filepath):
2948 # All the NTFS metadata is in the form x:\$EXTEND or stuff like that.
2949 return blacklist(filepath) or re.match(r'[A-Z]\:\\\$EXTEND', filepath)
2950
2951 data = read_json(logname)
2952 lines = read_json(logname + '.json')
2953 out = []
2954 for item in data['traces']:
2955        result = {
2956 'output': item['output'],
2957 'trace': item['trace'],
2958 }
2959 try:
2960 context = cls.Context(blacklist_more, item['pid'], item['trace'])
2961 for line in lines:
2962 context.on_line(line)
2963 result['results'] = context.to_results()
2964 except TracingFailure:
2965 result['exception'] = sys.exc_info()
2966 out.append(result)
2967      return out
2968
2969
2970def get_api():
2971 """Returns the correct implementation for the current OS."""
2972 if sys.platform == 'cygwin':
2973 raise NotImplementedError(
2974 'Not implemented for cygwin, start the script from Win32 python')
2975 flavors = {
2976 'win32': LogmanTrace,
2977 'darwin': Dtrace,
2978 'sunos5': Dtrace,
2979 'freebsd7': Dtrace,
2980 'freebsd8': Dtrace,
2981 }
2982 # Defaults to strace.
2983 return flavors.get(sys.platform, Strace)()
2984
2985
2986def extract_directories(root_dir, files, blacklist):
2987  """Detects if all the files in a directory are in |files| and if so, replaces
2988  the individual files with a Results.Directory instance.
2989
2990 Takes a list of Results.File instances and returns a shorter list of
2991 Results.File and Results.Directory instances.
2992
2993 Arguments:
2994  - root_dir: Optional base directory that shouldn't be searched further.
2995 - files: list of Results.File instances.
2996 - blacklist: regexp of files to ignore, for example r'.+\.pyc'.
2997 """
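  # For example, with a hypothetical layout: if files lists out/a.txt and
  # out/b.txt and the out/ directory contains exactly those two entries on
  # disk, both are collapsed into a single Results.Directory for out/.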
2998 logging.info(
2999 'extract_directories(%s, %d files, ...)' % (root_dir, len(files)))
3000 assert not (root_dir or '').endswith(os.path.sep), root_dir
3001 assert not root_dir or (get_native_path_case(root_dir) == root_dir)
3002 assert not any(isinstance(f, Results.Directory) for f in files)
3003 # Remove non existent files.
3004 files = [f for f in files if f.existent]
3005 if not files:
3006 return files
3007 # All files must share the same root, which can be None.
3008 assert len(set(f.root for f in files)) == 1, set(f.root for f in files)
3009
3010 # Creates a {directory: {filename: File}} mapping, up to root.
3011 buckets = {}
3012 if root_dir:
3013 buckets[root_dir] = {}
3014 for fileobj in files:
3015 path = fileobj.full_path
3016 directory = os.path.dirname(path)
3017 assert directory
3018 # Do not use os.path.basename() so trailing os.path.sep is kept.
3019 basename = path[len(directory)+1:]
3020 files_in_directory = buckets.setdefault(directory, {})
3021 files_in_directory[basename] = fileobj
3022 # Add all the directories recursively up to root.
3023 while True:
3024 old_d = directory
3025 directory = os.path.dirname(directory)
3026 if directory + os.path.sep == root_dir or directory == old_d:
3027 break
3028 buckets.setdefault(directory, {})
3029
3030 root_prefix = len(root_dir) + 1 if root_dir else 0
3031 for directory in sorted(buckets, reverse=True):
3032 actual = set(f for f in os.listdir(directory) if not blacklist(f))
3033 expected = set(buckets[directory])
3034 if not (actual - expected):
3035 parent = os.path.dirname(directory)
3036 buckets[parent][os.path.basename(directory)] = Results.Directory(
3037 root_dir,
3038 directory[root_prefix:],
3039 False,
3040 sum(f.size for f in buckets[directory].itervalues()),
3041 sum(f.nb_files for f in buckets[directory].itervalues()))
3042 # Remove the whole bucket.
3043 del buckets[directory]
3044
3045 # Reverse the mapping with what remains. The original instances are returned,
3046 # so the cached meta data is kept.
3047 files = sum((x.values() for x in buckets.itervalues()), [])
3048 return sorted(files, key=lambda x: x.path)
3049
3050
3051def trace(logfile, cmd, cwd, api, output):
3052 """Traces an executable. Returns (returncode, output) from api.
3053
3054 Arguments:
3055 - logfile: file to write to.
3056 - cmd: command to run.
3057 - cwd: current directory to start the process in.
3058 - api: a tracing api instance.
3059  - output: if True, returns output, otherwise prints it to the console.
3060 """
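  # Minimal usage sketch with hypothetical paths, mirroring what CMDtrace()
  # does:
  #   api = get_api()
  #   returncode, out = trace(
  #       '/tmp/example.log', ['/usr/bin/python', 'foo.py'], os.getcwd(), api,
  #       True)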
3061 cmd = fix_python_path(cmd)
3062 api.clean_trace(logfile)
3063 with api.get_tracer(logfile) as tracer:
3064 return tracer.trace(cmd, cwd, 'default', output)
3065
3066
3067def CMDclean(args):
3068 """Cleans up traces."""
3069 parser = OptionParserTraceInputs(command='clean')
3070 options, args = parser.parse_args(args)
3071 api = get_api()
3072 api.clean_trace(options.log)
3073 return 0
3074
3075
3076def CMDtrace(args):
3077 """Traces an executable."""
3078 parser = OptionParserTraceInputs(command='trace')
3079 parser.allow_interspersed_args = False
3080 parser.add_option(
3081 '-q', '--quiet', action='store_true',
3082 help='Redirects traced executable output to /dev/null')
3083 options, args = parser.parse_args(args)
3084
3085 if not args:
3086 parser.error('Please provide a command to run')
3087
3088 if not os.path.isabs(args[0]) and os.access(args[0], os.X_OK):
3089 args[0] = os.path.abspath(args[0])
3090
3091 api = get_api()
3092 return trace(options.log, args, os.getcwd(), api, options.quiet)[0]
3093
3094
3095def CMDread(args):
3096 """Reads the logs and prints the result."""
3097 parser = OptionParserTraceInputs(command='read')
3098 parser.add_option(
3099 '-V', '--variable',
3100 nargs=2,
3101 action='append',
3102 dest='variables',
3103 metavar='VAR_NAME directory',
3104 default=[],
3105 help=('Variables to replace relative directories against. Example: '
3106            '"-V \'$HOME\' \'/home/%s\'" will replace all occurrences of your '
3107 'home dir with $HOME') % getpass.getuser())
3108 parser.add_option(
3109 '--root-dir',
3110      help='Root directory to base everything off of. Anything outside of this '
3111           'directory will not be reported')
3112 parser.add_option(
3113 '-j', '--json', action='store_true',
3114 help='Outputs raw result data as json')
3115 parser.add_option(
3116 '-b', '--blacklist', action='append', default=[],
3117 help='List of regexp to use as blacklist filter')
3118 options, args = parser.parse_args(args)
3119
3120 if options.root_dir:
3121 options.root_dir = os.path.abspath(options.root_dir)
3122
3123 variables = dict(options.variables)
3124 api = get_api()
3125 def blacklist(f):
3126 return any(re.match(b, f) for b in options.blacklist)
3127 data = api.parse_log(options.log, blacklist)
3128 # Process each trace.
3129 output_as_json = []
3130 for item in data:
3131 if 'exception' in item:
3132      # Do not abort the other traces.
3133      print >> sys.stderr, (
3134          'Trace %s: Got an exception: %s' % (
3135 item['trace'], item['exception'][1]))
3136      continue
3137 results = item['results']
3138 if options.root_dir:
3139 results = results.strip_root(options.root_dir)
3140
3141 if options.json:
3142 output_as_json.append(results.flatten())
3143 else:
3144 simplified = extract_directories(
3145 options.root_dir, results.files, blacklist)
3146 simplified = [f.replace_variables(variables) for f in simplified]
3147 if len(data) > 1:
3148 print('Trace: %s' % item['trace'])
3149 print('Total: %d' % len(results.files))
3150 print('Non existent: %d' % len(results.non_existent))
3151 for f in results.non_existent:
3152 print(' %s' % f.path)
3153 print(
3154 'Interesting: %d reduced to %d' % (
3155 len(results.existent), len(simplified)))
3156 for f in simplified:
3157 print(' %s' % f.path)
3158
3159 if options.json:
3160 write_json(sys.stdout, output_as_json, False)
3161 return 0
3162
3163
3164class OptionParserWithLogging(optparse.OptionParser):
3165 """Adds --verbose option."""
3166 def __init__(self, verbose=0, **kwargs):
3167 optparse.OptionParser.__init__(self, **kwargs)
3168 self.add_option(
3169 '-v', '--verbose',
3170 action='count',
3171 default=verbose,
3172 help='Use multiple times to increase verbosity')
3173
3174 def parse_args(self, *args, **kwargs):
3175 options, args = optparse.OptionParser.parse_args(self, *args, **kwargs)
3176 levels = [logging.ERROR, logging.INFO, logging.DEBUG]
3177 logging.basicConfig(
3178 level=levels[min(len(levels)-1, options.verbose)],
3179 format='%(levelname)5s %(module)15s(%(lineno)3d): %(message)s')
3180 return options, args
3181
3182
3183class OptionParserWithNiceDescription(OptionParserWithLogging):
3184 """Generates the description with the command's docstring."""
3185 def __init__(self, **kwargs):
3186 """Sets 'description' and 'usage' if not already specified."""
3187 command = kwargs.pop('command', 'help')
3188 kwargs.setdefault(
3189 'description',
3190 re.sub('[\r\n ]{2,}', ' ', get_command_handler(command).__doc__))
3191 kwargs.setdefault('usage', '%%prog %s [options]' % command)
3192 OptionParserWithLogging.__init__(self, **kwargs)
3193
3194
3195class OptionParserTraceInputs(OptionParserWithNiceDescription):
3196 """Adds automatic --log handling."""
3197 def __init__(self, **kwargs):
3198 OptionParserWithNiceDescription.__init__(self, **kwargs)
3199 self.add_option(
3200 '-l', '--log', help='Log file to generate or read, required')
3201
3202 def parse_args(self, *args, **kwargs):
3203 """Makes sure the paths make sense.
3204
3205 On Windows, / and \ are often mixed together in a path.
3206 """
3207 options, args = OptionParserWithNiceDescription.parse_args(
3208 self, *args, **kwargs)
3209 if not options.log:
3210 self.error('Must supply a log file with -l')
3211 options.log = os.path.abspath(options.log)
3212 return options, args
3213
3214
3215def extract_documentation():
3216  """Returns a dict {command: description} for each documented command."""
3217 commands = (
3218 fn[3:]
3219 for fn in dir(sys.modules['__main__'])
3220 if fn.startswith('CMD') and get_command_handler(fn[3:]).__doc__)
3221 return dict((fn, get_command_handler(fn).__doc__) for fn in commands)
3222
3223
3224def CMDhelp(args):
3225 """Prints list of commands or help for a specific command."""
3226 doc = extract_documentation()
3227 # Calculates the optimal offset.
3228 offset = max(len(cmd) for cmd in doc)
3229 format_str = ' %-' + str(offset + 2) + 's %s'
3230  # Generate a one-line documentation summary for each command.
3231 commands_description = '\n'.join(
3232 format_str % (cmd, doc[cmd].split('\n')[0]) for cmd in sorted(doc))
3233
3234 parser = OptionParserWithNiceDescription(
3235 usage='%prog <command> [options]',
3236 description='Commands are:\n%s\n' % commands_description)
3237 parser.format_description = lambda _: parser.description
3238
3239 # Strip out any -h or --help argument.
3240 _, args = parser.parse_args([i for i in args if not i in ('-h', '--help')])
3241 if len(args) == 1:
3242 if not get_command_handler(args[0]):
3243 parser.error('Unknown command %s' % args[0])
3244    # The command was "%prog help command"; replace ourselves with
3245 # "%prog command --help" so help is correctly printed out.
3246 return main(args + ['--help'])
3247 elif args:
3248 parser.error('Unknown argument "%s"' % ' '.join(args))
3249 parser.print_help()
3250 return 0
3251
3252
3253def get_command_handler(name):
3254 """Returns the command handler or CMDhelp if it doesn't exist."""
3255 return getattr(sys.modules['__main__'], 'CMD%s' % name, None)
3256
3257
3258def main_impl(argv):
3259 command = get_command_handler(argv[0] if argv else 'help')
3260 if not command:
3261 return CMDhelp(argv)
3262 return command(argv[1:])
3263
3264def main(argv):
3265 try:
3266    return main_impl(argv)
3267 except TracingFailure, e:
3268 sys.stderr.write('\nError: ')
3269 sys.stderr.write(str(e))
3270 sys.stderr.write('\n')
3271 return 1
3272
3273
3274if __name__ == '__main__':
3275 sys.exit(main(sys.argv[1:]))