#!/usr/bin/env python
# coding=utf-8
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Traces an executable and its child processes and extracts the files
accessed by them.

The implementation uses OS-specific APIs. The native kernel logger and the
ETL interface are used on Windows. Dtrace is used on OSX. Strace is used
otherwise. The OS-specific implementation is hidden behind an 'API' interface.

The results are embedded in a Results instance. The tracing is done in two
phases: the first runs the actual trace and generates an
implementation-specific log file. The log file is then parsed to extract the
information, including the individual child processes and the files accessed,
from the log.
"""
20
21import codecs
22import csv
23import getpass
24import glob
25import json
26import logging
27import optparse
28import os
29import re
30import subprocess
31import sys
32import tempfile
33import threading
34import time
35import weakref
36
37## OS-specific imports
38
39if sys.platform == 'win32':
40 from ctypes.wintypes import byref, create_unicode_buffer, c_int, c_wchar_p
41 from ctypes.wintypes import windll, FormatError # pylint: disable=E0611
42 from ctypes.wintypes import GetLastError # pylint: disable=E0611
43elif sys.platform == 'darwin':
44 import Carbon.File # pylint: disable=F0401
45 import MacOS # pylint: disable=F0401
46
47
48BASE_DIR = os.path.dirname(os.path.abspath(__file__))
49ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR))
50
51
52class TracingFailure(Exception):
53 """An exception occured during tracing."""
54 def __init__(self, description, pid, line_number, line, *args):
55 super(TracingFailure, self).__init__(
56 description, pid, line_number, line, *args)
57 self.description = description
58 self.pid = pid
59 self.line_number = line_number
60 self.line = line
61 self.extra = args
62
63 def __str__(self):
64 out = self.description
65 if self.pid:
66 out += '\npid: %d' % self.pid
67 if self.line_number:
68 out += '\nline: %d' % self.line_number
69 if self.line:
70 out += '\n%s' % self.line
71 if self.extra:
72 out += '\n' + ', '.join(map(str, filter(None, self.extra)))
73 return out
74
75
76## OS-specific functions
77
78if sys.platform == 'win32':
79 def QueryDosDevice(drive_letter):
80 """Returns the Windows 'native' path for a DOS drive letter."""
81 assert re.match(r'^[a-zA-Z]:$', drive_letter), drive_letter
82 # Guesswork. QueryDosDeviceW never returns the required number of bytes.
83 chars = 1024
84 drive_letter = unicode(drive_letter)
85 p = create_unicode_buffer(chars)
86 if 0 == windll.kernel32.QueryDosDeviceW(drive_letter, p, chars):
87 err = GetLastError()
88 if err:
89 # pylint: disable=E0602
90 raise WindowsError(
91 err,
92 'QueryDosDevice(%s): %s (%d)' % (
93 str(drive_letter), FormatError(err), err))
94 return p.value
95
96
97 def GetShortPathName(long_path):
98 """Returns the Windows short path equivalent for a 'long' path."""
99 long_path = unicode(long_path)
100 # Adds '\\\\?\\' when given an absolute path so the MAX_PATH (260) limit is
101 # not enforced.
102 if os.path.isabs(long_path) and not long_path.startswith('\\\\?\\'):
103 long_path = '\\\\?\\' + long_path
104 chars = windll.kernel32.GetShortPathNameW(long_path, None, 0)
105 if chars:
106 p = create_unicode_buffer(chars)
107 if windll.kernel32.GetShortPathNameW(long_path, p, chars):
108 return p.value
109
110 err = GetLastError()
111 if err:
112 # pylint: disable=E0602
113 raise WindowsError(
114 err,
115 'GetShortPathName(%s): %s (%d)' % (
116 str(long_path), FormatError(err), err))
117
118
119 def GetLongPathName(short_path):
120 """Returns the Windows long path equivalent for a 'short' path."""
121 short_path = unicode(short_path)
122 # Adds '\\\\?\\' when given an absolute path so the MAX_PATH (260) limit is
123 # not enforced.
124 if os.path.isabs(short_path) and not short_path.startswith('\\\\?\\'):
125 short_path = '\\\\?\\' + short_path
126 chars = windll.kernel32.GetLongPathNameW(short_path, None, 0)
127 if chars:
128 p = create_unicode_buffer(chars)
129 if windll.kernel32.GetLongPathNameW(short_path, p, chars):
130 return p.value
131
132 err = GetLastError()
133 if err:
134 # pylint: disable=E0602
135 raise WindowsError(
136 err,
137 'GetLongPathName(%s): %s (%d)' % (
138 str(short_path), FormatError(err), err))
139
140
141 def get_current_encoding():
142 """Returns the 'ANSI' code page associated to the process."""
143 return 'cp%d' % int(windll.kernel32.GetACP())
144
145
146 class DosDriveMap(object):
147 """Maps \Device\HarddiskVolumeN to N: on Windows."""
148 # Keep one global cache.
149 _MAPPING = {}
150
151 def __init__(self):
152 """Lazy loads the cache."""
153 if not self._MAPPING:
154 # This is related to UNC resolver on windows. Ignore that.
155 self._MAPPING['\\Device\\Mup'] = None
156 self._MAPPING['\\SystemRoot'] = os.environ['SystemRoot']
157
158 for letter in (chr(l) for l in xrange(ord('C'), ord('Z')+1)):
159 try:
160 letter = '%s:' % letter
161 mapped = QueryDosDevice(letter)
162 if mapped in self._MAPPING:
              logging.warn(
                  ('Two drives, \'%s\' and \'%s\', are mapped to the same '
                   'disk. Drive letters are a user-mode concept and the '
                   'kernel traces only have NT paths, so all accesses will '
                   'be associated with the first drive letter, independent '
                   'of the actual letter used by the code') % (
                      self._MAPPING[mapped], letter))
170 else:
171 self._MAPPING[mapped] = letter
172 except WindowsError: # pylint: disable=E0602
173 pass
174
175 def to_win32(self, path):
176 """Converts a native NT path to Win32/DOS compatible path."""
177 match = re.match(r'(^\\Device\\[a-zA-Z0-9]+)(\\.*)?$', path)
178 if not match:
179 raise ValueError(
180 'Can\'t convert %s into a Win32 compatible path' % path,
181 path)
182 if not match.group(1) in self._MAPPING:
183 # Unmapped partitions may be accessed by windows for the
184 # fun of it while the test is running. Discard these.
185 return None
186 drive = self._MAPPING[match.group(1)]
187 if not drive or not match.group(2):
188 return drive
189 return drive + match.group(2)
190
191
192 def isabs(path):
193 """Accepts X: as an absolute path, unlike python's os.path.isabs()."""
194 return os.path.isabs(path) or len(path) == 2 and path[1] == ':'
195
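  # Illustrative sketch, not part of the original module: isabs() above also
  # accepts a bare drive letter such as 'X:', which os.path.isabs() rejects.
  # The helper name is hypothetical; it is never called by this script.
  def _example_isabs():
    """Hypothetical helper; safe to delete."""
    assert isabs('X:')
    assert isabs('X:\\foo')
    assert not isabs('foo')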
196
197 def get_native_path_case(p):
198 """Returns the native path case for an existing file.
199
200 On Windows, removes any leading '\\?\'.
201 """
202 if not isabs(p):
203 raise ValueError(
204 'Can\'t get native path case for a non-absolute path: %s' % p,
205 p)
206 # Windows used to have an option to turn on case sensitivity on non Win32
207 # subsystem but that's out of scope here and isn't supported anymore.
208 # Go figure why GetShortPathName() is needed.
209 try:
210 out = GetLongPathName(GetShortPathName(p))
211 except OSError, e:
212 if e.args[0] in (2, 3, 5):
213 # The path does not exist. Try to recurse and reconstruct the path.
214 base = os.path.dirname(p)
215 rest = os.path.basename(p)
216 return os.path.join(get_native_path_case(base), rest)
217 raise
218 if out.startswith('\\\\?\\'):
219 out = out[4:]
220 # Always upper case the first letter since GetLongPathName() will return the
221 # drive letter in the case it was given.
222 return out[0].upper() + out[1:]
223
224
225 def CommandLineToArgvW(command_line):
226 """Splits a commandline into argv using CommandLineToArgvW()."""
227 # http://msdn.microsoft.com/library/windows/desktop/bb776391.aspx
228 size = c_int()
229 ptr = windll.shell32.CommandLineToArgvW(unicode(command_line), byref(size))
230 try:
231 return [arg for arg in (c_wchar_p * size.value).from_address(ptr)]
232 finally:
233 windll.kernel32.LocalFree(ptr)
234
235
236elif sys.platform == 'darwin':
237
238
239 # On non-windows, keep the stdlib behavior.
240 isabs = os.path.isabs
241
242
243 def _find_item_native_case(root_path, item):
244 """Gets the native path case of a single item based at root_path.
245
246 There is no API to get the native path case of symlinks on OSX. So it
247 needs to be done the slow way.
248 """
249 item = item.lower()
250 for element in os.listdir(root_path):
251 if element.lower() == item:
252 return element
253
254
255 def _native_case(p):
256 """Gets the native path case. Warning: this function resolves symlinks."""
257 logging.debug('native_case(%s)' % p)
258 try:
259 rel_ref, _ = Carbon.File.FSPathMakeRef(p)
260 out = rel_ref.FSRefMakePath()
261 if p.endswith(os.path.sep) and not out.endswith(os.path.sep):
262 return out + os.path.sep
263 return out
264 except MacOS.Error, e:
265 if e.args[0] in (-43, -120):
266 # The path does not exist. Try to recurse and reconstruct the path.
267 # -43 means file not found.
268 # -120 means directory not found.
269 base = os.path.dirname(p)
270 rest = os.path.basename(p)
271 return os.path.join(_native_case(base), rest)
272 raise OSError(
273 e.args[0], 'Failed to get native path for %s' % p, p, e.args[1])
274
275
276 def _split_at_symlink_native(base_path, rest):
277 """Returns the native path for a symlink."""
278 base, symlink, rest = split_at_symlink(base_path, rest)
279 if symlink:
280 if not base_path:
281 base_path = base
282 else:
283 base_path = safe_join(base_path, base)
284 symlink = _find_item_native_case(base_path, symlink)
285 return base, symlink, rest
286
287
288 def get_native_path_case(path):
289 """Returns the native path case for an existing file.
290
291 Technically, it's only HFS+ on OSX that is case preserving and
292 insensitive. It's the default setting on HFS+ but can be changed.
293 """
294 if not isabs(path):
295 raise ValueError(
296 'Can\'t get native path case for a non-absolute path: %s' % path,
297 path)
298 if path.startswith('/dev'):
299 # /dev is not visible from Carbon, causing an exception.
300 return path
301
302 # Starts assuming there is no symlink along the path.
303 resolved = _native_case(path)
304 if resolved.lower() == path.lower():
      # This code path is much faster.
306 return resolved
307
308 # There was a symlink, process it.
309 base, symlink, rest = _split_at_symlink_native(None, path)
310 assert symlink, (path, base, symlink, rest, resolved)
311 prev = base
312 base = safe_join(_native_case(base), symlink)
313 assert len(base) > len(prev)
314 while rest:
315 prev = base
316 relbase, symlink, rest = _split_at_symlink_native(base, rest)
317 base = safe_join(base, relbase)
318 assert len(base) > len(prev), (prev, base, symlink)
319 if symlink:
320 base = safe_join(base, symlink)
321 assert len(base) > len(prev), (prev, base, symlink)
322 # Make sure no symlink was resolved.
323 assert base.lower() == path.lower(), (base, path)
324 return base
325
326
327else: # OSes other than Windows and OSX.
328
329
330 # On non-windows, keep the stdlib behavior.
331 isabs = os.path.isabs
332
333
334 def get_native_path_case(path):
335 """Returns the native path case for an existing file.
336
337 On OSes other than OSX and Windows, assume the file system is
338 case-sensitive.
339
340 TODO(maruel): This is not strictly true. Implement if necessary.
341 """
342 if not isabs(path):
343 raise ValueError(
344 'Can\'t get native path case for a non-absolute path: %s' % path,
345 path)
346 # Give up on cygwin, as GetLongPathName() can't be called.
    # Linux traces tend not to be normalized, so take this occasion to
    # normalize them. This function already normalizes the path on the other
    # OSes, so it needs to be done here as well to be coherent between OSes.
350 out = os.path.normpath(path)
351 if path.endswith(os.path.sep) and not out.endswith(os.path.sep):
352 return out + os.path.sep
353 return out
354
355
356if sys.platform != 'win32': # All non-Windows OSes.
357
358
359 def safe_join(*args):
360 """Joins path elements like os.path.join() but doesn't abort on absolute
361 path.
362
363 os.path.join('foo', '/bar') == '/bar'
364 but safe_join('foo', '/bar') == 'foo/bar'.
365 """
366 out = ''
367 for element in args:
368 if element.startswith(os.path.sep):
369 if out.endswith(os.path.sep):
370 out += element[1:]
371 else:
372 out += element
373 else:
374 if out.endswith(os.path.sep):
375 out += element
376 else:
377 out += os.path.sep + element
378 return out
379
380
381 def split_at_symlink(base_dir, relfile):
382 """Scans each component of relfile and cut the string at the symlink if
383 there is any.
384
385 Returns a tuple (base_path, symlink, rest), with symlink == rest == None if
386 not symlink was found.
387 """
388 if base_dir:
389 assert relfile
390 assert os.path.isabs(base_dir)
391 index = 0
392 else:
393 assert os.path.isabs(relfile)
394 index = 1
395
396 def at_root(rest):
397 if base_dir:
398 return safe_join(base_dir, rest)
399 return rest
400
401 while True:
402 try:
403 index = relfile.index(os.path.sep, index)
404 except ValueError:
405 index = len(relfile)
406 full = at_root(relfile[:index])
407 if os.path.islink(full):
408 # A symlink!
409 base = os.path.dirname(relfile[:index])
410 symlink = os.path.basename(relfile[:index])
411 rest = relfile[index:]
412 logging.debug(
413 'split_at_symlink(%s, %s) -> (%s, %s, %s)' %
414 (base_dir, relfile, base, symlink, rest))
415 return base, symlink, rest
416 if index == len(relfile):
417 break
418 index += 1
419 return relfile, None, None
420
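  # Illustrative sketch, not part of the original module: exercises
  # safe_join() and split_at_symlink() against a throw-away directory
  # containing a single symlink. The helper name is hypothetical; it is never
  # called by this script.
  def _example_split_at_symlink():
    """Hypothetical helper; safe to delete."""
    assert safe_join('foo', '/bar') == 'foo/bar'
    tmp = tempfile.mkdtemp(prefix='trace_inputs_example')
    try:
      os.mkdir(os.path.join(tmp, 'real'))
      os.symlink('real', os.path.join(tmp, 'link'))
      # The scan stops at the first symlink component and returns the rest
      # untouched.
      assert split_at_symlink(tmp, 'link/child') == ('', 'link', '/child')
    finally:
      os.remove(os.path.join(tmp, 'link'))
      os.rmdir(os.path.join(tmp, 'real'))
      os.rmdir(tmp)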
421
422def fix_python_path(cmd):
423 """Returns the fixed command line to call the right python executable."""
424 out = cmd[:]
425 if out[0] == 'python':
426 out[0] = sys.executable
427 elif out[0].endswith('.py'):
428 out.insert(0, sys.executable)
429 return out
430
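# Illustrative sketch, not part of the original module: fix_python_path()
# rewrites a leading 'python' to sys.executable and prefixes bare .py scripts
# with it. The helper name is hypothetical; it is never called by this script.
def _example_fix_python_path():
  """Hypothetical helper; safe to delete."""
  assert fix_python_path(['python', '-c', 'pass'])[0] == sys.executable
  assert fix_python_path(['foo.py'])[0] == sys.executable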
431
432def create_thunk():
433 handle, name = tempfile.mkstemp(prefix='trace_inputs_thunk', suffix='.py')
434 os.write(
435 handle,
436 (
437 'import subprocess\n'
438 'import sys\n'
439 'sys.exit(subprocess.call(sys.argv[2:]))\n'
440 ))
441 os.close(handle)
442 return name
443
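# Illustrative sketch, not part of the original module: the thunk written by
# create_thunk() re-executes sys.argv[2:], so argument 1 is left free for the
# caller (presumably to carry a marker visible in the trace). The helper name
# is hypothetical; it is never called by this script.
def _example_create_thunk():
  """Hypothetical helper; safe to delete."""
  thunk = create_thunk()
  try:
    returncode = subprocess.call(
        [sys.executable, thunk, 'marker', sys.executable, '-c', 'pass'])
    assert returncode == 0
  finally:
    os.remove(thunk)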
444
445def strace_process_quoted_arguments(text):
446 """Extracts quoted arguments on a string and return the arguments as a list.
447
448 Implemented as an automaton. Supports incomplete strings in the form
449 '"foo"...'.
450
451 Example:
452 With text = '"foo", "bar"', the function will return ['foo', 'bar']
453
454 TODO(maruel): Implement escaping.
455 """
456 # All the possible states of the DFA.
  ( NEED_QUOTE, # Beginning of a new argument.
    INSIDE_STRING, # Inside an argument.
    ESCAPED, # Found a '\' inside a quote. Treat the next char as-is.
    NEED_COMMA_OR_DOT, # Right after the closing quote of an argument. Could be
                       # a series of 3 dots or a comma.
    NEED_SPACE, # Right after a comma.
463 NEED_DOT_2, # Found a dot, need a second one.
464 NEED_DOT_3, # Found second dot, need a third one.
465 NEED_COMMA, # Found third dot, need a comma.
466 ) = range(8)
467
468 state = NEED_QUOTE
469 out = []
470 for index, char in enumerate(text):
471 if char == '"':
472 if state == NEED_QUOTE:
473 state = INSIDE_STRING
474 # A new argument was found.
475 out.append('')
476 elif state == INSIDE_STRING:
477 # The argument is now closed.
478 state = NEED_COMMA_OR_DOT
479 elif state == ESCAPED:
480 out[-1] += char
481 state = INSIDE_STRING
482 else:
483 raise ValueError(
484 'Can\'t process char at column %d for: %r' % (index, text),
485 index,
486 text)
487 elif char == ',':
488 if state in (NEED_COMMA_OR_DOT, NEED_COMMA):
489 state = NEED_SPACE
490 elif state == INSIDE_STRING:
491 out[-1] += char
492 elif state == ESCAPED:
493 out[-1] += char
494 state = INSIDE_STRING
495 else:
496 raise ValueError(
497 'Can\'t process char at column %d for: %r' % (index, text),
498 index,
499 text)
500 elif char == ' ':
501 if state == NEED_SPACE:
502 state = NEED_QUOTE
503 elif state == INSIDE_STRING:
504 out[-1] += char
505 elif state == ESCAPED:
506 out[-1] += char
507 state = INSIDE_STRING
508 else:
509 raise ValueError(
510 'Can\'t process char at column %d for: %r' % (index, text),
511 index,
512 text)
513 elif char == '.':
514 if state == NEED_COMMA_OR_DOT:
        # The string is incomplete; this means the strace -s flag should be
        # increased.
517 state = NEED_DOT_2
518 elif state == NEED_DOT_2:
519 state = NEED_DOT_3
520 elif state == NEED_DOT_3:
521 state = NEED_COMMA
522 elif state == INSIDE_STRING:
523 out[-1] += char
524 elif state == ESCAPED:
525 out[-1] += char
526 state = INSIDE_STRING
527 else:
528 raise ValueError(
529 'Can\'t process char at column %d for: %r' % (index, text),
530 index,
531 text)
532 elif char == '\\':
533 if state == ESCAPED:
534 out[-1] += char
535 state = INSIDE_STRING
536 elif state == INSIDE_STRING:
537 state = ESCAPED
538 else:
539 raise ValueError(
540 'Can\'t process char at column %d for: %r' % (index, text),
541 index,
542 text)
543 else:
544 if state == INSIDE_STRING:
545 out[-1] += char
546 else:
547 raise ValueError(
548 'Can\'t process char at column %d for: %r' % (index, text),
549 index,
550 text)
551 if state not in (NEED_COMMA, NEED_COMMA_OR_DOT):
552 raise ValueError(
553 'String is incorrectly terminated: %r' % text,
554 text)
555 return out
556
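# Illustrative sketch, not part of the original module: shows how the DFA in
# strace_process_quoted_arguments() splits a quoted strace argument string,
# including the '"foo"...' truncation produced by a too-small strace -s value.
# The helper name is hypothetical; it is never called by this script.
def _example_strace_quoted_arguments():
  """Hypothetical helper; safe to delete."""
  assert strace_process_quoted_arguments('"foo", "bar"') == ['foo', 'bar']
  # A truncated argument keeps only the characters strace managed to print.
  assert strace_process_quoted_arguments('"/bin/ls"...') == ['/bin/ls']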
557
558def read_json(filepath):
559 with open(filepath, 'r') as f:
560 return json.load(f)
561
562
563def write_json(filepath_or_handle, data, dense):
564 """Writes data into filepath or file handle encoded as json.
565
566 If dense is True, the json is packed. Otherwise, it is human readable.
567 """
568 if hasattr(filepath_or_handle, 'write'):
569 if dense:
570 filepath_or_handle.write(
571 json.dumps(data, sort_keys=True, separators=(',',':')))
572 else:
573 filepath_or_handle.write(json.dumps(data, sort_keys=True, indent=2))
574 else:
575 with open(filepath_or_handle, 'wb') as f:
576 if dense:
577 json.dump(data, f, sort_keys=True, separators=(',',':'))
578 else:
579 json.dump(data, f, sort_keys=True, indent=2)
580
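# Illustrative sketch, not part of the original module: write_json() accepts
# either a file path or a file-like object; dense=True uses compact separators
# while dense=False produces an indented, human readable form. The helper name
# is hypothetical; it is never called by this script.
def _example_write_json():
  """Hypothetical helper; safe to delete."""
  import StringIO
  buf = StringIO.StringIO()
  write_json(buf, {'a': 1, 'b': [2, 3]}, True)
  assert buf.getvalue() == '{"a":1,"b":[2,3]}'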
581
582class Results(object):
583 """Results of a trace session."""
584
585 class _TouchedObject(object):
586 """Something, a file or a directory, that was accessed."""
587 def __init__(self, root, path, tainted, size, nb_files):
588 logging.debug(
589 '%s(%s, %s, %s, %s, %s)' %
590 (self.__class__.__name__, root, path, tainted, size, nb_files))
591 self.root = root
592 self.path = path
593 self.tainted = tainted
594 self.nb_files = nb_files
595 # Can be used as a cache or a default value, depending on context.
596 self._size = size
597 # These are cache only.
598 self._real_path = None
599
600 # Check internal consistency.
601 assert path, path
602 assert tainted or bool(root) != bool(isabs(path)), (root, path)
603 assert tainted or (
604 not os.path.exists(self.full_path) or
605 (self.full_path == get_native_path_case(self.full_path))), (
606 tainted, self.full_path, get_native_path_case(self.full_path))
607
608 @property
609 def existent(self):
610 return self.size != -1
611
612 @property
613 def full_path(self):
614 if self.root:
615 return os.path.join(self.root, self.path)
616 return self.path
617
618 @property
619 def real_path(self):
620 """Returns the path with symlinks resolved."""
621 if not self._real_path:
622 self._real_path = os.path.realpath(self.full_path)
623 return self._real_path
624
625 @property
626 def size(self):
627 """File's size. -1 is not existent."""
628 if self._size is None and not self.tainted:
629 try:
630 self._size = os.stat(self.full_path).st_size
631 except OSError:
632 self._size = -1
633 return self._size
634
635 def flatten(self):
636 """Returns a dict representing this object.
637
638 A 'size' of 0 means the file was only touched and not read.
639 """
640 return {
641 'path': self.path,
642 'size': self.size,
643 }
644
645 def replace_variables(self, variables):
646 """Replaces the root of this File with one of the variables if it matches.
647
648 If a variable replacement occurs, the cloned object becomes tainted.
649 """
650 for variable, root_path in variables.iteritems():
651 if self.path.startswith(root_path):
652 return self._clone(
653 self.root, variable + self.path[len(root_path):], True)
654 # No need to clone, returns ourself.
655 return self
656
657 def strip_root(self, root):
658 """Returns a clone of itself with 'root' stripped off."""
659 # Check internal consistency.
660 assert self.tainted or (isabs(root) and root.endswith(os.path.sep)), root
661 if not self.full_path.startswith(root):
662 # Now try to resolve the symlinks to see if it can be reached this way.
663 # Only try *after* trying without resolving symlink.
664 if not self.real_path.startswith(root):
665 return None
666 path = self.real_path
667 else:
668 path = self.full_path
669 return self._clone(root, path[len(root):], self.tainted)
670
671 def _clone(self, new_root, new_path, tainted):
672 raise NotImplementedError(self.__class__.__name__)
673
674 class File(_TouchedObject):
675 """A file that was accessed. May not be present anymore.
676
677 If tainted is true, it means it is not a real path anymore as a variable
    replacement occurred.
679
680 If touched_only is True, this means the file was probed for existence, and
681 it is existent, but was never _opened_. If touched_only is True, the file
682 must have existed.
683 """
684 def __init__(self, root, path, tainted, size):
685 super(Results.File, self).__init__(root, path, tainted, size, 1)
686
687 def _clone(self, new_root, new_path, tainted):
688 """Clones itself keeping meta-data."""
      # Keep the self.size and self._real_path caches for performance reasons.
      # It is also important when the file becomes tainted (with a variable
      # instead of the real path) since self.path is not an on-disk path
      # anymore so out._size cannot be updated.
693 out = self.__class__(new_root, new_path, tainted, self.size)
694 out._real_path = self._real_path
695 return out
696
697 class Directory(_TouchedObject):
698 """A directory of files. Must exist."""
699 def __init__(self, root, path, tainted, size, nb_files):
700 """path='.' is a valid value and must be handled appropriately."""
701 assert not path.endswith(os.path.sep), path
702 super(Results.Directory, self).__init__(
703 root, path + os.path.sep, tainted, size, nb_files)
704 # For a Directory instance, self.size is not a cache, it's an actual value
705 # that is never modified and represents the total size of the files
706 # contained in this directory. It is possible that the directory is empty
707 # so that size == 0; this happens if there's only an invalid symlink in
708 # it.
709
710 def flatten(self):
711 out = super(Results.Directory, self).flatten()
712 out['nb_files'] = self.nb_files
713 return out
714
715 def _clone(self, new_root, new_path, tainted):
716 """Clones itself keeping meta-data."""
717 out = self.__class__(
718 new_root,
719 new_path.rstrip(os.path.sep),
720 tainted,
721 self.size,
722 self.nb_files)
723 out._real_path = self._real_path
724 return out
725
726 class Process(object):
727 """A process that was traced.
728
729 Contains references to the files accessed by this process and its children.
730 """
731 def __init__(self, pid, files, executable, command, initial_cwd, children):
732 logging.debug('Process(%s, %d, ...)' % (pid, len(files)))
733 self.pid = pid
734 self.files = sorted(files, key=lambda x: x.path)
735 self.children = children
736 self.executable = executable
737 self.command = command
738 self.initial_cwd = initial_cwd
739
740 # Check internal consistency.
741 assert len(set(f.path for f in self.files)) == len(self.files), sorted(
742 f.path for f in self.files)
743 assert isinstance(self.children, list)
744 assert isinstance(self.files, list)
745
746 @property
747 def all(self):
748 for child in self.children:
749 for i in child.all:
750 yield i
751 yield self
752
753 def flatten(self):
754 return {
755 'children': [c.flatten() for c in self.children],
756 'command': self.command,
757 'executable': self.executable,
758 'files': [f.flatten() for f in self.files],
759 'initial_cwd': self.initial_cwd,
760 'pid': self.pid,
761 }
762
763 def strip_root(self, root):
764 assert isabs(root) and root.endswith(os.path.sep), root
765 # Loads the files after since they are constructed as objects.
766 out = self.__class__(
767 self.pid,
768 filter(None, (f.strip_root(root) for f in self.files)),
769 self.executable,
770 self.command,
771 self.initial_cwd,
772 [c.strip_root(root) for c in self.children])
773 logging.debug(
774 'strip_root(%s) %d -> %d' % (root, len(self.files), len(out.files)))
775 return out
776
777 def __init__(self, process):
778 self.process = process
779 # Cache.
780 self._files = None
781
782 def flatten(self):
783 return {
784 'root': self.process.flatten(),
785 }
786
787 @property
788 def files(self):
789 if self._files is None:
790 self._files = sorted(
791 sum((p.files for p in self.process.all), []),
792 key=lambda x: x.path)
793 return self._files
794
795 @property
796 def existent(self):
797 return [f for f in self.files if f.existent]
798
799 @property
800 def non_existent(self):
801 return [f for f in self.files if not f.existent]
802
803 def strip_root(self, root):
804 """Returns a clone with all the files outside the directory |root| removed
805 and converts all the path to be relative paths.
806 """
807 # Resolve any symlink
808 root = os.path.realpath(root)
809 root = get_native_path_case(root).rstrip(os.path.sep) + os.path.sep
810 logging.debug('strip_root(%s)' % root)
811 return Results(self.process.strip_root(root))
812
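# Illustrative sketch, not part of the original module: builds a minimal
# Results tree by hand and flattens it, showing how Process and File relate.
# The pid, command and file path below are made up; the path is intentionally
# non-existent so File.size resolves to -1. The helper name is hypothetical;
# it is never called by this script.
def _example_results_flatten():
  """Hypothetical helper; safe to delete."""
  missing = os.path.join(ROOT_DIR, 'no_such_file_example')
  proc = Results.Process(
      123,
      [Results.File(None, missing, False, None)],
      sys.executable,
      [sys.executable],
      ROOT_DIR,
      [])
  data = Results(proc).flatten()
  assert data['root']['pid'] == 123
  assert data['root']['files'][0]['size'] == -1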
813
814class ApiBase(object):
815 """OS-agnostic API to trace a process and its children."""
816 class Context(object):
817 """Processes one log line at a time and keeps the list of traced processes.
818
    The parsing is complicated by the fact that logs are traced out of order
    for strace but in-order for dtrace and logman. In addition, on Windows it
    is very frequent that process ids are reused, so a flat list cannot be
    used. But at the same time, it is impossible to faithfully construct a
    graph when the logs are processed out of order. So both a tree and a flat
    mapping are used: the tree is the real process tree, while the flat
    mapping stores the last valid process for the corresponding process id.
    For the strace case, the tree's head is guessed at the last moment.
827 """
828 class Process(object):
829 """Keeps context for one traced child process.
830
831 Logs all the files this process touched. Ignores directories.
832 """
833 def __init__(self, blacklist, pid, initial_cwd):
834 # Check internal consistency.
835 assert isinstance(pid, int), repr(pid)
836 self.pid = pid
837 # children are Process instances.
838 self.children = []
839 self.initial_cwd = initial_cwd
840 self.cwd = None
841 self.files = set()
842 self.only_touched = set()
843 self.executable = None
844 self.command = None
845 self._blacklist = blacklist
846
847 def to_results_process(self):
848 """Resolves file case sensitivity and or late-bound strings."""
849 # When resolving files, it's normal to get dupe because a file could be
850 # opened multiple times with different case. Resolve the deduplication
851 # here.
852 def fix_path(x):
853 """Returns the native file path case.
854
855 Converts late-bound strings.
856 """
857 if not x:
858 # Do not convert None instance to 'None'.
859 return x
860 # TODO(maruel): Do not upconvert to unicode here, on linux we don't
861 # know the file path encoding so they must be treated as bytes.
862 x = unicode(x)
863 if os.path.isabs(x):
864 # If the path is not absolute, which tends to happen occasionally on
865 # Windows, it is not possible to get the native path case so ignore
866 # that trace. It mostly happens for 'executable' value.
867 x = get_native_path_case(x)
868 return x
869
870 def fix_and_blacklist_path(x):
871 x = fix_path(x)
872 if not x:
873 return
874 # The blacklist needs to be reapplied, since path casing could
875 # influence blacklisting.
876 if self._blacklist(x):
877 return
878 return x
879
880 # Filters out directories. Some may have passed through.
881 files = set(f for f in map(fix_and_blacklist_path, self.files) if f)
882 only_touched = set(
883 f for f in map(fix_and_blacklist_path, self.only_touched) if f)
884 only_touched -= files
885
886 files = [
887 Results.File(None, f, False, None) for f in files
888 if not os.path.isdir(f)
889 ]
890 # Using 0 as size means the file's content is ignored since the file was
891 # never opened for I/O.
892 files.extend(
893 Results.File(None, f, False, 0) for f in only_touched
894 if not os.path.isdir(f)
895 )
896 return Results.Process(
897 self.pid,
898 files,
899 fix_path(self.executable),
900 self.command,
901 fix_path(self.initial_cwd),
902 [c.to_results_process() for c in self.children])
903
904 def add_file(self, filepath, touch_only):
905 """Adds a file if it passes the blacklist."""
906 if self._blacklist(unicode(filepath)):
907 return
908 logging.debug('add_file(%d, %s, %s)' % (self.pid, filepath, touch_only))
        # Note that filepath and not unicode(filepath) is added. It is because
        # filepath could be something other than a string, like a RelativePath
        # instance for dtrace logs.
912 if touch_only:
913 self.only_touched.add(filepath)
914 else:
915 self.files.add(filepath)
916
917 def __init__(self, blacklist):
918 self.blacklist = blacklist
919 # Initial process.
920 self.root_process = None
      # dict to accelerate process lookup, so we don't have to walk the whole
      # graph each time.
923 self._process_lookup = {}
924
925 class Tracer(object):
926 """During it's lifetime, the tracing subsystem is enabled."""
927 def __init__(self, logname):
928 self._logname = logname
929 self._lock = threading.Lock()
930 self._traces = []
931 self._initialized = True
932
933 def trace(self, cmd, cwd, tracename, output):
934 """Runs the OS-specific trace program on an executable.
935
936 Arguments:
937 - cmd: The command (a list) to run.
938 - cwd: Current directory to start the child process in.
939 - tracename: Name of the trace in the logname file.
940 - output: If False, redirects output to PIPEs.
941
942 Returns a tuple (resultcode, output) and updates the internal trace
943 entries.
944 """
945 # The implementation adds an item to self._traces.
946 raise NotImplementedError(self.__class__.__name__)
947
948 def close(self, _timeout=None):
949 """Saves the meta-data in the logname file.
950
951 For kernel-based tracing, stops the tracing subsystem.
952
953 Must not be used manually when using 'with' construct.
954 """
955 with self._lock:
956 assert self._initialized
957 try:
958 data = {
959 'traces': self._traces,
960 }
961 write_json(self._logname, data, False)
962 finally:
963 self._initialized = False
964
965 def post_process_log(self):
966 """Post-processes the log so it becomes faster to load afterward.
967
968 Must not be used manually when using 'with' construct.
969 """
970 assert not self._initialized, 'Must stop tracing first.'
971
972 def __enter__(self):
973 """Enables 'with' statement."""
974 return self
975
976 def __exit__(self, exc_type, exc_value, traceback):
977 """Enables 'with' statement."""
978 self.close()
979 # If an exception was thrown, do not process logs.
980 if not exc_type:
981 self.post_process_log()
982
983 def get_tracer(self, logname):
984 """Returns an ApiBase.Tracer instance.
985
986 Initializes the tracing subsystem, which is a requirement for kernel-based
987 tracers. Only one tracer instance should be live at a time!
988
989 logname is the filepath to the json file that will contain the meta-data
990 about the logs.
991 """
992 return self.Tracer(logname)
993
994 @staticmethod
995 def clean_trace(logname):
996 """Deletes an old log."""
997 raise NotImplementedError()
998
999 @classmethod
1000 def parse_log(cls, logname, blacklist):
1001 """Processes trace logs and returns the files opened and the files that do
1002 not exist.
1003
1004 It does not track directories.
1005
    |logname| must be an absolute path.

    Most of the time, files that do not exist are temporary test files that
1009 should be put in /tmp instead. See http://crbug.com/116251.
1010
1011 Returns a list of dict with keys:
1012 - results: A Results instance.
1013 - trace: The corresponding tracename parameter provided to
1014 get_tracer().trace().
1015 - output: Output gathered during execution, if get_tracer().trace(...,
1016 output=False) was used.
1017 """
    raise NotImplementedError(cls.__name__)
1019
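# Illustrative sketch, not part of the original module: the two phases from
# the module docstring, using the Strace backend. It assumes a Linux host with
# an strace binary on PATH; the log name, trace name and helper name are
# hypothetical, and the helper is never called by this script.
def _example_two_phase_trace():
  """Hypothetical helper; safe to delete."""
  api = Strace()
  logname = os.path.join(BASE_DIR, 'example_trace.json')
  with api.get_tracer(logname) as tracer:
    # Phase 1: run the command under strace; per-pid logs are written next to
    # |logname| and the meta-data is saved when the tracer is closed.
    tracer.trace([sys.executable, '-c', 'print(1)'], BASE_DIR, 'example', True)
  # Phase 2: parse the logs back into Results instances.
  for item in api.parse_log(logname, lambda _path: False):
    print item['results'].flatten()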
1020
1021class Strace(ApiBase):
1022 """strace implies linux."""
1023 class Context(ApiBase.Context):
1024 """Processes a strace log line and keeps the list of existent and non
1025 existent files accessed.
1026
1027 Ignores directories.
1028
1029 Uses late-binding to processes the cwd of each process. The problem is that
1030 strace generates one log file per process it traced but doesn't give any
1031 information about which process was started when and by who. So we don't
1032 even know which process is the initial one. So process the logs out of
1033 order and use late binding with RelativePath to be able to deduce the
1034 initial directory of each process once all the logs are parsed.
1035 """
1036 class Process(ApiBase.Context.Process):
1037 """Represents the state of a process.
1038
1039 Contains all the information retrieved from the pid-specific log.
1040 """
1041 # Function names are using ([a-z_0-9]+)
1042 # This is the most common format. function(args) = result
1043 RE_HEADER = re.compile(r'^([a-z_0-9]+)\((.+?)\)\s+= (.+)$')
1044 # An interrupted function call, only grab the minimal header.
1045 RE_UNFINISHED = re.compile(r'^([^\(]+)(.*) \<unfinished \.\.\.\>$')
      # An interrupted function call, with the process exiting. It must be the
1047 # last line in the log.
1048 RE_UNFINISHED_EXIT = re.compile(
1049 r'^([^\(]+)(.*) \<unfinished \.\.\.\ exit status \d+>$')
      # A resumed function call.
1051 RE_RESUMED = re.compile(r'^<\.\.\. ([^ ]+) resumed> (.+)$')
1052 # A process received a signal.
1053 RE_SIGNAL = re.compile(r'^--- SIG[A-Z]+ .+ ---')
1054 # A process didn't handle a signal. Ignore any junk appearing before,
1055 # because the process was forcibly killed so it won't open any new file.
1056 RE_KILLED = re.compile(
1057 r'^.*\+\+\+ killed by ([A-Z]+)( \(core dumped\))? \+\+\+$')
1058 # The process has exited.
1059 RE_PROCESS_EXITED = re.compile(r'^\+\+\+ exited with (\d+) \+\+\+')
1060 # A call was canceled. Ignore any prefix.
1061 RE_UNAVAILABLE = re.compile(r'^.*\)\s*= \? <unavailable>$')
1062 # Happens when strace fails to even get the function name.
1063 UNNAMED_FUNCTION = '????'
1064
1065 # Corner-case in python, a class member function decorator must not be
1066 # @staticmethod.
1067 def parse_args(regexp, expect_zero): # pylint: disable=E0213
1068 """Automatically convert the str 'args' into a list of processed
1069 arguments.
1070
1071 Arguments:
1072 - regexp is used to parse args.
1073 - expect_zero: one of True, False or None.
1074 - True: will check for result.startswith('0') first and will ignore
1075 the trace line completely otherwise. This is important because for
1076 many functions, the regexp will not process if the call failed.
          - False: will check for not result.startswith(('?', '-1')) for the
            same reason as with True.
1079 - None: ignore result.
1080 """
1081 def meta_hook(function):
1082 assert function.__name__.startswith('handle_')
1083 def hook(self, args, result):
1084 if expect_zero is True and not result.startswith('0'):
1085 return
1086 if expect_zero is False and result.startswith(('?', '-1')):
1087 return
1088 match = re.match(regexp, args)
1089 if not match:
1090 raise TracingFailure(
1091 'Failed to parse %s(%s) = %s' %
1092 (function.__name__[len('handle_'):], args, result),
1093 None, None, None)
1094 return function(self, match.groups(), result)
1095 return hook
1096 return meta_hook
1097
1098 class RelativePath(object):
1099 """A late-bound relative path."""
1100 def __init__(self, parent, value):
1101 self.parent = parent
1102 self.value = value
1103
1104 def render(self):
1105 """Returns the current directory this instance is representing.
1106
1107 This function is used to return the late-bound value.
1108 """
1109 if self.value and self.value.startswith(u'/'):
1110 # An absolute path.
1111 return self.value
1112 parent = self.parent.render() if self.parent else u'<None>'
1113 if self.value:
1114 return os.path.normpath(os.path.join(parent, self.value))
1115 return parent
1116
1117 def __unicode__(self):
1118 """Acts as a string whenever needed."""
1119 return unicode(self.render())
1120
1121 def __str__(self):
1122 """Acts as a string whenever needed."""
1123 return str(self.render())
1124
1125 def __init__(self, root, pid):
1126 """Keeps enough information to be able to guess the original process
1127 root.
1128
1129 strace doesn't store which process was the initial process. So more
1130 information needs to be kept so the graph can be reconstructed from the
1131 flat map.
1132 """
1133 logging.info('%s(%d)' % (self.__class__.__name__, pid))
1134 super(Strace.Context.Process, self).__init__(root.blacklist, pid, None)
1135 assert isinstance(root, ApiBase.Context)
1136 self._root = weakref.ref(root)
1137 # The dict key is the function name of the pending call, like 'open'
1138 # or 'execve'.
1139 self._pending_calls = {}
1140 self._line_number = 0
1141 # Current directory when the process started.
1142 self.initial_cwd = self.RelativePath(self._root(), None)
1143 self.parentid = None
        self._done = False

      def get_cwd(self):
1147 """Returns the best known value of cwd."""
1148 return self.cwd or self.initial_cwd
1149
1150 def render(self):
1151 """Returns the string value of the RelativePath() object.
1152
1153 Used by RelativePath. Returns the initial directory and not the
1154 current one since the current directory 'cwd' validity is time-limited.
1155
1156 The validity is only guaranteed once all the logs are processed.
1157 """
1158 return self.initial_cwd.render()
1159
1160 def on_line(self, line):
1161 self._line_number += 1
        if self._done:
1163 raise TracingFailure(
1164 'Found a trace for a terminated process',
1165 None, None, None)
1166
        if self.RE_SIGNAL.match(line):
1168 # Ignore signals.
1169 return
1170
1171 try:
1172 match = self.RE_KILLED.match(line)
1173 if match:
1174 # Converts a '+++ killed by Foo +++' trace into an exit_group().
1175 self.handle_exit_group(match.group(1), None)
1176 return
1177
1178 match = self.RE_PROCESS_EXITED.match(line)
1179 if match:
1180 # Converts a '+++ exited with 1 +++' trace into an exit_group()
1181 self.handle_exit_group(match.group(1), None)
1182 return
1183
1184 match = self.RE_UNFINISHED.match(line)
1185 if match:
1186 if match.group(1) in self._pending_calls:
1187 raise TracingFailure(
1188 'Found two unfinished calls for the same function',
1189 None, None, None,
1190 self._pending_calls)
1191 self._pending_calls[match.group(1)] = (
1192 match.group(1) + match.group(2))
1193 return
1194
          match = self.RE_UNFINISHED_EXIT.match(line)
1196 if match:
1197 # The process died. No other line can be processed afterward.
1198 self._done = True
1199 return
1200
          match = self.RE_UNAVAILABLE.match(line)
1202 if match:
1203 # This usually means a process was killed and a pending call was
1204 # canceled.
1205 # TODO(maruel): Look up the last exit_group() trace just above and
1206 # make sure any self._pending_calls[anything] is properly flushed.
1207 return
1208
1209 match = self.RE_RESUMED.match(line)
1210 if match:
1211 if match.group(1) not in self._pending_calls:
1212 raise TracingFailure(
1213 'Found a resumed call that was not logged as unfinished',
1214 None, None, None,
1215 self._pending_calls)
1216 pending = self._pending_calls.pop(match.group(1))
1217 # Reconstruct the line.
1218 line = pending + match.group(2)
1219
1220 match = self.RE_HEADER.match(line)
1221 if not match:
1222 raise TracingFailure(
1223 'Found an invalid line: %s' % line,
1224 None, None, None)
1225 if match.group(1) == self.UNNAMED_FUNCTION:
1226 return
1227
1228 # It's a valid line, handle it.
1229 handler = getattr(self, 'handle_%s' % match.group(1), None)
1230 if not handler:
1231 self._handle_unknown(match.group(1), match.group(2), match.group(3))
1232 return handler(match.group(2), match.group(3))
1233 except TracingFailure, e:
1234 # Hack in the values since the handler could be a static function.
1235 e.pid = self.pid
1236 e.line = line
1237 e.line_number = self._line_number
1238 # Re-raise the modified exception.
1239 raise
1240 except (KeyError, NotImplementedError, ValueError), e:
1241 raise TracingFailure(
1242 'Trace generated a %s exception: %s' % (
1243 e.__class__.__name__, str(e)),
1244 self.pid,
1245 self._line_number,
1246 line,
1247 e)
1248
1249 @parse_args(r'^\"(.+?)\", [FKORWX_|]+$', True)
1250 def handle_access(self, args, _result):
1251 self._handle_file(args[0], True)
1252
1253 @parse_args(r'^\"(.+?)\"$', True)
1254 def handle_chdir(self, args, _result):
1255 """Updates cwd."""
1256 self.cwd = self.RelativePath(self, args[0])
1257 logging.debug('handle_chdir(%d, %s)' % (self.pid, self.cwd))
1258
1259 def handle_clone(self, _args, result):
1260 """Transfers cwd."""
1261 if result.startswith(('?', '-1')):
1262 # The call failed.
1263 return
1264 # Update the other process right away.
1265 childpid = int(result)
1266 child = self._root().get_or_set_proc(childpid)
1267 if child.parentid is not None or childpid in self.children:
1268 raise TracingFailure(
            'Found internal inconsistency in process lifetime detection '
1270 'during a clone() call',
1271 None, None, None)
1272
1273 # Copy the cwd object.
1274 child.initial_cwd = self.get_cwd()
1275 child.parentid = self.pid
1276 # It is necessary because the logs are processed out of order.
1277 self.children.append(child)
1278
1279 def handle_close(self, _args, _result):
1280 pass
1281
1282 def handle_chmod(self, _args, _result):
1283 pass
1284
1285 def handle_creat(self, _args, _result):
1286 # Ignore files created, since they didn't need to exist.
1287 pass
1288
1289 @parse_args(r'^\"(.+?)\", \[(.+)\], \[\/\* \d+ vars? \*\/\]$', True)
1290 def handle_execve(self, args, _result):
        # Even if in practice execve() doesn't return when it succeeds, strace
1292 # still prints '0' as the result.
1293 filepath = args[0]
1294 self._handle_file(filepath, False)
1295 self.executable = self.RelativePath(self.get_cwd(), filepath)
1296 self.command = strace_process_quoted_arguments(args[1])
1297
1298 def handle_exit_group(self, _args, _result):
1299 """Removes cwd."""
1300 self.cwd = None
1301
1302 def handle_fork(self, args, result):
1303 self._handle_unknown('fork', args, result)
1304
      def handle_futex(self, _args, _result):
1306 pass
1307
      def handle_getcwd(self, _args, _result):
1309 pass
1310
1311 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1312 def handle_link(self, args, _result):
1313 self._handle_file(args[0], False)
1314 self._handle_file(args[1], False)
1315
1316 @parse_args(r'\"(.+?)\", \{.+?, \.\.\.\}', True)
1317 def handle_lstat(self, args, _result):
1318 self._handle_file(args[0], True)
1319
1320 def handle_mkdir(self, _args, _result):
1321 pass
1322
1323 @parse_args(r'^\"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', False)
1324 def handle_open(self, args, _result):
1325 if 'O_DIRECTORY' in args[1]:
1326 return
1327 self._handle_file(args[0], False)
1328
1329 @parse_args(r'^(\d+|AT_FDCWD), \"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', False)
1330 def handle_openat(self, args, _result):
1331 if 'O_DIRECTORY' in args[2]:
1332 return
        if args[0] == 'AT_FDCWD':
1334 self._handle_file(args[1], False)
1335 else:
1336 # TODO(maruel): Implement relative open if necessary instead of the
1337 # AT_FDCWD flag, let's hope not since this means tracking all active
1338 # directory handles.
1339 raise Exception('Relative open via openat not implemented.')
1340
1341 @parse_args(r'^\"(.+?)\", \".+?\"(\.\.\.)?, \d+$', False)
1342 def handle_readlink(self, args, _result):
1343 self._handle_file(args[0], False)
1344
1345 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1346 def handle_rename(self, args, _result):
1347 self._handle_file(args[0], False)
1348 self._handle_file(args[1], False)
1349
1350 def handle_rmdir(self, _args, _result):
1351 pass
1352
1353 def handle_setxattr(self, _args, _result):
1354 pass
1355
1356 @parse_args(r'\"(.+?)\", \{.+?, \.\.\.\}', True)
1357 def handle_stat(self, args, _result):
1358 self._handle_file(args[0], True)
1359
1360 def handle_symlink(self, _args, _result):
1361 pass
1362
1363 def handle_unlink(self, _args, _result):
1364 # In theory, the file had to be created anyway.
1365 pass
1366
1367 def handle_statfs(self, _args, _result):
1368 pass
1369
1370 def handle_vfork(self, args, result):
1371 self._handle_unknown('vfork', args, result)
1372
1373 @staticmethod
1374 def _handle_unknown(function, args, result):
1375 raise TracingFailure(
1376 'Unexpected/unimplemented trace %s(%s)= %s' %
1377 (function, args, result),
1378 None, None, None)
1379
1380 def _handle_file(self, filepath, touch_only):
1381 filepath = self.RelativePath(self.get_cwd(), filepath)
1382 #assert not touch_only, unicode(filepath)
1383 self.add_file(filepath, touch_only)
1384
1385 def __init__(self, blacklist, initial_cwd):
1386 super(Strace.Context, self).__init__(blacklist)
1387 self.initial_cwd = initial_cwd
1388
1389 def render(self):
1390 """Returns the string value of the initial cwd of the root process.
1391
1392 Used by RelativePath.
1393 """
1394 return self.initial_cwd
1395
1396 def on_line(self, pid, line):
1397 """Transfers control into the Process.on_line() function."""
1398 self.get_or_set_proc(pid).on_line(line.strip())
1399
1400 def to_results(self):
1401 """Finds back the root process and verify consistency."""
1402 # TODO(maruel): Absolutely unecessary, fix me.
1403 root = [p for p in self._process_lookup.itervalues() if not p.parentid]
1404 if len(root) != 1:
1405 raise TracingFailure(
          'Found internal inconsistency in process lifetime detection '
1407 'while finding the root process',
1408 None,
1409 None,
1410 None,
1411 sorted(p.pid for p in root))
1412 self.root_process = root[0]
1413 process = self.root_process.to_results_process()
1414 if sorted(self._process_lookup) != sorted(p.pid for p in process.all):
1415 raise TracingFailure(
          'Found internal inconsistency in process lifetime detection '
1417 'while looking for len(tree) == len(list)',
1418 None,
1419 None,
1420 None,
1421 sorted(self._process_lookup),
1422 sorted(p.pid for p in process.all))
1423 return Results(process)
1424
1425 def get_or_set_proc(self, pid):
1426 """Returns the Context.Process instance for this pid or creates a new one.
1427 """
1428 if not pid or not isinstance(pid, int):
1429 raise TracingFailure(
          'Unexpected value for pid: %r' % pid,
1431 pid,
1432 None,
1433 None,
1434 pid)
1435 if pid not in self._process_lookup:
1436 self._process_lookup[pid] = self.Process(self, pid)
1437 return self._process_lookup[pid]
1438
1439 @classmethod
1440 def traces(cls):
1441 """Returns the list of all handled traces to pass this as an argument to
1442 strace.
1443 """
1444 prefix = 'handle_'
1445 return [i[len(prefix):] for i in dir(cls.Process) if i.startswith(prefix)]
1446
1447 class Tracer(ApiBase.Tracer):
1448 MAX_LEN = 256
1449
1450 def trace(self, cmd, cwd, tracename, output):
1451 """Runs strace on an executable."""
1452 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
1453 assert os.path.isabs(cmd[0]), cmd[0]
1454 assert os.path.isabs(cwd), cwd
1455 assert os.path.normpath(cwd) == cwd, cwd
1456 with self._lock:
1457 if not self._initialized:
1458 raise TracingFailure(
            'Called Tracer.trace() on an uninitialized object',
1460 None, None, None, tracename)
1461 assert tracename not in (i['trace'] for i in self._traces)
1462 stdout = stderr = None
1463 if output:
1464 stdout = subprocess.PIPE
1465 stderr = subprocess.STDOUT
1466 # Ensure all file related APIs are hooked.
1467 traces = ','.join(Strace.Context.traces() + ['file'])
1468 trace_cmd = [
1469 'strace',
1470 '-ff',
1471 '-s', '%d' % self.MAX_LEN,
1472 '-e', 'trace=%s' % traces,
1473 '-o', self._logname + '.' + tracename,
1474 ]
1475 child = subprocess.Popen(
1476 trace_cmd + cmd,
1477 cwd=cwd,
1478 stdin=subprocess.PIPE,
1479 stdout=stdout,
1480 stderr=stderr)
1481 out = child.communicate()[0]
      # TODO(maruel): Walking the logs to figure out the root process would
      # simplify parsing the logs a *lot*.
1484 with self._lock:
1485 assert tracename not in (i['trace'] for i in self._traces)
1486 self._traces.append(
1487 {
1488 'cmd': cmd,
1489 'cwd': cwd,
1490 # The pid of strace process, not very useful.
1491 'pid': child.pid,
1492 'trace': tracename,
1493 'output': out,
1494 })
1495 return child.returncode, out
1496
1497 @staticmethod
1498 def clean_trace(logname):
1499 if os.path.isfile(logname):
1500 os.remove(logname)
1501 # Also delete any pid specific file from previous traces.
1502 for i in glob.iglob(logname + '.*'):
1503 if i.rsplit('.', 1)[1].isdigit():
1504 os.remove(i)
1505
1506 @classmethod
1507 def parse_log(cls, logname, blacklist):
1508 logging.info('parse_log(%s, %s)' % (logname, blacklist))
    assert os.path.isabs(logname)
    data = read_json(logname)
1511 out = []
1512 for item in data['traces']:
1513 result = {
1514 'trace': item['trace'],
1515 'output': item['output'],
1516 }
1517 try:
1518 context = cls.Context(blacklist, item['cwd'])
1519 for pidfile in glob.iglob('%s.%s.*' % (logname, item['trace'])):
1520 pid = pidfile.rsplit('.', 1)[1]
1521 if pid.isdigit():
1522 pid = int(pid)
1523 # TODO(maruel): Load as utf-8
1524 for line in open(pidfile, 'rb'):
1525 context.on_line(pid, line)
1526 result['results'] = context.to_results()
      except TracingFailure:
1528 result['exception'] = sys.exc_info()
      out.append(result)
1530 return out
1531
1532
1533class Dtrace(ApiBase):
1534 """Uses DTrace framework through dtrace. Requires root access.
1535
1536 Implies Mac OSX.
1537
1538 dtruss can't be used because it has compatibility issues with python.
1539
1540 Also, the pid->cwd handling needs to be done manually since OSX has no way to
1541 get the absolute path of the 'cwd' dtrace variable from the probe.
1542
1543 Also, OSX doesn't populate curpsinfo->pr_psargs properly, see
1544 https://discussions.apple.com/thread/1980539. So resort to handling execve()
1545 manually.
1546
1547 errno is not printed in the log since this implementation currently only cares
1548 about files that were successfully opened.
1549 """
1550 class Context(ApiBase.Context):
1551 # Format: index pid function(args)
1552 RE_HEADER = re.compile(r'^\d+ (\d+) ([a-zA-Z_\-]+)\((.*?)\)$')
1553
1554 # Arguments parsing.
1555 RE_DTRACE_BEGIN = re.compile(r'^\"(.+?)\"$')
1556 RE_CHDIR = re.compile(r'^\"(.+?)\"$')
1557 RE_EXECVE = re.compile(r'^\"(.+?)\", \[(\d+), (.+)\]$')
1558 RE_OPEN = re.compile(r'^\"(.+?)\", (0x[0-9a-z]+), (0x[0-9a-z]+)$')
1559 RE_PROC_START = re.compile(r'^(\d+), \"(.+?)\", (\d+)$')
1560 RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$')
1561
1562 O_DIRECTORY = 0x100000
1563
1564 class Process(ApiBase.Context.Process):
1565 def __init__(self, *args):
1566 super(Dtrace.Context.Process, self).__init__(*args)
1567 self.cwd = self.initial_cwd
1568
1569 def __init__(self, blacklist, tracer_pid, initial_cwd):
1570 logging.info(
1571 '%s(%d, %s)' % (self.__class__.__name__, tracer_pid, initial_cwd))
1572 super(Dtrace.Context, self).__init__(blacklist)
1573 # Process ID of the temporary script created by create_thunk().
1574 self._tracer_pid = tracer_pid
1575 self._initial_cwd = initial_cwd
1576 self._line_number = 0
1577
1578 def on_line(self, line):
1579 self._line_number += 1
1580 match = self.RE_HEADER.match(line)
1581 if not match:
1582 raise TracingFailure(
1583 'Found malformed line: %s' % line,
1584 None,
1585 self._line_number,
1586 line)
1587 fn = getattr(
1588 self,
1589 'handle_%s' % match.group(2).replace('-', '_'),
1590 self._handle_ignored)
1591 # It is guaranteed to succeed because of the regexp. Or at least I thought
1592 # it would.
1593 pid = int(match.group(1))
1594 try:
1595 return fn(pid, match.group(3))
1596 except TracingFailure, e:
1597 # Hack in the values since the handler could be a static function.
1598 e.pid = pid
1599 e.line = line
1600 e.line_number = self._line_number
1601 # Re-raise the modified exception.
1602 raise
1603 except (KeyError, NotImplementedError, ValueError), e:
1604 raise TracingFailure(
1605 'Trace generated a %s exception: %s' % (
1606 e.__class__.__name__, str(e)),
1607 pid,
1608 self._line_number,
1609 line,
1610 e)
1611
1612 def to_results(self):
1613 process = self.root_process.to_results_process()
      # Internal consistency check.
1615 if sorted(self._process_lookup) != sorted(p.pid for p in process.all):
1616 raise TracingFailure(
          'Found internal inconsistency in process lifetime detection '
1618 'while looking for len(tree) == len(list)',
1619 None,
1620 None,
1621 None,
1622 sorted(self._process_lookup),
1623 sorted(p.pid for p in process.all))
1624 return Results(process)
1625
1626 def handle_dtrace_BEGIN(self, _pid, args):
1627 if not self.RE_DTRACE_BEGIN.match(args):
1628 raise TracingFailure(
          'Found internal inconsistency in dtrace_BEGIN log line',
1630 None, None, None)
1631
1632 def handle_proc_start(self, pid, args):
1633 """Transfers cwd.
1634
      The dtrace script already takes care of only tracing the processes that
      are children of the traced processes, so there is no need to verify the
      process hierarchy.
1638 """
1639 if pid in self._process_lookup:
1640 raise TracingFailure(
          'Found internal inconsistency in proc_start: %d started two times' %
1642 pid,
1643 None, None, None)
1644 match = self.RE_PROC_START.match(args)
1645 if not match:
1646 raise TracingFailure(
1647 'Failed to parse arguments: %s' % args,
1648 None, None, None)
1649 ppid = int(match.group(1))
1650 if ppid == self._tracer_pid and not self.root_process:
1651 proc = self.root_process = self.Process(
1652 self.blacklist, pid, self._initial_cwd)
1653 elif ppid in self._process_lookup:
1654 proc = self.Process(self.blacklist, pid, self._process_lookup[ppid].cwd)
1655 self._process_lookup[ppid].children.append(proc)
1656 else:
1657 # Another process tree, ignore.
1658 return
1659 self._process_lookup[pid] = proc
1660 logging.debug(
1661 'New child: %s -> %d cwd:%s' %
1662 (ppid, pid, unicode(proc.initial_cwd)))
1663
1664 def handle_proc_exit(self, pid, _args):
1665 """Removes cwd."""
1666 if pid in self._process_lookup:
        # self._tracer_pid itself is not traced, and neither are other trace
        # runs.
1668 self._process_lookup[pid].cwd = None
1669
1670 def handle_execve(self, pid, args):
1671 """Sets the process' executable.
1672
1673 TODO(maruel): Read command line arguments. See
1674 https://discussions.apple.com/thread/1980539 for an example.
1675 https://gist.github.com/1242279
1676
1677 Will have to put the answer at http://stackoverflow.com/questions/7556249.
1678 :)
1679 """
1680 if not pid in self._process_lookup:
1681 # Another process tree, ignore.
1682 return
1683 match = self.RE_EXECVE.match(args)
1684 if not match:
1685 raise TracingFailure(
1686 'Failed to parse arguments: %r' % args,
1687 None, None, None)
1688 proc = self._process_lookup[pid]
1689 proc.executable = match.group(1)
1690 proc.command = self.process_escaped_arguments(match.group(3))
1691 if int(match.group(2)) != len(proc.command):
1692 raise TracingFailure(
1693 'Failed to parse execve() arguments: %s' % args,
1694 None, None, None)
1695
1696 def handle_chdir(self, pid, args):
1697 """Updates cwd."""
1698 if pid not in self._process_lookup:
1699 # Another process tree, ignore.
1700 return
1701 cwd = self.RE_CHDIR.match(args).group(1)
1702 if not cwd.startswith('/'):
1703 cwd2 = os.path.join(self._process_lookup[pid].cwd, cwd)
1704 logging.debug('handle_chdir(%d, %s) -> %s' % (pid, cwd, cwd2))
1705 else:
1706 logging.debug('handle_chdir(%d, %s)' % (pid, cwd))
1707 cwd2 = cwd
1708 self._process_lookup[pid].cwd = cwd2
1709
1710 def handle_open_nocancel(self, pid, args):
1711 """Redirects to handle_open()."""
1712 return self.handle_open(pid, args)
1713
1714 def handle_open(self, pid, args):
1715 if pid not in self._process_lookup:
1716 # Another process tree, ignore.
1717 return
1718 match = self.RE_OPEN.match(args)
1719 if not match:
1720 raise TracingFailure(
1721 'Failed to parse arguments: %s' % args,
1722 None, None, None)
1723 flag = int(match.group(2), 16)
1724 if self.O_DIRECTORY & flag == self.O_DIRECTORY:
1725 # Ignore directories.
1726 return
1727 self._handle_file(pid, match.group(1))
1728
1729 def handle_rename(self, pid, args):
1730 if pid not in self._process_lookup:
1731 # Another process tree, ignore.
1732 return
1733 match = self.RE_RENAME.match(args)
1734 if not match:
1735 raise TracingFailure(
1736 'Failed to parse arguments: %s' % args,
1737 None, None, None)
1738 self._handle_file(pid, match.group(1))
1739 self._handle_file(pid, match.group(2))
1740
1741 def _handle_file(self, pid, filepath):
1742 if not filepath.startswith('/'):
1743 filepath = os.path.join(self._process_lookup[pid].cwd, filepath)
1744 # We can get '..' in the path.
1745 filepath = os.path.normpath(filepath)
1746 # Sadly, still need to filter out directories here;
1747 # saw open_nocancel(".", 0, 0) = 0 lines.
1748 if os.path.isdir(filepath):
1749 return
1750 self._process_lookup[pid].add_file(filepath, False)
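# Example of the normalization above (hypothetical values):
#   cwd '/build/src' and filepath '../include/foo.h'
#   -> os.path.join() gives '/build/src/../include/foo.h'
#   -> os.path.normpath() gives '/build/include/foo.h'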
1751
1752 def handle_ftruncate(self, pid, args):
1753 """Just used as a signal to kill dtrace, ignoring."""
1754 pass
1755
1756 @staticmethod
1757 def _handle_ignored(pid, args):
1758 """Called for all the event traces that are not handled."""
1759 raise NotImplementedError('Please implement me')
1760
1761 @staticmethod
1762 """Extracts escaped arguments from a string and returns the arguments as a
1763 """Extracts escaped arguments on a string and return the arguments as a
1764 list.
1765
1766 Implemented as an automaton.
1767
1768 Example:
1769 With text = '\\001python2.7\\001-c\\001print(\\"hi\\")\\0', the
1770 function will return ['python2.7', '-c', 'print("hi")'].
1771 """
1772 if not text.endswith('\\0'):
1773 raise ValueError('String is not null terminated: %r' % text, text)
1774 text = text[:-2]
1775
1776 def unescape(x):
1777 """Replaces '\\' with '\' and '\?' (where ? is anything) with ?."""
1778 out = []
1779 escaped = False
1780 for i in x:
1781 if i == '\\' and not escaped:
1782 escaped = True
1783 continue
1784 escaped = False
1785 out.append(i)
1786 return ''.join(out)
1787
1788 return [unescape(i) for i in text.split('\\001')]
1789
1790 class Tracer(ApiBase.Tracer):
1791 # pylint: disable=C0301
1792 #
1793 # To understand the following code, you'll want to take a look at:
1794 # http://developers.sun.com/solaris/articles/dtrace_quickref/dtrace_quickref.html
1795 # https://wikis.oracle.com/display/DTrace/Variables
1796 # http://docs.oracle.com/cd/E19205-01/820-4221/
1797 #
1798 # 0. Dump all the valid probes into a text file. It is important, you
1799 # want to redirect into a file and you don't want to constantly 'sudo'.
1800 # $ sudo dtrace -l > probes.txt
1801 #
1802 # 1. Count the number of probes:
1803 # $ wc -l probes.txt
1804 # 81823 # On OSX 10.7, including 1 header line.
1805 #
1806 # 2. List providers, intentionally skipping all the 'syspolicy10925' and the
1807 # likes and skipping the header with NR>1:
1808 # $ awk 'NR>1 { print $2 }' probes.txt | sort | uniq | grep -v '[[:digit:]]'
1809 # dtrace
1810 # fbt
1811 # io
1812 # ip
1813 # lockstat
1814 # mach_trap
1815 # proc
1816 # profile
1817 # sched
1818 # syscall
1819 # tcp
1820 # vminfo
1821 #
1822 # 3. List of valid probes:
1823 # $ grep syscall probes.txt | less
1824 # or use dtrace directly:
1825 # $ sudo dtrace -l -P syscall | less
1826 #
1827 # trackedpid is an associative array where its value can be 0, 1 or 2.
1828 # 0 is for untracked processes and is the default value for items not
1829 # in the associative array.
1830 # 1 is for tracked processes.
1831 # 2 is for the script created by create_thunk() only. It is not tracked
1832 # itself but all its descendants are.
1833 #
1834 # The script will kill itself only once waiting_to_die == 1 and
1835 # current_processes == 0, i.e. both getlogin() was called and all the
1836 # traced processes have exited.
1837 #
1838 # TODO(maruel): Use cacheable predicates. See
1839 # https://wikis.oracle.com/display/DTrace/Performance+Considerations
1840 D_CODE = """
1841 dtrace:::BEGIN {
1842 waiting_to_die = 0;
1843 current_processes = 0;
1844 logindex = 0;
1845 printf("%d %d %s_%s(\\"%s\\")\\n",
1846 logindex, PID, probeprov, probename, SCRIPT);
1847 logindex++;
1848 }
1849
1850 proc:::start /trackedpid[ppid]/ {
1851 trackedpid[pid] = 1;
1852 current_processes += 1;
1853 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
1854 logindex, pid, probeprov, probename,
1855 ppid,
1856 execname,
1857 current_processes);
1858 logindex++;
1859 }
1860 /* Should use SCRIPT but there is no access to this variable at that
1861 * point. */
1862 proc:::start /ppid == PID && execname == "Python"/ {
1863 trackedpid[pid] = 2;
1864 current_processes += 1;
1865 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
1866 logindex, pid, probeprov, probename,
1867 ppid,
1868 execname,
1869 current_processes);
1870 logindex++;
1871 }
1872 proc:::exit /trackedpid[pid] &&
1873 current_processes == 1 &&
1874 waiting_to_die == 1/ {
1875 trackedpid[pid] = 0;
1876 current_processes -= 1;
1877 printf("%d %d %s_%s(%d)\\n",
1878 logindex, pid, probeprov, probename,
1879 current_processes);
1880 logindex++;
1881 exit(0);
1882 }
1883 proc:::exit /trackedpid[pid]/ {
1884 trackedpid[pid] = 0;
1885 current_processes -= 1;
1886 printf("%d %d %s_%s(%d)\\n",
1887 logindex, pid, probeprov, probename,
1888 current_processes);
1889 logindex++;
1890 }
1891
1892 /* Use an arcane function to detect when we need to die */
1893 syscall::ftruncate:entry /pid == PID && arg0 == FILE_ID/ {
1894 waiting_to_die = 1;
1895 printf("%d %d %s()\\n", logindex, pid, probefunc);
1896 logindex++;
1897 }
1898 syscall::ftruncate:entry /
1899 pid == PID && arg0 == FILE_ID && current_processes == 0/ {
1900 exit(0);
1901 }
1902
1903 syscall::open*:entry /trackedpid[pid] == 1/ {
1904 self->open_arg0 = arg0;
1905 self->open_arg1 = arg1;
1906 self->open_arg2 = arg2;
1907 }
1908 syscall::open*:return /trackedpid[pid] == 1 && errno == 0/ {
1909 this->open_arg0 = copyinstr(self->open_arg0);
1910 printf("%d %d %s(\\"%s\\", 0x%x, 0x%x)\\n",
1911 logindex, pid, probefunc,
1912 this->open_arg0,
1913 self->open_arg1,
1914 self->open_arg2);
1915 logindex++;
1916 this->open_arg0 = 0;
1917 }
1918 syscall::open*:return /trackedpid[pid] == 1/ {
1919 self->open_arg0 = 0;
1920 self->open_arg1 = 0;
1921 self->open_arg2 = 0;
1922 }
1923
1924 syscall::rename:entry /trackedpid[pid] == 1/ {
1925 self->rename_arg0 = arg0;
1926 self->rename_arg1 = arg1;
1927 }
1928 syscall::rename:return /trackedpid[pid] == 1 && errno == 0/ {
1929 this->rename_arg0 = copyinstr(self->rename_arg0);
1930 this->rename_arg1 = copyinstr(self->rename_arg1);
1931 printf("%d %d %s(\\"%s\\", \\"%s\\")\\n",
1932 logindex, pid, probefunc,
1933 this->rename_arg0,
1934 this->rename_arg1);
1935 logindex++;
1936 this->rename_arg0 = 0;
1937 this->rename_arg1 = 0;
1938 }
1939 syscall::rename:return /trackedpid[pid] == 1/ {
1940 self->rename_arg0 = 0;
1941 self->rename_arg1 = 0;
1942 }
1943
1944 /* Track chdir; it's painful because it only receives relative paths.
1945 */
1946 syscall::chdir:entry /trackedpid[pid] == 1/ {
1947 self->chdir_arg0 = arg0;
1948 }
1949 syscall::chdir:return /trackedpid[pid] == 1 && errno == 0/ {
1950 this->chdir_arg0 = copyinstr(self->chdir_arg0);
1951 printf("%d %d %s(\\"%s\\")\\n",
1952 logindex, pid, probefunc,
1953 this->chdir_arg0);
1954 logindex++;
1955 this->chdir_arg0 = 0;
1956 }
1957 syscall::chdir:return /trackedpid[pid] == 1/ {
1958 self->chdir_arg0 = 0;
1959 }
1960 """
1961
1962 # execve-specific code, tends to throw a lot of exceptions.
1963 D_CODE_EXECVE = """
1964 /* Finally what we care about! */
1965 syscall::exec*:entry /trackedpid[pid]/ {
1966 self->exec_arg0 = copyinstr(arg0);
1967 /* Incrementally probe for a NULL in the argv parameter of execve() to
1968 * figure out argc. */
1969 /* TODO(maruel): Skip the remaining copyin() when a NULL pointer was
1970 * found. */
1971 self->exec_argc = 0;
1972 /* Probe for argc==1 */
1973 this->exec_argv = (user_addr_t*)copyin(
1974 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
1975 self->exec_argc = this->exec_argv[self->exec_argc] ?
1976 (self->exec_argc + 1) : self->exec_argc;
1977
1978 /* Probe for argc==2 */
1979 this->exec_argv = (user_addr_t*)copyin(
1980 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
1981 self->exec_argc = this->exec_argv[self->exec_argc] ?
1982 (self->exec_argc + 1) : self->exec_argc;
1983
1984 /* Probe for argc==3 */
1985 this->exec_argv = (user_addr_t*)copyin(
1986 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
1987 self->exec_argc = this->exec_argv[self->exec_argc] ?
1988 (self->exec_argc + 1) : self->exec_argc;
1989
1990 /* Probe for argc==4 */
1991 this->exec_argv = (user_addr_t*)copyin(
1992 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
1993 self->exec_argc = this->exec_argv[self->exec_argc] ?
1994 (self->exec_argc + 1) : self->exec_argc;
1995
1996 /* Copy the input strings since there is no guarantee they'll be
1997 * present after the call completes. */
1998 self->exec_argv0 = (self->exec_argc > 0) ?
1999 copyinstr(this->exec_argv[0]) : "";
2000 self->exec_argv1 = (self->exec_argc > 1) ?
2001 copyinstr(this->exec_argv[1]) : "";
2002 self->exec_argv2 = (self->exec_argc > 2) ?
2003 copyinstr(this->exec_argv[2]) : "";
2004 self->exec_argv3 = (self->exec_argc > 3) ?
2005 copyinstr(this->exec_argv[3]) : "";
2006 this->exec_argv = 0;
2007 }
2008 syscall::exec*:return /trackedpid[pid] && errno == 0/ {
2009 /* We need to join strings here, as using multiple printf() would
2010 * cause tearing when multiple threads/processes are traced.
2011 * Since it is impossible to escape a string and join it to another one,
2012 * like sprintf("%s%S", previous, more), use hackery.
2013 * The elements are separated with a \\1. \\0 cannot be used because
2014 * it is simply ignored. This will conflict with any program putting a
2015 * \\1 in their execve() string, but this should be "rare enough". */
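      /* Illustrative only: with an argv of ["python", "-c", "print(1)"],
       * this->args ends up as "python\\1-c\\1print(1)", which the python-side
       * parser splits back apart on the separator. */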
2016 this->args = "";
2017 /* Process exec_argv[0] */
2018 this->args = strjoin(
2019 this->args, (self->exec_argc > 0) ? self->exec_argv0 : "");
2020
2021 /* Process exec_argv[1] */
2022 this->args = strjoin(
2023 this->args, (self->exec_argc > 1) ? "\\1" : "");
2024 this->args = strjoin(
2025 this->args, (self->exec_argc > 1) ? self->exec_argv1 : "");
2026
2027 /* Process exec_argv[2] */
2028 this->args = strjoin(
2029 this->args, (self->exec_argc > 2) ? "\\1" : "");
2030 this->args = strjoin(
2031 this->args, (self->exec_argc > 2) ? self->exec_argv2 : "");
2032
2033 /* Process exec_argv[3] */
2034 this->args = strjoin(
2035 this->args, (self->exec_argc > 3) ? "\\1" : "");
2036 this->args = strjoin(
2037 this->args, (self->exec_argc > 3) ? self->exec_argv3 : "");
2038
2039 /* Prints self->exec_argc to permit verifying the internal
2040 * consistency since this code is quite fishy. */
2041 printf("%d %d %s(\\"%s\\", [%d, %S])\\n",
2042 logindex, pid, probefunc,
2043 self->exec_arg0,
2044 self->exec_argc,
2045 this->args);
2046 logindex++;
2047 this->args = 0;
2048 }
2049 syscall::exec*:return /trackedpid[pid]/ {
2050 self->exec_arg0 = 0;
2051 self->exec_argc = 0;
2052 self->exec_argv0 = 0;
2053 self->exec_argv1 = 0;
2054 self->exec_argv2 = 0;
2055 self->exec_argv3 = 0;
2056 }
2057 """
2058
2059 # Code currently not used.
2060 D_EXTRANEOUS = """
2061 /* This is a good learning experience, since it traces a lot of things
2062 * related to the process and child processes.
2063 * Warning: it generates a gigantic log. For example, tracing
2064 * "data/trace_inputs/child1.py --child" generates a 2mb log and takes
2065 * several minutes to execute.
2066 */
2067 /*
2068 mach_trap::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2069 printf("%d %d %s_%s() = %d\\n",
2070 logindex, pid, probeprov, probefunc, errno);
2071 logindex++;
2072 }
2073 proc::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2074 printf("%d %d %s_%s() = %d\\n",
2075 logindex, pid, probeprov, probefunc, errno);
2076 logindex++;
2077 }
2078 sched::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2079 printf("%d %d %s_%s() = %d\\n",
2080 logindex, pid, probeprov, probefunc, errno);
2081 logindex++;
2082 }
2083 syscall::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2084 printf("%d %d %s_%s() = %d\\n",
2085 logindex, pid, probeprov, probefunc, errno);
2086 logindex++;
2087 }
2088 vminfo::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2089 printf("%d %d %s_%s() = %d\\n",
2090 logindex, pid, probeprov, probefunc, errno);
2091 logindex++;
2092 }
2093 */
2094 /* TODO(maruel): *stat* functions and friends
2095 syscall::access:return,
2096 syscall::chdir:return,
2097 syscall::chflags:return,
2098 syscall::chown:return,
2099 syscall::chroot:return,
2100 syscall::getattrlist:return,
2101 syscall::getxattr:return,
2102 syscall::lchown:return,
2103 syscall::lstat64:return,
2104 syscall::lstat:return,
2105 syscall::mkdir:return,
2106 syscall::pathconf:return,
2107 syscall::readlink:return,
2108 syscall::removexattr:return,
2109 syscall::setxattr:return,
2110 syscall::stat64:return,
2111 syscall::stat:return,
2112 syscall::truncate:return,
2113 syscall::unlink:return,
2114 syscall::utimes:return,
2115 */
2116 """
2117
2118 def __init__(self, logname):
2119 """Starts the log collection with dtrace.
2120
2121 Requires root access or chmod 4555 on dtrace. dtrace is asynchronous so
2122 this needs to wait for dtrace to be "warmed up".
2123 """
2124 super(Dtrace.Tracer, self).__init__(logname)
2125 self._script = create_thunk()
2126 # This unique dummy temp file is used to signal the dtrace script that it
2127 # should stop as soon as all the child processes are done. A bit hackish
2128 # but it works well enough.
2129 self._dummy_file_id, self._dummy_file_name = tempfile.mkstemp(
2130 prefix='trace_signal_file')
2131
2132 # Note: do not use the -p flag. It's useless if the initial process quits
2133 # too fast, resulting in missing traces from the grand-children. The D
2134 # code manages the dtrace lifetime itself.
2135 trace_cmd = [
2136 'sudo',
2137 'dtrace',
2138 # Use a larger buffer if getting 'out of scratch space' errors.
2139 # Ref: https://wikis.oracle.com/display/DTrace/Options+and+Tunables
2140 '-b', '10m',
2141 '-x', 'dynvarsize=10m',
2142 #'-x', 'dtrace_global_maxsize=1m',
2143 '-x', 'evaltime=exec',
2144 '-o', '/dev/stderr',
2145 '-q',
2146 '-n', self._get_dtrace_code(),
2147 ]
2148 with open(self._logname + '.log', 'wb') as logfile:
2149 self._dtrace = subprocess.Popen(
2150 trace_cmd, stdout=logfile, stderr=subprocess.STDOUT)
2151 logging.debug('Started dtrace pid: %d' % self._dtrace.pid)
2152
2153 # Reads until one line is printed, which signifies dtrace is up and ready.
2154 with open(self._logname + '.log', 'rb') as logfile:
2155 while 'dtrace_BEGIN' not in logfile.readline():
2156 if self._dtrace.poll() is not None:
2157 # Do a busy wait. :/
2158 break
2159 logging.debug('dtrace started')
2160
2161 def _get_dtrace_code(self):
2162 """Sets up the D code to implement child process tracking.
2163
2164 Injects the cookie in the script so it knows when to stop.
2165
2166 The script will detect any instance of the script created with
2167 create_thunk() and will start tracing it.
2168 """
2169 return (
2170 'inline int PID = %d;\n'
2171 'inline string SCRIPT = "%s";\n'
2172 'inline int FILE_ID = %d;\n'
2173 '\n'
2174 '%s') % (
2175 os.getpid(),
2176 self._script,
2177 self._dummy_file_id,
2178 self.D_CODE) + self.D_CODE_EXECVE
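# For example (hypothetical values), with os.getpid() == 1234, a thunk script
# at '/tmp/trace_thunk.py' and a dummy file descriptor of 5, the header
# prepended to D_CODE + D_CODE_EXECVE would be:
#   inline int PID = 1234;
#   inline string SCRIPT = "/tmp/trace_thunk.py";
#   inline int FILE_ID = 5;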
2179
2180 def trace(self, cmd, cwd, tracename, output):
2181 """Runs dtrace on an executable.
2182
2183 dtruss is broken when it starts the process itself or when tracing
2184 child processes, so this code starts a wrapper process
2185 generated with create_thunk(), which starts the executable to trace.
2186 """
2187 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
2188 assert os.path.isabs(cmd[0]), cmd[0]
2189 assert os.path.isabs(cwd), cwd
2190 assert os.path.normpath(cwd) == cwd, cwd
2191 with self._lock:
2192 if not self._initialized:
2193 raise TracingFailure(
2194 'Called Tracer.trace() on an uninitialized object',
2195 None, None, None, tracename)
2196 assert tracename not in (i['trace'] for i in self._traces)
2197
2198 # Starts the script wrapper to start the child process. This signals the
2199 # dtrace script that this process is to be traced.
2200 stdout = stderr = None
2201 if output:
2202 stdout = subprocess.PIPE
2203 stderr = subprocess.STDOUT
2204 child_cmd = [
2205 sys.executable,
2206 self._script,
2207 tracename,
2208 ]
2209 # Call a dummy function so that dtrace knows I'm about to launch a process
2210 # that needs to be traced.
2211 # Yummy.
2212 child = subprocess.Popen(
2213 child_cmd + fix_python_path(cmd),
2214 stdin=subprocess.PIPE,
2215 stdout=stdout,
2216 stderr=stderr,
2217 cwd=cwd)
2218 logging.debug('Started child pid: %d' % child.pid)
2219
2220 out = child.communicate()[0]
2221 # This doesn't mean tracing is done; one of the grand-child processes may
2222 # still be alive. It will be tracked by the dtrace script.
2223
2224 with self._lock:
2225 assert tracename not in (i['trace'] for i in self._traces)
2226 self._traces.append(
2227 {
2228 'cmd': cmd,
2229 'cwd': cwd,
2230 # The pid of the wrapper script process, not very useful.
2231 'pid': child.pid,
2232 'trace': tracename,
2233 'output': out,
2234 })
2235 return child.returncode, out
2236
2237 def close(self, timeout=None):
2238 """Terminates dtrace."""
2239 logging.debug('close(%s)' % timeout)
2240 try:
2241 try:
2242 super(Dtrace.Tracer, self).close(timeout)
2243 # Signal dtrace that it should stop now.
2244 # ftruncate doesn't exist on Windows.
2245 os.ftruncate(self._dummy_file_id, 0) # pylint: disable=E1101
2246 if timeout:
2247 start = time.time()
2248 # Use polling. :/
2249 while (self._dtrace.poll() is None and
2250 (time.time() - start) < timeout):
2251 time.sleep(0.1)
2252 self._dtrace.kill()
2253 self._dtrace.wait()
2254 finally:
2255 # Make sure to kill it in any case.
2256 if self._dtrace.poll() is None:
2257 try:
2258 self._dtrace.kill()
2259 self._dtrace.wait()
2260 except OSError:
2261 pass
2262
2263 if self._dtrace.returncode != 0:
2264 # Warn about any dtrace failure but basically ignore it.
2265 print 'dtrace failure: %s' % self._dtrace.returncode
2266 finally:
2267 os.close(self._dummy_file_id)
2268 os.remove(self._dummy_file_name)
2269 os.remove(self._script)
2270
2271 def post_process_log(self):
2272 """Sorts the log back into the order in which each call occurred.
2273
2274 dtrace doesn't save the buffer in strict order since it keeps one buffer
2275 per CPU.
2276 """
2277 super(Dtrace.Tracer, self).post_process_log()
2278 logname = self._logname + '.log'
2279 with open(logname, 'rb') as logfile:
2280 lines = [l for l in logfile if l.strip()]
2281 errors = [l for l in lines if l.startswith('dtrace:')]
2282 if errors:
2283 raise TracingFailure(
2284 'Found errors in the trace: %s' % '\n'.join(errors),
2285 None, None, None, logname)
2286 try:
2287 lines = sorted(lines, key=lambda l: int(l.split(' ', 1)[0]))
2288 except ValueError:
2289 raise TracingFailure(
2290 'Found errors in the trace: %s' % '\n'.join(
2291 l for l in lines if not l.split(' ', 1)[0].isdigit()),
2292 None, None, None, logname)
2293 with open(logname, 'wb') as logfile:
2294 logfile.write(''.join(lines))
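# Sketch of the reordering, assuming three lines came out of dtrace's per-CPU
# buffers out of order:
#   ['2 10 open(...)', '0 10 dtrace_BEGIN(...)', '1 10 proc_start(...)']
# sorting on the leading logindex restores:
#   ['0 10 dtrace_BEGIN(...)', '1 10 proc_start(...)', '2 10 open(...)']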
2295
2296 @staticmethod
2297 def clean_trace(logname):
2298 for ext in ('', '.log'):
2299 if os.path.isfile(logname + ext):
2300 os.remove(logname + ext)
2301
2302 @classmethod
2303 def parse_log(cls, logname, blacklist):
2304 logging.info('parse_log(%s, ...)' % logname)
2305 assert os.path.isabs(logname)
2306
2307 def blacklist_more(filepath):
2308 # All the HFS metadata is in the form /.vol/...
2309 return blacklist(filepath) or re.match(r'^\/\.vol\/.+$', filepath)
2310
2311 data = read_json(logname)
2312 out = []
2313 for item in data['traces']:
2314 context = cls.Context(blacklist_more, item['pid'], item['cwd'])
2315 for line in open(logname + '.log', 'rb'):
2316 context.on_line(line)
2317 out.append(
2318 {
2319 'results': context.to_results(),
2320 'trace': item['trace'],
2321 'output': item['output'],
2322 })
2323 return out
2324
2325
2326class LogmanTrace(ApiBase):
2327 """Uses the native Windows ETW based tracing functionality to trace a child
2328 process.
2329
2330 Caveat: this implementation doesn't track cwd or initial_cwd. This is because
2331 the Windows Kernel doesn't have a concept of 'current working directory' at
2332 all. A Win32 process has a map of current directories, one per drive letter
2333 and it is managed by the user mode kernel32.dll. In kernel, a file is always
2334 opened relative to another file_object or as an absolute path. All the current
2335 working directory logic is done in user mode.
2336 """
2337 class Context(ApiBase.Context):
2338 """Processes an ETW log line and keeps the list of existent and
2339 non-existent files accessed.
2340
2341 Ignores directories.
2342 """
2343 # These indexes are for the stripped version in json.
2344 EVENT_NAME = 0
2345 TYPE = 1
2346 PID = 2
2347 TID = 3
2348 PROCESSOR_ID = 4
2349 TIMESTAMP = 5
2350 USER_DATA = 6
2351
2352 class Process(ApiBase.Context.Process):
2353 def __init__(self, *args):
2354 super(LogmanTrace.Context.Process, self).__init__(*args)
2355 # Handle file objects that succeeded.
2356 self.file_objects = {}
2357
2358 def __init__(self, blacklist, tracer_pid):
2359 logging.info('%s(%d)' % (self.__class__.__name__, tracer_pid))
2360 super(LogmanTrace.Context, self).__init__(blacklist)
2361 self._drive_map = DosDriveMap()
2362 # Threads mapping to the corresponding process id.
2363 self._threads_active = {}
2364 # Process ID of the tracer, e.g. trace_inputs.py
2365 self._tracer_pid = tracer_pid
2366 self._line_number = 0
2367
2368 def on_line(self, line):
2369 """Processes a json Event line."""
2370 self._line_number += 1
2371 try:
2372 # By Opcode
2373 handler = getattr(
2374 self,
2375 'handle_%s_%s' % (line[self.EVENT_NAME], line[self.TYPE]),
2376 None)
2377 if not handler:
2378 raise TracingFailure(
2379 'Unexpected event %s_%s' % (
2380 line[self.EVENT_NAME], line[self.TYPE]),
2381 None, None, None)
2382 handler(line)
2383 except TracingFailure, e:
2384 # Hack in the values since the handler could be a static function.
2385 e.pid = line[self.PID]
2386 e.line = line
2387 e.line_number = self._line_number
2388 # Re-raise the modified exception.
2389 raise
2390 except (KeyError, NotImplementedError, ValueError), e:
2391 raise TracingFailure(
2392 'Trace generated a %s exception: %s' % (
2393 e.__class__.__name__, str(e)),
2394 line[self.PID],
2395 self._line_number,
2396 line,
2397 e)
2398
2399 def to_results(self):
2400 if not self.root_process:
2401 raise TracingFailure(
2402 'Failed to detect the initial process',
2403 None, None, None)
2404 process = self.root_process.to_results_process()
2405 return Results(process)
2406
2407 def _thread_to_process(self, tid):
2408 """Finds the process from the thread id."""
2409 tid = int(tid, 16)
2410 pid = self._threads_active.get(tid)
2411 if not pid or not self._process_lookup.get(pid):
2412 return
2413 return self._process_lookup[pid]
2414
2415 @classmethod
2416 def handle_EventTrace_Header(cls, line):
2417 """Verifies no event was dropped, e.g. no buffer overrun occurred."""
2418 BUFFER_SIZE = cls.USER_DATA
2419 #VERSION = cls.USER_DATA + 1
2420 #PROVIDER_VERSION = cls.USER_DATA + 2
2421 #NUMBER_OF_PROCESSORS = cls.USER_DATA + 3
2422 #END_TIME = cls.USER_DATA + 4
2423 #TIMER_RESOLUTION = cls.USER_DATA + 5
2424 #MAX_FILE_SIZE = cls.USER_DATA + 6
2425 #LOG_FILE_MODE = cls.USER_DATA + 7
2426 #BUFFERS_WRITTEN = cls.USER_DATA + 8
2427 #START_BUFFERS = cls.USER_DATA + 9
2428 #POINTER_SIZE = cls.USER_DATA + 10
2429 EVENTS_LOST = cls.USER_DATA + 11
2430 #CPU_SPEED = cls.USER_DATA + 12
2431 #LOGGER_NAME = cls.USER_DATA + 13
2432 #LOG_FILE_NAME = cls.USER_DATA + 14
2433 #BOOT_TIME = cls.USER_DATA + 15
2434 #PERF_FREQ = cls.USER_DATA + 16
2435 #START_TIME = cls.USER_DATA + 17
2436 #RESERVED_FLAGS = cls.USER_DATA + 18
2437 #BUFFERS_LOST = cls.USER_DATA + 19
2438 #SESSION_NAME_STRING = cls.USER_DATA + 20
2439 #LOG_FILE_NAME_STRING = cls.USER_DATA + 21
2440 if line[EVENTS_LOST] != '0':
2441 raise TracingFailure(
2442 ( '%s events were lost during trace, please increase the buffer '
2443 'size from %s') % (line[EVENTS_LOST], line[BUFFER_SIZE]),
2444 None, None, None)
2445
2446 def handle_FileIo_Cleanup(self, line):
2447 """General wisdom: if a file is closed, it's because it was opened.
2448
2449 Note that FileIo_Close is not used since if a file was opened properly but
2450 not closed before the process exits, only Cleanup will be logged.
2451 """
2452 #IRP = self.USER_DATA
2453 TTID = self.USER_DATA + 1 # Thread ID, that's what we want.
2454 FILE_OBJECT = self.USER_DATA + 2
2455 #FILE_KEY = self.USER_DATA + 3
2456 proc = self._thread_to_process(line[TTID])
2457 if not proc:
2458 # Not a process we care about.
2459 return
2460 file_object = line[FILE_OBJECT]
2461 if file_object in proc.file_objects:
2462 proc.add_file(proc.file_objects.pop(file_object), False)
2463
2464 def handle_FileIo_Create(self, line):
2465 """Handles a file open.
2466
2467 All FileIo events are described at
2468 http://msdn.microsoft.com/library/windows/desktop/aa363884.aspx
2469 for some value of 'description'.
2470
2471 " (..) process and thread id values of the IO events (..) are not valid "
2472 http://msdn.microsoft.com/magazine/ee358703.aspx
2473
2474 The FileIo.Create event doesn't return if the CreateFile() call
2475 succeeded, so keep track of the file_object and check that it is
2476 eventually closed with FileIo_Cleanup.
2477 """
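# A hedged sketch of the bookkeeping, glossing over the exact DosDriveMap
# translation (values are hypothetical):
#   FileIo_Create with OPEN_PATH '"\Device\HarddiskVolume1\src\foo.h"' and
#   FILE_OBJECT '0xABC' stores proc.file_objects['0xABC'] = 'c:\src\foo.h';
#   a later FileIo_Cleanup with the same FILE_OBJECT pops the entry and
#   reports it through proc.add_file().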
2478 #IRP = self.USER_DATA
2479 TTID = self.USER_DATA + 1 # Thread ID, that's what we want.
2480 FILE_OBJECT = self.USER_DATA + 2
2481 #CREATE_OPTIONS = self.USER_DATA + 3
2482 #FILE_ATTRIBUTES = self.USER_DATA + 4
2483 #self.USER_DATA + SHARE_ACCESS = 5
2484 OPEN_PATH = self.USER_DATA + 6
2485
2486 proc = self._thread_to_process(line[TTID])
2487 if not proc:
2488 # Not a process we care about.
2489 return
2490
2491 match = re.match(r'^\"(.+)\"$', line[OPEN_PATH])
2492 raw_path = match.group(1)
2493 # Ignore directories and bare drive right away.
2494 if raw_path.endswith(os.path.sep):
2495 return
2496 filepath = self._drive_map.to_win32(raw_path)
2497 # Ignore bare drive right away. Some may still fall through with format
2498 # like '\\?\X:'
2499 if len(filepath) == 2:
2500 return
2501 file_object = line[FILE_OBJECT]
2502 if os.path.isdir(filepath):
2503 # There is no O_DIRECTORY equivalent on Windows. The closest is
2504 # FILE_FLAG_BACKUP_SEMANTICS but it's not exactly right either. So
2505 # simply discard directories as they are found.
2506 return
2507 # Override any stale file object
2508 proc.file_objects[file_object] = filepath
2509
2510 def handle_FileIo_Rename(self, line):
2511 # TODO(maruel): Handle?
2512 pass
2513
2514 def handle_Process_End(self, line):
2515 pid = line[self.PID]
2516 if self._process_lookup.get(pid):
2517 logging.info('Terminated: %d' % pid)
2518 self._process_lookup[pid] = None
2519 else:
2520 logging.debug('Terminated: %d' % pid)
2521
2522 def handle_Process_Start(self, line):
2523 """Handles a new child process started by PID."""
2524 #UNIQUE_PROCESS_KEY = self.USER_DATA
2525 PROCESS_ID = self.USER_DATA + 1
2526 #PARENT_PID = self.USER_DATA + 2
2527 #SESSION_ID = self.USER_DATA + 3
2528 #EXIT_STATUS = self.USER_DATA + 4
2529 #DIRECTORY_TABLE_BASE = self.USER_DATA + 5
2530 #USER_SID = self.USER_DATA + 6
2531 IMAGE_FILE_NAME = self.USER_DATA + 7
2532 COMMAND_LINE = self.USER_DATA + 8
2533
2534 ppid = line[self.PID]
2535 pid = int(line[PROCESS_ID], 16)
2536 logging.debug(
2537 'New process %d->%d (%s) %s' %
2538 (ppid, pid, line[IMAGE_FILE_NAME], line[COMMAND_LINE]))
2539
2540 if ppid == self._tracer_pid:
2541 # Need to ignore processes we don't know about because the log is
2542 # system-wide. self._tracer_pid shall start only one process.
2543 if self.root_process:
2544 raise TracingFailure(
2545 ( 'Parent process is _tracer_pid(%d) but root_process(%d) is '
2546 'already set') % (self._tracer_pid, self.root_process.pid),
2547 None, None, None)
2548 proc = self.Process(self.blacklist, pid, None)
2549 self.root_process = proc
2550 ppid = None
2551 elif self._process_lookup.get(ppid):
2552 proc = self.Process(self.blacklist, pid, None)
2553 self._process_lookup[ppid].children.append(proc)
2554 else:
2555 # Ignore
2556 return
2557 self._process_lookup[pid] = proc
2558
2559 if (not line[IMAGE_FILE_NAME].startswith('"') or
2560 not line[IMAGE_FILE_NAME].endswith('"')):
2561 raise TracingFailure(
2562 'Command line is not properly quoted: %s' % line[IMAGE_FILE_NAME],
2563 None, None, None)
2564
2565 # TODO(maruel): Process escapes.
2566 if (not line[COMMAND_LINE].startswith('"') or
2567 not line[COMMAND_LINE].endswith('"')):
2568 raise TracingFailure(
2569 'Command line is not properly quoted: %s' % line[COMMAND_LINE],
2570 None, None, None)
2571 proc.command = CommandLineToArgvW(line[COMMAND_LINE][1:-1])
2572 proc.executable = line[IMAGE_FILE_NAME][1:-1]
2573 # proc.command[0] may be the absolute path of 'executable' but it may be
2574 # anything else too. If it happens that command[0] ends with executable,
2575 # use it, otherwise default to the base name.
2576 cmd0 = proc.command[0].lower()
2577 if not cmd0.endswith('.exe'):
2578 # TODO(maruel): That's not strictly true either.
2579 cmd0 += '.exe'
2580 if cmd0.endswith(proc.executable) and os.path.isfile(cmd0):
2581 # Fix the path.
2582 cmd0 = cmd0.replace('/', os.path.sep)
2583 cmd0 = os.path.normpath(cmd0)
2584 proc.executable = get_native_path_case(cmd0)
2585 logging.info(
2586 'New child: %s -> %d %s' % (ppid, pid, proc.executable))
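# Hedged sketch of the executable fix-up above (hypothetical values):
#   IMAGE_FILE_NAME '"python.exe"', COMMAND_LINE '"d:\python\python foo.py"'
#   -> proc.command = ['d:\python\python', 'foo.py'], cmd0 becomes
#      'd:\python\python.exe'; if that file exists and ends with 'python.exe',
#      proc.executable is upgraded to its native path case.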
2587
2588 def handle_Thread_End(self, line):
2589 """Has the same parameters as Thread_Start."""
2590 tid = int(line[self.TID], 16)
2591 self._threads_active.pop(tid, None)
2592
2593 def handle_Thread_Start(self, line):
2594 """Handles a new thread created.
2595
2596 Do not use self.PID here since a process' initial thread is created by
2597 the parent process.
2598 """
2599 PROCESS_ID = self.USER_DATA
2600 TTHREAD_ID = self.USER_DATA + 1
2601 #STACK_BASE = self.USER_DATA + 2
2602 #STACK_LIMIT = self.USER_DATA + 3
2603 #USER_STACK_BASE = self.USER_DATA + 4
2604 #USER_STACK_LIMIT = self.USER_DATA + 5
2605 #AFFINITY = self.USER_DATA + 6
2606 #WIN32_START_ADDR = self.USER_DATA + 7
2607 #TEB_BASE = self.USER_DATA + 8
2608 #SUB_PROCESS_TAG = self.USER_DATA + 9
2609 #BASE_PRIORITY = self.USER_DATA + 10
2610 #PAGE_PRIORITY = self.USER_DATA + 11
2611 #IO_PRIORITY = self.USER_DATA + 12
2612 #THREAD_FLAGS = self.USER_DATA + 13
2613 # Do not use self.PID here since a process' initial thread is created by
2614 # the parent process.
2615 pid = int(line[PROCESS_ID], 16)
2616 tid = int(line[TTHREAD_ID], 16)
2617 logging.debug('New thread pid:%d, tid:%d' % (pid, tid))
2618 self._threads_active[tid] = pid
2619
2620 @classmethod
2621 """Returns all the processed events."""
2622 """Returns all the procesed events."""
2623 out = []
2624 for member in dir(cls):
2625 match = re.match(r'^handle_([A-Za-z]+)_([A-Za-z]+)$', member)
2626 if match:
2627 out.append(match.groups())
2628 return out
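# For example, the handler name 'handle_FileIo_Create' yields the tuple
# ('FileIo', 'Create'); trim() in Tracer.post_process_log() uses this list to
# keep only the (event, type) pairs that have a handler.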
2629
2630 class Tracer(ApiBase.Tracer):
2631 # The basic headers.
2632 EXPECTED_HEADER = [
2633 u'Event Name',
2634 u'Type',
2635 u'Event ID',
2636 u'Version',
2637 u'Channel',
2638 u'Level', # 5
2639 u'Opcode',
2640 u'Task',
2641 u'Keyword',
2642 u'PID',
2643 u'TID', # 10
2644 u'Processor Number',
2645 u'Instance ID',
2646 u'Parent Instance ID',
2647 u'Activity ID',
2648 u'Related Activity ID', # 15
2649 u'Clock-Time',
2650 u'Kernel(ms)', # Both have a resolution of ~15ms which makes them
2651 u'User(ms)', # pretty much useless.
2652 u'User Data', # Extra arguments that are event-specific.
2653 ]
2654 # Only the useful headers common to all entries are listed there. Any column
2655 # at 19 or higher is dependent on the specific event.
2656 EVENT_NAME = 0
2657 TYPE = 1
2658 PID = 9
2659 TID = 10
2660 PROCESSOR_ID = 11
2661 TIMESTAMP = 16
2662 NULL_GUID = '{00000000-0000-0000-0000-000000000000}'
2663 USER_DATA = 19
2664
2665 def __init__(self, logname):
2666 """Starts the log collection.
2667
2668 Requires administrative access. logman.exe is synchronous so no need for a
2669 "warmup" call. 'Windows Kernel Trace' is *localized* so use its GUID
2670 instead. The GUID constant name is SystemTraceControlGuid. Lovely.
2671
2672 One can get the list of potentially interesting providers with:
2673 "logman query providers | findstr /i file"
2674 """
2675 super(LogmanTrace.Tracer, self).__init__(logname)
2676 self._script = create_thunk()
2677 cmd_start = [
2678 'logman.exe',
2679 'start',
2680 'NT Kernel Logger',
2681 '-p', '{9e814aad-3204-11d2-9a82-006008a86939}',
2682 # splitio,fileiocompletion,syscall,file,cswitch,img
2683 '(process,fileio,thread)',
2684 '-o', self._logname + '.etl',
2685 '-ets', # Send directly to kernel
2686 # Values extracted out of thin air.
2687 # Event Trace Session buffer size in kb.
2688 '-bs', '10240',
2689 # Number of Event Trace Session buffers.
2690 '-nb', '16', '256',
2691 ]
2692 logging.debug('Running: %s' % cmd_start)
2693 try:
2694 subprocess.check_call(
2695 cmd_start,
2696 stdin=subprocess.PIPE,
2697 stdout=subprocess.PIPE,
2698 stderr=subprocess.STDOUT)
2699 except subprocess.CalledProcessError, e:
2700 if e.returncode == -2147024891:
2701 print >> sys.stderr, 'Please restart with an elevated admin prompt'
2702 elif e.returncode == -2144337737:
2703 print >> sys.stderr, (
2704 'A kernel trace was already running, stop it and try again')
2705 raise
2706
2707 def trace(self, cmd, cwd, tracename, output):
2708 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
2709 assert os.path.isabs(cmd[0]), cmd[0]
2710 assert os.path.isabs(cwd), cwd
2711 assert os.path.normpath(cwd) == cwd, cwd
2712 with self._lock:
2713 if not self._initialized:
2714 raise TracingFailure(
2715 'Called Tracer.trace() on an uninitialized object',
2716 None, None, None, tracename)
2717 assert tracename not in (i['trace'] for i in self._traces)
2718
2719 # Use "logman -?" for help.
2720
2721 stdout = stderr = None
2722 if output:
2723 stdout = subprocess.PIPE
2724 stderr = subprocess.STDOUT
2725
2726 # Run the child process.
2727 logging.debug('Running: %s' % cmd)
2728 # Use the temporary script generated with create_thunk() so we have a
2729 # clear pid owner. Since trace_inputs.py can be used as a library and
2730 # could trace multiple processes simultaneously, it would be more complex
2731 # if the executable to be traced were executed directly here. It also solves
2732 # issues related to logman.exe, which needs to be executed to control the
2733 # kernel trace.
2734 child_cmd = [
2735 sys.executable,
2736 self._script,
2737 tracename,
2738 ]
2739 child = subprocess.Popen(
2740 child_cmd + fix_python_path(cmd),
2741 cwd=cwd,
2742 stdin=subprocess.PIPE,
2743 stdout=stdout,
2744 stderr=stderr)
2745 logging.debug('Started child pid: %d' % child.pid)
2746 out = child.communicate()[0]
2747 # This doesn't mean all the grand-children are done. Sadly, we don't have
2748 # a good way to determine that.
2749
2750 with self._lock:
2751 assert tracename not in (i['trace'] for i in self._traces)
2752 self._traces.append({
2753 'cmd': cmd,
2754 'cwd': cwd,
2755 'pid': child.pid,
2756 'trace': tracename,
2757 'output': out,
2758 })
2759
2760 return child.returncode, out
2761
2762 def close(self, _timeout=None):
2763 """Stops the kernel log collection and converts the traces to text
2764 representation.
2765 """
2766 with self._lock:
2767 if not self._initialized:
2768 raise TracingFailure(
2769 'Called Tracer.close() on an uninitialized object',
2770 None, None, None)
2771 os.remove(self._script)
2772 # Save metadata, add 'format' key.
2773 data = {
2774 'format': 'csv',
2775 'traces': self._traces,
2776 }
2777 write_json(self._logname, data, False)
2778
2779 cmd_stop = [
2780 'logman.exe',
2781 'stop',
2782 'NT Kernel Logger',
2783 '-ets', # Sends the command directly to the kernel.
2784 ]
2785 logging.debug('Running: %s' % cmd_stop)
2786 subprocess.check_call(
2787 cmd_stop,
2788 stdin=subprocess.PIPE,
2789 stdout=subprocess.PIPE,
2790 stderr=subprocess.STDOUT)
2791 self._initialized = False
2792
2793 def post_process_log(self):
2794 """Converts the .etl file into .csv then into .json."""
2795 super(LogmanTrace.Tracer, self).post_process_log()
2796 logformat = 'csv'
2797 self._convert_log(logformat)
2798
2799 if logformat == 'csv_utf16':
2800 def load_file():
2801 def utf_8_encoder(unicode_csv_data):
2802 """Encodes the unicode object as utf-8 encoded str instance"""
2803 for line in unicode_csv_data:
2804 yield line.encode('utf-8')
2805
2806 def unicode_csv_reader(unicode_csv_data, **kwargs):
2807 """Encodes temporarily as UTF-8 since csv module doesn't do unicode.
2808 """
2809 csv_reader = csv.reader(utf_8_encoder(unicode_csv_data), **kwargs)
2810 for row in csv_reader:
2811 # Decode str utf-8 instances back to unicode instances, cell by
2812 # cell:
2813 yield [cell.decode('utf-8') for cell in row]
2814
2815 # The CSV file is UTF-16 so use codecs.open() to load the file into
2816 # the python internal unicode format (utf-8). Then explicitly
2817 # re-encode as utf8 as str instances so csv can parse it fine. Then
2818 # decode the utf-8 str back into python unicode instances. This
2819 # sounds about right.
2820 for line in unicode_csv_reader(
2821 codecs.open(self._logname + '.' + logformat, 'r', 'utf-16')):
2822 # line is a list of unicode objects
2823 # So much white space!
2824 yield [i.strip() for i in line]
2825
2826 elif logformat == 'csv':
2827 def load_file():
2828 def ansi_csv_reader(ansi_csv_data, **kwargs):
2829 """Loads an 'ANSI' code page and returns unicode() objects."""
2830 assert sys.getfilesystemencoding() == 'mbcs'
2831 encoding = get_current_encoding()
2832 for row in csv.reader(ansi_csv_data, **kwargs):
2833 # Decode str 'ansi' instances to unicode instances, cell by cell:
2834 yield [cell.decode(encoding) for cell in row]
2835
2836 # The fastest and smallest format but only supports 'ANSI' file paths.
2837 # E.g. the filenames are encoded in the 'current' encoding.
2838 for line in ansi_csv_reader(open(self._logname + '.' + logformat)):
2839 # line is a list of unicode objects.
2840 yield [i.strip() for i in line]
2841
2842 supported_events = LogmanTrace.Context.supported_events()
2843
2844 def trim(generator):
2845 for index, line in enumerate(generator):
2846 if not index:
2847 if line != self.EXPECTED_HEADER:
2848 raise TracingFailure(
2849 'Found malformed header: %s' % ' '.join(line),
2850 None, None, None)
2851 continue
2852 # As you can see, the CSV is full of useful non-redundant information:
2853 if (line[2] != '0' or # Event ID
2854 line[3] not in ('2', '3') or # Version
2855 line[4] != '0' or # Channel
2856 line[5] != '0' or # Level
2857 line[7] != '0' or # Task
2858 line[8] != '0x0000000000000000' or # Keyword
2859 line[12] != '' or # Instance ID
2860 line[13] != '' or # Parent Instance ID
2861 line[14] != self.NULL_GUID or # Activity ID
2862 line[15] != ''): # Related Activity ID
2863 raise TracingFailure(
2864 'Found unexpected values in line: %s' % ' '.join(line),
2865 None, None, None)
2866
2867 if (line[self.EVENT_NAME], line[self.TYPE]) not in supported_events:
2868 continue
2869
2870 # Convert the PID in-place from hex.
2871 line[self.PID] = int(line[self.PID], 16)
2872
2873 yield [
2874 line[self.EVENT_NAME],
2875 line[self.TYPE],
2876 line[self.PID],
2877 line[self.TID],
2878 line[self.PROCESSOR_ID],
2879 line[self.TIMESTAMP],
2880 ] + line[self.USER_DATA:]
2881
2882 write_json('%s.json' % self._logname, list(trim(load_file())), True)
2883
2884 def _convert_log(self, logformat):
2885 """Converts the ETL trace to text representation.
2886
2887 Normally, 'csv' is sufficient. If complex scripts are used (like eastern
2888 languages), use 'csv_utf16'. If localization gets in the way, use 'xml'.
2889
2890 Arguments:
2891 - logformat: Text format to be generated, csv, csv_utf16 or xml.
2892
2893 Use "tracerpt -?" for help.
2894 """
2895 LOCALE_INVARIANT = 0x7F
2896 windll.kernel32.SetThreadLocale(LOCALE_INVARIANT)
2897 cmd_convert = [
2898 'tracerpt.exe',
2899 '-l', self._logname + '.etl',
2900 '-o', self._logname + '.' + logformat,
2901 '-gmt', # Use UTC
2902 '-y', # No prompt
2903 # Use -of XML to get the header of each items after column 19, e.g. all
2904 # the actual headers of 'User Data'.
2905 ]
2906
2907 if logformat == 'csv':
2908 # tracerpt localizes the 'Type' column, for major brainfuck
2909 # entertainment. I can't imagine any sane reason to do that.
2910 cmd_convert.extend(['-of', 'CSV'])
2911 elif logformat == 'csv_utf16':
2912 # This causes it to use UTF-16, which doubles the log size but ensures
2913 # the log is readable for non-ASCII characters.
2914 cmd_convert.extend(['-of', 'CSV', '-en', 'Unicode'])
2915 elif logformat == 'xml':
2916 cmd_convert.extend(['-of', 'XML'])
2917 else:
2918 raise ValueError('Unexpected log format \'%s\'' % logformat)
2919 logging.debug('Running: %s' % cmd_convert)
2920 # This can take tens of minutes for large logs.
2921 # Redirects all output to stderr.
2922 subprocess.check_call(
2923 cmd_convert,
2924 stdin=subprocess.PIPE,
2925 stdout=sys.stderr,
2926 stderr=sys.stderr)
2927
2928 @staticmethod
2929 def clean_trace(logname):
2930 for ext in ('', '.csv', '.etl', '.json', '.xml'):
2931 if os.path.isfile(logname + ext):
2932 os.remove(logname + ext)
2933
2934 @classmethod
2935 def parse_log(cls, logname, blacklist):
2936 logging.info('parse_log(%s, %s)' % (logname, blacklist))
2937 assert os.path.isabs(logname)
2938
2939 def blacklist_more(filepath):
2940 # All the NTFS metadata is in the form x:\$EXTEND or stuff like that.
2941 return blacklist(filepath) or re.match(r'[A-Z]\:\\\$EXTEND', filepath)
2942
2943 data = read_json(logname)
2944 lines = read_json(logname + '.json')
2945 out = []
2946 for item in data['traces']:
2947 context = cls.Context(blacklist_more, item['pid'])
2948 for line in lines:
2949 context.on_line(line)
2950 out.append(
2951 {
2952 'results': context.to_results(),
2953 'trace': item['trace'],
2954 'output': item['output'],
2955 })
2956 return out
2957
2958
2959def get_api():
2960 """Returns the correct implementation for the current OS."""
2961 if sys.platform == 'cygwin':
2962 raise NotImplementedError(
2963 'Not implemented for cygwin, start the script from Win32 python')
2964 flavors = {
2965 'win32': LogmanTrace,
2966 'darwin': Dtrace,
2967 'sunos5': Dtrace,
2968 'freebsd7': Dtrace,
2969 'freebsd8': Dtrace,
2970 }
2971 # Defaults to strace.
2972 return flavors.get(sys.platform, Strace)()
2973
2974
2975def extract_directories(root_dir, files, blacklist):
2976 """Detects if all the files in a directory are in |files| and if so, replaces
2977 the individual files with a Results.Directory instance.
2978
2979 Takes a list of Results.File instances and returns a shorter list of
2980 Results.File and Results.Directory instances.
2981
2982 Arguments:
2983 - root_dir: Optional base directory that shouldn't be searched further.
2984 - files: list of Results.File instances.
2985 - blacklist: regexp of files to ignore, for example r'.+\.pyc'.
2986 """
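# A small sketch of the reduction, assuming root_dir '/src' and traced files
# '/src/a/1.h' and '/src/a/2.h': if os.listdir('/src/a') contains nothing
# else once the blacklist is applied, both File entries are replaced by a
# single Results.Directory for 'a/' carrying the summed size and file count.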
2987 logging.info(
2988 'extract_directories(%s, %d files, ...)' % (root_dir, len(files)))
2989 assert not (root_dir or '').endswith(os.path.sep), root_dir
2990 assert not root_dir or (get_native_path_case(root_dir) == root_dir)
2991 assert not any(isinstance(f, Results.Directory) for f in files)
2992 # Remove non existent files.
2993 files = [f for f in files if f.existent]
2994 if not files:
2995 return files
2996 # All files must share the same root, which can be None.
2997 assert len(set(f.root for f in files)) == 1, set(f.root for f in files)
2998
2999 # Creates a {directory: {filename: File}} mapping, up to root.
3000 buckets = {}
3001 if root_dir:
3002 buckets[root_dir] = {}
3003 for fileobj in files:
3004 path = fileobj.full_path
3005 directory = os.path.dirname(path)
3006 assert directory
3007 # Do not use os.path.basename() so trailing os.path.sep is kept.
3008 basename = path[len(directory)+1:]
3009 files_in_directory = buckets.setdefault(directory, {})
3010 files_in_directory[basename] = fileobj
3011 # Add all the directories recursively up to root.
3012 while True:
3013 old_d = directory
3014 directory = os.path.dirname(directory)
3015 if directory + os.path.sep == root_dir or directory == old_d:
3016 break
3017 buckets.setdefault(directory, {})
3018
3019 root_prefix = len(root_dir) + 1 if root_dir else 0
3020 for directory in sorted(buckets, reverse=True):
3021 actual = set(f for f in os.listdir(directory) if not blacklist(f))
3022 expected = set(buckets[directory])
3023 if not (actual - expected):
3024 parent = os.path.dirname(directory)
3025 buckets[parent][os.path.basename(directory)] = Results.Directory(
3026 root_dir,
3027 directory[root_prefix:],
3028 False,
3029 sum(f.size for f in buckets[directory].itervalues()),
3030 sum(f.nb_files for f in buckets[directory].itervalues()))
3031 # Remove the whole bucket.
3032 del buckets[directory]
3033
3034 # Reverse the mapping with what remains. The original instances are returned,
3035 # so the cached meta data is kept.
3036 files = sum((x.values() for x in buckets.itervalues()), [])
3037 return sorted(files, key=lambda x: x.path)
3038
3039
3040def trace(logfile, cmd, cwd, api, output):
3041 """Traces an executable. Returns (returncode, output) from api.
3042
3043 Arguments:
3044 - logfile: file to write to.
3045 - cmd: command to run.
3046 - cwd: current directory to start the process in.
3047 - api: a tracing api instance.
3048 - output: if True, returns output, otherwise prints it at the console.
3049 """
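# Minimal usage sketch (the log path and command are hypothetical):
#   api = get_api()
#   returncode, out = trace(
#       '/tmp/app.log', [sys.executable, 'app.py'], os.getcwd(), api, True)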
3050 cmd = fix_python_path(cmd)
3051 api.clean_trace(logfile)
3052 with api.get_tracer(logfile) as tracer:
3053 return tracer.trace(cmd, cwd, 'default', output)
3054
3055
3056def load_trace(logfile, root_dir, api, blacklist):
3057 """Loads a trace file and returns the Results instance.
3058
3059 Arguments:
3060 - logfile: File to load.
3061 - root_dir: Root directory to use to determine if a file is relevant to the
3062 trace or not.
3063 - api: A tracing api instance.
3064 - blacklist: Optional blacklist function to filter out unimportant files.
3065 """
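# Minimal usage sketch, continuing the trace() example above (hypothetical
# paths; blacklist left as None so no file is filtered out):
#   results = load_trace('/tmp/app.log', ROOT_DIR, get_api(), None)
#   print('\n'.join(f.path for f in results.existent))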
3066 data = api.parse_log(logfile, (blacklist or (lambda _: False)))
3067 assert len(data) == 1, 'More than one trace was detected!'
3068 if 'exception' in data[0]:
3069 # It got an exception, raise it.
3070 raise \
3071 data[0]['exception'][0], \
3072 data[0]['exception'][1], \
3073 data[0]['exception'][2]
3074 results = data[0]['results']
3075 if root_dir:
3076 results = results.strip_root(root_dir)
3077 return results
3078
3079
3080def CMDclean(args):
3081 """Cleans up traces."""
3082 parser = OptionParserTraceInputs(command='clean')
3083 options, args = parser.parse_args(args)
3084 api = get_api()
3085 api.clean_trace(options.log)
3086 return 0
3087
3088
3089def CMDtrace(args):
3090 """Traces an executable."""
3091 parser = OptionParserTraceInputs(command='trace')
3092 parser.allow_interspersed_args = False
3093 parser.add_option(
3094 '-q', '--quiet', action='store_true',
3095 help='Redirects traced executable output to /dev/null')
3096 options, args = parser.parse_args(args)
3097
3098 if not args:
3099 parser.error('Please provide a command to run')
3100
3101 if not os.path.isabs(args[0]) and os.access(args[0], os.X_OK):
3102 args[0] = os.path.abspath(args[0])
3103
3104 api = get_api()
3105 return trace(options.log, args, os.getcwd(), api, options.quiet)[0]
3106
3107
3108def CMDread(args):
3109 """Reads the logs and prints the result."""
3110 parser = OptionParserTraceInputs(command='read')
3111 parser.add_option(
3112 '-V', '--variable',
3113 nargs=2,
3114 action='append',
3115 dest='variables',
3116 metavar='VAR_NAME directory',
3117 default=[],
3118 help=('Variables to replace relative directories against. Example: '
3119 '"-V \'$HOME\' \'/home/%s\'" will replace all occurrences of your '
3120 'home dir with $HOME') % getpass.getuser())
3121 parser.add_option(
3122 '--root-dir',
3123 help='Root directory to base everything off of. Anything outside of this '
3124 'directory will not be reported')
3125 parser.add_option(
3126 '-j', '--json', action='store_true',
3127 help='Outputs raw result data as json')
3128 parser.add_option(
3129 '-b', '--blacklist', action='append', default=[],
3130 help='List of regexp to use as blacklist filter')
3131 options, args = parser.parse_args(args)
3132
3133 if options.root_dir:
3134 options.root_dir = os.path.abspath(options.root_dir)
3135
3136 variables = dict(options.variables)
3137 api = get_api()
3138 def blacklist(f):
3139 return any(re.match(b, f) for b in options.blacklist)
3140 data = api.parse_log(options.log, blacklist)
3141 # Process each trace.
3142 output_as_json = []
3143 for item in data:
3144 if 'exception' in item:
3145 # Do not abort the other traces.
3146 print >> sys.stderr, (
3147 'Trace %s: Got an exception: %s' % (
3148 item['trace'], item['exception'][1]))
3149 continue
3150 results = item['results']
3151 if options.root_dir:
3152 results = results.strip_root(options.root_dir)
3153
3154 if options.json:
3155 output_as_json.append(results.flatten())
3156 else:
3157 simplified = extract_directories(
3158 options.root_dir, results.files, blacklist)
3159 simplified = [f.replace_variables(variables) for f in simplified]
3160 if len(data) > 1:
3161 print('Trace: %s' % item['trace'])
3162 print('Total: %d' % len(results.files))
3163 print('Non existent: %d' % len(results.non_existent))
3164 for f in results.non_existent:
3165 print(' %s' % f.path)
3166 print(
3167 'Interesting: %d reduced to %d' % (
3168 len(results.existent), len(simplified)))
3169 for f in simplified:
3170 print(' %s' % f.path)
3171
3172 if options.json:
3173 write_json(sys.stdout, output_as_json, False)
3174 return 0
3175
3176
3177class OptionParserWithLogging(optparse.OptionParser):
3178 """Adds --verbose option."""
3179 def __init__(self, verbose=0, **kwargs):
3180 optparse.OptionParser.__init__(self, **kwargs)
3181 self.add_option(
3182 '-v', '--verbose',
3183 action='count',
3184 default=verbose,
3185 help='Use multiple times to increase verbosity')
3186
3187 def parse_args(self, *args, **kwargs):
3188 options, args = optparse.OptionParser.parse_args(self, *args, **kwargs)
3189 levels = [logging.ERROR, logging.INFO, logging.DEBUG]
3190 logging.basicConfig(
3191 level=levels[min(len(levels)-1, options.verbose)],
3192 format='%(levelname)5s %(module)15s(%(lineno)3d): %(message)s')
3193 return options, args
3194
3195
3196class OptionParserWithNiceDescription(OptionParserWithLogging):
3197 """Generates the description with the command's docstring."""
3198 def __init__(self, **kwargs):
3199 """Sets 'description' and 'usage' if not already specified."""
3200 command = kwargs.pop('command', 'help')
3201 kwargs.setdefault(
3202 'description',
3203 re.sub('[\r\n ]{2,}', ' ', get_command_handler(command).__doc__))
3204 kwargs.setdefault('usage', '%%prog %s [options]' % command)
3205 OptionParserWithLogging.__init__(self, **kwargs)
3206
3207
3208class OptionParserTraceInputs(OptionParserWithNiceDescription):
3209 """Adds automatic --log handling."""
3210 def __init__(self, **kwargs):
3211 OptionParserWithNiceDescription.__init__(self, **kwargs)
3212 self.add_option(
3213 '-l', '--log', help='Log file to generate or read, required')
3214
3215 def parse_args(self, *args, **kwargs):
3216 """Makes sure the paths make sense.
3217
3218 On Windows, / and \ are often mixed together in a path.
3219 """
3220 options, args = OptionParserWithNiceDescription.parse_args(
3221 self, *args, **kwargs)
3222 if not options.log:
3223 self.error('Must supply a log file with -l')
3224 options.log = os.path.abspath(options.log)
3225 return options, args
3226
3227
3228def extract_documentation():
3229 """Returns a dict {command: description} for each documented command."""
3230 commands = (
3231 fn[3:]
3232 for fn in dir(sys.modules['__main__'])
3233 if fn.startswith('CMD') and get_command_handler(fn[3:]).__doc__)
3234 return dict((fn, get_command_handler(fn).__doc__) for fn in commands)
3235
3236
3237def CMDhelp(args):
3238 """Prints list of commands or help for a specific command."""
3239 doc = extract_documentation()
3240 # Calculates the optimal offset.
3241 offset = max(len(cmd) for cmd in doc)
3242 format_str = ' %-' + str(offset + 2) + 's %s'
3243 # Generate a one-line documentation string for each command.
3244 commands_description = '\n'.join(
3245 format_str % (cmd, doc[cmd].split('\n')[0]) for cmd in sorted(doc))
3246
3247 parser = OptionParserWithNiceDescription(
3248 usage='%prog <command> [options]',
3249 description='Commands are:\n%s\n' % commands_description)
3250 parser.format_description = lambda _: parser.description
3251
3252 # Strip out any -h or --help argument.
3253 _, args = parser.parse_args([i for i in args if not i in ('-h', '--help')])
3254 if len(args) == 1:
3255 if not get_command_handler(args[0]):
3256 parser.error('Unknown command %s' % args[0])
3257 # The command was "%prog help command", replaces ourself with
3258 # "%prog command --help" so help is correctly printed out.
3259 return main(args + ['--help'])
3260 elif args:
3261 parser.error('Unknown argument "%s"' % ' '.join(args))
3262 parser.print_help()
3263 return 0
3264
3265
3266def get_command_handler(name):
3267 """Returns the command handler or CMDhelp if it doesn't exist."""
3268 return getattr(sys.modules['__main__'], 'CMD%s' % name, None)
3269
3270
3271def main_impl(argv):
3272 command = get_command_handler(argv[0] if argv else 'help')
3273 if not command:
3274 return CMDhelp(argv)
3275 return command(argv[1:])
3276
3277def main(argv):
3278 try:
3279 return main_impl(argv)
3280 except TracingFailure, e:
3281 sys.stderr.write('\nError: ')
3282 sys.stderr.write(str(e))
3283 sys.stderr.write('\n')
3284 return 1
3285
3286
3287if __name__ == '__main__':
3288 sys.exit(main(sys.argv[1:]))