blob: ab2c95d50e66f7511a7614ec63c639da90aeeef5 [file] [log] [blame]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001#!/usr/bin/env python
2# coding=utf-8
3# Copyright (c) 2012 The Chromium Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""Traces an executable and its child processes and extract the files accessed
8by them.
9
10The implementation uses OS-specific API. The native Kernel logger and the ETL
11interface is used on Windows. Dtrace is used on OSX. Strace is used otherwise.
12The OS-specific implementation is hidden in an 'API' interface.
13
14The results are embedded in a Results instance. The tracing is done in two
15phases, the first is to do the actual trace and generate an
16implementation-specific log file. Then the log file is parsed to extract the
17information, including the individual child processes and the files accessed
18from the log.
19"""
20
21import codecs
22import csv
23import getpass
24import glob
25import json
26import logging
27import optparse
28import os
29import re
30import subprocess
31import sys
32import tempfile
33import threading
34import time
35import weakref
36
37## OS-specific imports
38
39if sys.platform == 'win32':
40 from ctypes.wintypes import byref, create_unicode_buffer, c_int, c_wchar_p
41 from ctypes.wintypes import windll, FormatError # pylint: disable=E0611
42 from ctypes.wintypes import GetLastError # pylint: disable=E0611
43elif sys.platform == 'darwin':
44 import Carbon.File # pylint: disable=F0401
45 import MacOS # pylint: disable=F0401
46
47
48BASE_DIR = os.path.dirname(os.path.abspath(__file__))
49ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR))
50
51
52class TracingFailure(Exception):
53 """An exception occured during tracing."""
54 def __init__(self, description, pid, line_number, line, *args):
55 super(TracingFailure, self).__init__(
56 description, pid, line_number, line, *args)
57 self.description = description
58 self.pid = pid
59 self.line_number = line_number
60 self.line = line
61 self.extra = args
62
63 def __str__(self):
64 out = self.description
65 if self.pid:
66 out += '\npid: %d' % self.pid
67 if self.line_number:
68 out += '\nline: %d' % self.line_number
69 if self.line:
70 out += '\n%s' % self.line
71 if self.extra:
72 out += '\n' + ', '.join(map(str, filter(None, self.extra)))
73 return out
74
75
76## OS-specific functions
77
78if sys.platform == 'win32':
79 def QueryDosDevice(drive_letter):
80 """Returns the Windows 'native' path for a DOS drive letter."""
81 assert re.match(r'^[a-zA-Z]:$', drive_letter), drive_letter
82 # Guesswork. QueryDosDeviceW never returns the required number of bytes.
83 chars = 1024
84 drive_letter = unicode(drive_letter)
85 p = create_unicode_buffer(chars)
86 if 0 == windll.kernel32.QueryDosDeviceW(drive_letter, p, chars):
87 err = GetLastError()
88 if err:
89 # pylint: disable=E0602
90 raise WindowsError(
91 err,
92 'QueryDosDevice(%s): %s (%d)' % (
93 str(drive_letter), FormatError(err), err))
94 return p.value
95
96
97 def GetShortPathName(long_path):
98 """Returns the Windows short path equivalent for a 'long' path."""
99 long_path = unicode(long_path)
100 # Adds '\\\\?\\' when given an absolute path so the MAX_PATH (260) limit is
101 # not enforced.
102 if os.path.isabs(long_path) and not long_path.startswith('\\\\?\\'):
103 long_path = '\\\\?\\' + long_path
104 chars = windll.kernel32.GetShortPathNameW(long_path, None, 0)
105 if chars:
106 p = create_unicode_buffer(chars)
107 if windll.kernel32.GetShortPathNameW(long_path, p, chars):
108 return p.value
109
110 err = GetLastError()
111 if err:
112 # pylint: disable=E0602
113 raise WindowsError(
114 err,
115 'GetShortPathName(%s): %s (%d)' % (
116 str(long_path), FormatError(err), err))
117
118
119 def GetLongPathName(short_path):
120 """Returns the Windows long path equivalent for a 'short' path."""
121 short_path = unicode(short_path)
122 # Adds '\\\\?\\' when given an absolute path so the MAX_PATH (260) limit is
123 # not enforced.
124 if os.path.isabs(short_path) and not short_path.startswith('\\\\?\\'):
125 short_path = '\\\\?\\' + short_path
126 chars = windll.kernel32.GetLongPathNameW(short_path, None, 0)
127 if chars:
128 p = create_unicode_buffer(chars)
129 if windll.kernel32.GetLongPathNameW(short_path, p, chars):
130 return p.value
131
132 err = GetLastError()
133 if err:
134 # pylint: disable=E0602
135 raise WindowsError(
136 err,
137 'GetLongPathName(%s): %s (%d)' % (
138 str(short_path), FormatError(err), err))
139
140
141 def get_current_encoding():
142 """Returns the 'ANSI' code page associated to the process."""
143 return 'cp%d' % int(windll.kernel32.GetACP())
144
145
146 class DosDriveMap(object):
147 """Maps \Device\HarddiskVolumeN to N: on Windows."""
148 # Keep one global cache.
149 _MAPPING = {}
150
151 def __init__(self):
152 """Lazy loads the cache."""
153 if not self._MAPPING:
154 # This is related to UNC resolver on windows. Ignore that.
155 self._MAPPING['\\Device\\Mup'] = None
156 self._MAPPING['\\SystemRoot'] = os.environ['SystemRoot']
157
158 for letter in (chr(l) for l in xrange(ord('C'), ord('Z')+1)):
159 try:
160 letter = '%s:' % letter
161 mapped = QueryDosDevice(letter)
162 if mapped in self._MAPPING:
163 logging.warn(
164 ('Two drives: \'%s\' and \'%s\', are mapped to the same disk'
165 '. Drive letters are a user-mode concept and the kernel '
166 'traces only have NT path, so all accesses will be '
167 'associated with the first drive letter, independent of the '
168 'actual letter used by the code') % (
169 self._MAPPING[mapped], letter))
170 else:
171 self._MAPPING[mapped] = letter
172 except WindowsError: # pylint: disable=E0602
173 pass
174
175 def to_win32(self, path):
176 """Converts a native NT path to Win32/DOS compatible path."""
177 match = re.match(r'(^\\Device\\[a-zA-Z0-9]+)(\\.*)?$', path)
178 if not match:
179 raise ValueError(
180 'Can\'t convert %s into a Win32 compatible path' % path,
181 path)
182 if not match.group(1) in self._MAPPING:
183 # Unmapped partitions may be accessed by windows for the
184 # fun of it while the test is running. Discard these.
185 return None
186 drive = self._MAPPING[match.group(1)]
187 if not drive or not match.group(2):
188 return drive
189 return drive + match.group(2)
190
191
192 def isabs(path):
193 """Accepts X: as an absolute path, unlike python's os.path.isabs()."""
194 return os.path.isabs(path) or len(path) == 2 and path[1] == ':'
195
196
197 def get_native_path_case(p):
198 """Returns the native path case for an existing file.
199
200 On Windows, removes any leading '\\?\'.
201 """
202 if not isabs(p):
203 raise ValueError(
204 'Can\'t get native path case for a non-absolute path: %s' % p,
205 p)
206 # Windows used to have an option to turn on case sensitivity on non Win32
207 # subsystem but that's out of scope here and isn't supported anymore.
208 # Go figure why GetShortPathName() is needed.
209 try:
210 out = GetLongPathName(GetShortPathName(p))
211 except OSError, e:
212 if e.args[0] in (2, 3, 5):
213 # The path does not exist. Try to recurse and reconstruct the path.
214 base = os.path.dirname(p)
215 rest = os.path.basename(p)
216 return os.path.join(get_native_path_case(base), rest)
217 raise
218 if out.startswith('\\\\?\\'):
219 out = out[4:]
220 # Always upper case the first letter since GetLongPathName() will return the
221 # drive letter in the case it was given.
222 return out[0].upper() + out[1:]
223
224
225 def CommandLineToArgvW(command_line):
226 """Splits a commandline into argv using CommandLineToArgvW()."""
227 # http://msdn.microsoft.com/library/windows/desktop/bb776391.aspx
228 size = c_int()
229 ptr = windll.shell32.CommandLineToArgvW(unicode(command_line), byref(size))
230 try:
231 return [arg for arg in (c_wchar_p * size.value).from_address(ptr)]
232 finally:
233 windll.kernel32.LocalFree(ptr)
234
235
236elif sys.platform == 'darwin':
237
238
239 # On non-windows, keep the stdlib behavior.
240 isabs = os.path.isabs
241
242
243 def _find_item_native_case(root_path, item):
244 """Gets the native path case of a single item based at root_path.
245
246 There is no API to get the native path case of symlinks on OSX. So it
247 needs to be done the slow way.
248 """
249 item = item.lower()
250 for element in os.listdir(root_path):
251 if element.lower() == item:
252 return element
253
254
255 def _native_case(p):
256 """Gets the native path case. Warning: this function resolves symlinks."""
257 logging.debug('native_case(%s)' % p)
258 try:
259 rel_ref, _ = Carbon.File.FSPathMakeRef(p)
260 out = rel_ref.FSRefMakePath()
261 if p.endswith(os.path.sep) and not out.endswith(os.path.sep):
262 return out + os.path.sep
263 return out
264 except MacOS.Error, e:
265 if e.args[0] in (-43, -120):
266 # The path does not exist. Try to recurse and reconstruct the path.
267 # -43 means file not found.
268 # -120 means directory not found.
269 base = os.path.dirname(p)
270 rest = os.path.basename(p)
271 return os.path.join(_native_case(base), rest)
272 raise OSError(
273 e.args[0], 'Failed to get native path for %s' % p, p, e.args[1])
274
275
276 def _split_at_symlink_native(base_path, rest):
277 """Returns the native path for a symlink."""
278 base, symlink, rest = split_at_symlink(base_path, rest)
279 if symlink:
280 if not base_path:
281 base_path = base
282 else:
283 base_path = safe_join(base_path, base)
284 symlink = _find_item_native_case(base_path, symlink)
285 return base, symlink, rest
286
287
288 def get_native_path_case(path):
289 """Returns the native path case for an existing file.
290
291 Technically, it's only HFS+ on OSX that is case preserving and
292 insensitive. It's the default setting on HFS+ but can be changed.
293 """
294 if not isabs(path):
295 raise ValueError(
296 'Can\'t get native path case for a non-absolute path: %s' % path,
297 path)
298 if path.startswith('/dev'):
299 # /dev is not visible from Carbon, causing an exception.
300 return path
301
302 # Starts assuming there is no symlink along the path.
303 resolved = _native_case(path)
304 if resolved.lower() == path.lower():
305 # This code path is incredibly faster.
306 return resolved
307
308 # There was a symlink, process it.
309 base, symlink, rest = _split_at_symlink_native(None, path)
310 assert symlink, (path, base, symlink, rest, resolved)
311 prev = base
312 base = safe_join(_native_case(base), symlink)
313 assert len(base) > len(prev)
314 while rest:
315 prev = base
316 relbase, symlink, rest = _split_at_symlink_native(base, rest)
317 base = safe_join(base, relbase)
318 assert len(base) > len(prev), (prev, base, symlink)
319 if symlink:
320 base = safe_join(base, symlink)
321 assert len(base) > len(prev), (prev, base, symlink)
322 # Make sure no symlink was resolved.
323 assert base.lower() == path.lower(), (base, path)
324 return base
325
326
327else: # OSes other than Windows and OSX.
328
329
330 # On non-windows, keep the stdlib behavior.
331 isabs = os.path.isabs
332
333
334 def get_native_path_case(path):
335 """Returns the native path case for an existing file.
336
337 On OSes other than OSX and Windows, assume the file system is
338 case-sensitive.
339
340 TODO(maruel): This is not strictly true. Implement if necessary.
341 """
342 if not isabs(path):
343 raise ValueError(
344 'Can\'t get native path case for a non-absolute path: %s' % path,
345 path)
346 # Give up on cygwin, as GetLongPathName() can't be called.
347 # Linux traces tends to not be normalized so use this occasion to normalize
348 # it. This function implementation already normalizes the path on the other
349 # OS so this needs to be done here to be coherent between OSes.
350 out = os.path.normpath(path)
351 if path.endswith(os.path.sep) and not out.endswith(os.path.sep):
352 return out + os.path.sep
353 return out
354
355
356if sys.platform != 'win32': # All non-Windows OSes.
357
358
359 def safe_join(*args):
360 """Joins path elements like os.path.join() but doesn't abort on absolute
361 path.
362
363 os.path.join('foo', '/bar') == '/bar'
364 but safe_join('foo', '/bar') == 'foo/bar'.
365 """
366 out = ''
367 for element in args:
368 if element.startswith(os.path.sep):
369 if out.endswith(os.path.sep):
370 out += element[1:]
371 else:
372 out += element
373 else:
374 if out.endswith(os.path.sep):
375 out += element
376 else:
377 out += os.path.sep + element
378 return out
379
380
381 def split_at_symlink(base_dir, relfile):
382 """Scans each component of relfile and cut the string at the symlink if
383 there is any.
384
385 Returns a tuple (base_path, symlink, rest), with symlink == rest == None if
386 not symlink was found.
387 """
388 if base_dir:
389 assert relfile
390 assert os.path.isabs(base_dir)
391 index = 0
392 else:
393 assert os.path.isabs(relfile)
394 index = 1
395
396 def at_root(rest):
397 if base_dir:
398 return safe_join(base_dir, rest)
399 return rest
400
401 while True:
402 try:
403 index = relfile.index(os.path.sep, index)
404 except ValueError:
405 index = len(relfile)
406 full = at_root(relfile[:index])
407 if os.path.islink(full):
408 # A symlink!
409 base = os.path.dirname(relfile[:index])
410 symlink = os.path.basename(relfile[:index])
411 rest = relfile[index:]
412 logging.debug(
413 'split_at_symlink(%s, %s) -> (%s, %s, %s)' %
414 (base_dir, relfile, base, symlink, rest))
415 return base, symlink, rest
416 if index == len(relfile):
417 break
418 index += 1
419 return relfile, None, None
420
421
422def fix_python_path(cmd):
423 """Returns the fixed command line to call the right python executable."""
424 out = cmd[:]
425 if out[0] == 'python':
426 out[0] = sys.executable
427 elif out[0].endswith('.py'):
428 out.insert(0, sys.executable)
429 return out
430
431
432def create_thunk():
433 handle, name = tempfile.mkstemp(prefix='trace_inputs_thunk', suffix='.py')
434 os.write(
435 handle,
436 (
437 'import subprocess\n'
438 'import sys\n'
439 'sys.exit(subprocess.call(sys.argv[2:]))\n'
440 ))
441 os.close(handle)
442 return name
443
444
445def strace_process_quoted_arguments(text):
446 """Extracts quoted arguments on a string and return the arguments as a list.
447
448 Implemented as an automaton. Supports incomplete strings in the form
449 '"foo"...'.
450
451 Example:
452 With text = '"foo", "bar"', the function will return ['foo', 'bar']
453
454 TODO(maruel): Implement escaping.
455 """
456 # All the possible states of the DFA.
457 ( NEED_QUOTE, # Begining of a new arguments.
458 INSIDE_STRING, # Inside an argument.
459 ESCAPED, # Found a '\' inside a quote. Treat the next char as-is.
460 NEED_COMMA_OR_DOT, # Right after the closing quote of an argument. Could be
461 # a serie of 3 dots or a comma.
462 NEED_SPACE, # Right after a comma
463 NEED_DOT_2, # Found a dot, need a second one.
464 NEED_DOT_3, # Found second dot, need a third one.
465 NEED_COMMA, # Found third dot, need a comma.
466 ) = range(8)
467
468 state = NEED_QUOTE
469 out = []
470 for index, char in enumerate(text):
471 if char == '"':
472 if state == NEED_QUOTE:
473 state = INSIDE_STRING
474 # A new argument was found.
475 out.append('')
476 elif state == INSIDE_STRING:
477 # The argument is now closed.
478 state = NEED_COMMA_OR_DOT
479 elif state == ESCAPED:
480 out[-1] += char
481 state = INSIDE_STRING
482 else:
483 raise ValueError(
484 'Can\'t process char at column %d for: %r' % (index, text),
485 index,
486 text)
487 elif char == ',':
488 if state in (NEED_COMMA_OR_DOT, NEED_COMMA):
489 state = NEED_SPACE
490 elif state == INSIDE_STRING:
491 out[-1] += char
492 elif state == ESCAPED:
493 out[-1] += char
494 state = INSIDE_STRING
495 else:
496 raise ValueError(
497 'Can\'t process char at column %d for: %r' % (index, text),
498 index,
499 text)
500 elif char == ' ':
501 if state == NEED_SPACE:
502 state = NEED_QUOTE
503 elif state == INSIDE_STRING:
504 out[-1] += char
505 elif state == ESCAPED:
506 out[-1] += char
507 state = INSIDE_STRING
508 else:
509 raise ValueError(
510 'Can\'t process char at column %d for: %r' % (index, text),
511 index,
512 text)
513 elif char == '.':
514 if state == NEED_COMMA_OR_DOT:
515 # The string is incomplete, this mean the strace -s flag should be
516 # increased.
517 state = NEED_DOT_2
518 elif state == NEED_DOT_2:
519 state = NEED_DOT_3
520 elif state == NEED_DOT_3:
521 state = NEED_COMMA
522 elif state == INSIDE_STRING:
523 out[-1] += char
524 elif state == ESCAPED:
525 out[-1] += char
526 state = INSIDE_STRING
527 else:
528 raise ValueError(
529 'Can\'t process char at column %d for: %r' % (index, text),
530 index,
531 text)
532 elif char == '\\':
533 if state == ESCAPED:
534 out[-1] += char
535 state = INSIDE_STRING
536 elif state == INSIDE_STRING:
537 state = ESCAPED
538 else:
539 raise ValueError(
540 'Can\'t process char at column %d for: %r' % (index, text),
541 index,
542 text)
543 else:
544 if state == INSIDE_STRING:
545 out[-1] += char
546 else:
547 raise ValueError(
548 'Can\'t process char at column %d for: %r' % (index, text),
549 index,
550 text)
551 if state not in (NEED_COMMA, NEED_COMMA_OR_DOT):
552 raise ValueError(
553 'String is incorrectly terminated: %r' % text,
554 text)
555 return out
556
557
558def read_json(filepath):
559 with open(filepath, 'r') as f:
560 return json.load(f)
561
562
563def write_json(filepath_or_handle, data, dense):
564 """Writes data into filepath or file handle encoded as json.
565
566 If dense is True, the json is packed. Otherwise, it is human readable.
567 """
568 if hasattr(filepath_or_handle, 'write'):
569 if dense:
570 filepath_or_handle.write(
571 json.dumps(data, sort_keys=True, separators=(',',':')))
572 else:
573 filepath_or_handle.write(json.dumps(data, sort_keys=True, indent=2))
574 else:
575 with open(filepath_or_handle, 'wb') as f:
576 if dense:
577 json.dump(data, f, sort_keys=True, separators=(',',':'))
578 else:
579 json.dump(data, f, sort_keys=True, indent=2)
580
581
582class Results(object):
583 """Results of a trace session."""
584
585 class _TouchedObject(object):
586 """Something, a file or a directory, that was accessed."""
587 def __init__(self, root, path, tainted, size, nb_files):
588 logging.debug(
589 '%s(%s, %s, %s, %s, %s)' %
590 (self.__class__.__name__, root, path, tainted, size, nb_files))
591 self.root = root
592 self.path = path
593 self.tainted = tainted
594 self.nb_files = nb_files
595 # Can be used as a cache or a default value, depending on context.
596 self._size = size
597 # These are cache only.
598 self._real_path = None
599
600 # Check internal consistency.
601 assert path, path
602 assert tainted or bool(root) != bool(isabs(path)), (root, path)
603 assert tainted or (
604 not os.path.exists(self.full_path) or
605 (self.full_path == get_native_path_case(self.full_path))), (
606 tainted, self.full_path, get_native_path_case(self.full_path))
607
608 @property
609 def existent(self):
610 return self.size != -1
611
612 @property
613 def full_path(self):
614 if self.root:
615 return os.path.join(self.root, self.path)
616 return self.path
617
618 @property
619 def real_path(self):
620 """Returns the path with symlinks resolved."""
621 if not self._real_path:
622 self._real_path = os.path.realpath(self.full_path)
623 return self._real_path
624
625 @property
626 def size(self):
627 """File's size. -1 is not existent."""
628 if self._size is None and not self.tainted:
629 try:
630 self._size = os.stat(self.full_path).st_size
631 except OSError:
632 self._size = -1
633 return self._size
634
635 def flatten(self):
636 """Returns a dict representing this object.
637
638 A 'size' of 0 means the file was only touched and not read.
639 """
640 return {
641 'path': self.path,
642 'size': self.size,
643 }
644
645 def replace_variables(self, variables):
646 """Replaces the root of this File with one of the variables if it matches.
647
648 If a variable replacement occurs, the cloned object becomes tainted.
649 """
650 for variable, root_path in variables.iteritems():
651 if self.path.startswith(root_path):
652 return self._clone(
653 self.root, variable + self.path[len(root_path):], True)
654 # No need to clone, returns ourself.
655 return self
656
657 def strip_root(self, root):
658 """Returns a clone of itself with 'root' stripped off."""
659 # Check internal consistency.
660 assert self.tainted or (isabs(root) and root.endswith(os.path.sep)), root
661 if not self.full_path.startswith(root):
662 # Now try to resolve the symlinks to see if it can be reached this way.
663 # Only try *after* trying without resolving symlink.
664 if not self.real_path.startswith(root):
665 return None
666 path = self.real_path
667 else:
668 path = self.full_path
669 return self._clone(root, path[len(root):], self.tainted)
670
671 def _clone(self, new_root, new_path, tainted):
672 raise NotImplementedError(self.__class__.__name__)
673
674 class File(_TouchedObject):
675 """A file that was accessed. May not be present anymore.
676
677 If tainted is true, it means it is not a real path anymore as a variable
678 replacement occured.
679
680 If touched_only is True, this means the file was probed for existence, and
681 it is existent, but was never _opened_. If touched_only is True, the file
682 must have existed.
683 """
684 def __init__(self, root, path, tainted, size):
685 super(Results.File, self).__init__(root, path, tainted, size, 1)
686
687 def _clone(self, new_root, new_path, tainted):
688 """Clones itself keeping meta-data."""
689 # Keep the self.size and self._real_path caches for performance reason. It
690 # is also important when the file becomes tainted (with a variable instead
691 # of the real path) since self.path is not an on-disk path anymore so
692 # out._size cannot be updated.
693 out = self.__class__(new_root, new_path, tainted, self.size)
694 out._real_path = self._real_path
695 return out
696
697 class Directory(_TouchedObject):
698 """A directory of files. Must exist."""
699 def __init__(self, root, path, tainted, size, nb_files):
700 """path='.' is a valid value and must be handled appropriately."""
701 assert not path.endswith(os.path.sep), path
702 super(Results.Directory, self).__init__(
703 root, path + os.path.sep, tainted, size, nb_files)
704 # For a Directory instance, self.size is not a cache, it's an actual value
705 # that is never modified and represents the total size of the files
706 # contained in this directory. It is possible that the directory is empty
707 # so that size == 0; this happens if there's only an invalid symlink in
708 # it.
709
710 def flatten(self):
711 out = super(Results.Directory, self).flatten()
712 out['nb_files'] = self.nb_files
713 return out
714
715 def _clone(self, new_root, new_path, tainted):
716 """Clones itself keeping meta-data."""
717 out = self.__class__(
718 new_root,
719 new_path.rstrip(os.path.sep),
720 tainted,
721 self.size,
722 self.nb_files)
723 out._real_path = self._real_path
724 return out
725
726 class Process(object):
727 """A process that was traced.
728
729 Contains references to the files accessed by this process and its children.
730 """
731 def __init__(self, pid, files, executable, command, initial_cwd, children):
732 logging.debug('Process(%s, %d, ...)' % (pid, len(files)))
733 self.pid = pid
734 self.files = sorted(files, key=lambda x: x.path)
735 self.children = children
736 self.executable = executable
737 self.command = command
738 self.initial_cwd = initial_cwd
739
740 # Check internal consistency.
741 assert len(set(f.path for f in self.files)) == len(self.files), sorted(
742 f.path for f in self.files)
743 assert isinstance(self.children, list)
744 assert isinstance(self.files, list)
745
746 @property
747 def all(self):
748 for child in self.children:
749 for i in child.all:
750 yield i
751 yield self
752
753 def flatten(self):
754 return {
755 'children': [c.flatten() for c in self.children],
756 'command': self.command,
757 'executable': self.executable,
758 'files': [f.flatten() for f in self.files],
759 'initial_cwd': self.initial_cwd,
760 'pid': self.pid,
761 }
762
763 def strip_root(self, root):
764 assert isabs(root) and root.endswith(os.path.sep), root
765 # Loads the files after since they are constructed as objects.
766 out = self.__class__(
767 self.pid,
768 filter(None, (f.strip_root(root) for f in self.files)),
769 self.executable,
770 self.command,
771 self.initial_cwd,
772 [c.strip_root(root) for c in self.children])
773 logging.debug(
774 'strip_root(%s) %d -> %d' % (root, len(self.files), len(out.files)))
775 return out
776
777 def __init__(self, process):
778 self.process = process
779 # Cache.
780 self._files = None
781
782 def flatten(self):
783 return {
784 'root': self.process.flatten(),
785 }
786
787 @property
788 def files(self):
789 if self._files is None:
790 self._files = sorted(
791 sum((p.files for p in self.process.all), []),
792 key=lambda x: x.path)
793 return self._files
794
795 @property
796 def existent(self):
797 return [f for f in self.files if f.existent]
798
799 @property
800 def non_existent(self):
801 return [f for f in self.files if not f.existent]
802
803 def strip_root(self, root):
804 """Returns a clone with all the files outside the directory |root| removed
805 and converts all the path to be relative paths.
806 """
807 # Resolve any symlink
808 root = os.path.realpath(root)
809 root = get_native_path_case(root).rstrip(os.path.sep) + os.path.sep
810 logging.debug('strip_root(%s)' % root)
811 return Results(self.process.strip_root(root))
812
813
814class ApiBase(object):
815 """OS-agnostic API to trace a process and its children."""
816 class Context(object):
817 """Processes one log line at a time and keeps the list of traced processes.
818
819 The parsing is complicated by the fact that logs are traced out of order for
820 strace but in-order for dtrace and logman. In addition, on Windows it is
821 very frequent that processids are reused so a flat list cannot be used. But
822 at the same time, it is impossible to faithfully construct a graph when the
823 logs are processed out of order. So both a tree and a flat mapping are used,
824 the tree is the real process tree, while the flat mapping stores the last
825 valid process for the corresponding processid. For the strace case, the
826 tree's head is guessed at the last moment.
827 """
828 class Process(object):
829 """Keeps context for one traced child process.
830
831 Logs all the files this process touched. Ignores directories.
832 """
833 def __init__(self, blacklist, pid, initial_cwd):
834 # Check internal consistency.
835 assert isinstance(pid, int), repr(pid)
836 self.pid = pid
837 # children are Process instances.
838 self.children = []
839 self.initial_cwd = initial_cwd
840 self.cwd = None
841 self.files = set()
842 self.only_touched = set()
843 self.executable = None
844 self.command = None
845 self._blacklist = blacklist
846
847 def to_results_process(self):
848 """Resolves file case sensitivity and or late-bound strings."""
849 # When resolving files, it's normal to get dupe because a file could be
850 # opened multiple times with different case. Resolve the deduplication
851 # here.
852 def fix_path(x):
853 """Returns the native file path case.
854
855 Converts late-bound strings.
856 """
857 if not x:
858 # Do not convert None instance to 'None'.
859 return x
860 # TODO(maruel): Do not upconvert to unicode here, on linux we don't
861 # know the file path encoding so they must be treated as bytes.
862 x = unicode(x)
863 if os.path.isabs(x):
864 # If the path is not absolute, which tends to happen occasionally on
865 # Windows, it is not possible to get the native path case so ignore
866 # that trace. It mostly happens for 'executable' value.
867 x = get_native_path_case(x)
868 return x
869
870 def fix_and_blacklist_path(x):
871 x = fix_path(x)
872 if not x:
873 return
874 # The blacklist needs to be reapplied, since path casing could
875 # influence blacklisting.
876 if self._blacklist(x):
877 return
878 return x
879
880 # Filters out directories. Some may have passed through.
881 files = set(f for f in map(fix_and_blacklist_path, self.files) if f)
882 only_touched = set(
883 f for f in map(fix_and_blacklist_path, self.only_touched) if f)
884 only_touched -= files
885
886 files = [
887 Results.File(None, f, False, None) for f in files
888 if not os.path.isdir(f)
889 ]
890 # Using 0 as size means the file's content is ignored since the file was
891 # never opened for I/O.
892 files.extend(
893 Results.File(None, f, False, 0) for f in only_touched
894 if not os.path.isdir(f)
895 )
896 return Results.Process(
897 self.pid,
898 files,
899 fix_path(self.executable),
900 self.command,
901 fix_path(self.initial_cwd),
902 [c.to_results_process() for c in self.children])
903
904 def add_file(self, filepath, touch_only):
905 """Adds a file if it passes the blacklist."""
906 if self._blacklist(unicode(filepath)):
907 return
908 logging.debug('add_file(%d, %s, %s)' % (self.pid, filepath, touch_only))
909 # Note that filepath and not unicode(filepath) is added. It is because
910 # filepath could be something else than a string, like a RelativePath
911 # instance for dtrace logs.
912 if touch_only:
913 self.only_touched.add(filepath)
914 else:
915 self.files.add(filepath)
916
917 def __init__(self, blacklist):
918 self.blacklist = blacklist
919 # Initial process.
920 self.root_process = None
921 # dict to accelerate process lookup, to not have to lookup the whole graph
922 # each time.
923 self._process_lookup = {}
924
925 class Tracer(object):
926 """During it's lifetime, the tracing subsystem is enabled."""
927 def __init__(self, logname):
928 self._logname = logname
929 self._lock = threading.Lock()
930 self._traces = []
931 self._initialized = True
932
933 def trace(self, cmd, cwd, tracename, output):
934 """Runs the OS-specific trace program on an executable.
935
936 Arguments:
937 - cmd: The command (a list) to run.
938 - cwd: Current directory to start the child process in.
939 - tracename: Name of the trace in the logname file.
940 - output: If False, redirects output to PIPEs.
941
942 Returns a tuple (resultcode, output) and updates the internal trace
943 entries.
944 """
945 # The implementation adds an item to self._traces.
946 raise NotImplementedError(self.__class__.__name__)
947
948 def close(self, _timeout=None):
949 """Saves the meta-data in the logname file.
950
951 For kernel-based tracing, stops the tracing subsystem.
952
953 Must not be used manually when using 'with' construct.
954 """
955 with self._lock:
956 assert self._initialized
957 try:
958 data = {
959 'traces': self._traces,
960 }
961 write_json(self._logname, data, False)
962 finally:
963 self._initialized = False
964
965 def post_process_log(self):
966 """Post-processes the log so it becomes faster to load afterward.
967
968 Must not be used manually when using 'with' construct.
969 """
970 assert not self._initialized, 'Must stop tracing first.'
971
972 def __enter__(self):
973 """Enables 'with' statement."""
974 return self
975
976 def __exit__(self, exc_type, exc_value, traceback):
977 """Enables 'with' statement."""
978 self.close()
979 # If an exception was thrown, do not process logs.
980 if not exc_type:
981 self.post_process_log()
982
983 def get_tracer(self, logname):
984 """Returns an ApiBase.Tracer instance.
985
986 Initializes the tracing subsystem, which is a requirement for kernel-based
987 tracers. Only one tracer instance should be live at a time!
988
989 logname is the filepath to the json file that will contain the meta-data
990 about the logs.
991 """
992 return self.Tracer(logname)
993
994 @staticmethod
995 def clean_trace(logname):
996 """Deletes an old log."""
997 raise NotImplementedError()
998
999 @classmethod
1000 def parse_log(cls, logname, blacklist):
1001 """Processes trace logs and returns the files opened and the files that do
1002 not exist.
1003
1004 It does not track directories.
1005
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00001006 |logname| must be an absolute path.
1007
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001008 Most of the time, files that do not exist are temporary test files that
1009 should be put in /tmp instead. See http://crbug.com/116251.
1010
1011 Returns a list of dict with keys:
1012 - results: A Results instance.
1013 - trace: The corresponding tracename parameter provided to
1014 get_tracer().trace().
1015 - output: Output gathered during execution, if get_tracer().trace(...,
1016 output=False) was used.
1017 """
1018 raise NotImplementedError(cls.__class__.__name__)
1019
1020
1021class Strace(ApiBase):
1022 """strace implies linux."""
1023 class Context(ApiBase.Context):
1024 """Processes a strace log line and keeps the list of existent and non
1025 existent files accessed.
1026
1027 Ignores directories.
1028
1029 Uses late-binding to processes the cwd of each process. The problem is that
1030 strace generates one log file per process it traced but doesn't give any
1031 information about which process was started when and by who. So we don't
1032 even know which process is the initial one. So process the logs out of
1033 order and use late binding with RelativePath to be able to deduce the
1034 initial directory of each process once all the logs are parsed.
1035 """
1036 class Process(ApiBase.Context.Process):
1037 """Represents the state of a process.
1038
1039 Contains all the information retrieved from the pid-specific log.
1040 """
1041 # Function names are using ([a-z_0-9]+)
1042 # This is the most common format. function(args) = result
1043 RE_HEADER = re.compile(r'^([a-z_0-9]+)\((.+?)\)\s+= (.+)$')
1044 # An interrupted function call, only grab the minimal header.
1045 RE_UNFINISHED = re.compile(r'^([^\(]+)(.*) \<unfinished \.\.\.\>$')
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001046 # An interrupted function call, with the process exiting. It must be the
1047 # last line in the log.
1048 RE_UNFINISHED_EXIT = re.compile(
1049 r'^([^\(]+)(.*) \<unfinished \.\.\.\ exit status \d+>$')
maruel@chromium.org4e506502012-10-18 15:02:58 +00001050 # An interrupted function call the hard way. Usually observed with futex()
1051 # on ubuntu 12.04.
csharp@chromium.org4d753552012-10-25 19:31:55 +00001052 RE_INTERRUPTED_HARD = re.compile(r'^([^\(]+)\([A-Z0-9a-fx\,\_\|\{\} ]*$')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001053 # A resumed function call.
1054 RE_RESUMED = re.compile(r'^<\.\.\. ([^ ]+) resumed> (.+)$')
1055 # A process received a signal.
1056 RE_SIGNAL = re.compile(r'^--- SIG[A-Z]+ .+ ---')
1057 # A process didn't handle a signal. Ignore any junk appearing before,
1058 # because the process was forcibly killed so it won't open any new file.
1059 RE_KILLED = re.compile(
1060 r'^.*\+\+\+ killed by ([A-Z]+)( \(core dumped\))? \+\+\+$')
1061 # The process has exited.
1062 RE_PROCESS_EXITED = re.compile(r'^\+\+\+ exited with (\d+) \+\+\+')
1063 # A call was canceled. Ignore any prefix.
1064 RE_UNAVAILABLE = re.compile(r'^.*\)\s*= \? <unavailable>$')
1065 # Happens when strace fails to even get the function name.
1066 UNNAMED_FUNCTION = '????'
1067
1068 # Corner-case in python, a class member function decorator must not be
1069 # @staticmethod.
1070 def parse_args(regexp, expect_zero): # pylint: disable=E0213
1071 """Automatically convert the str 'args' into a list of processed
1072 arguments.
1073
1074 Arguments:
1075 - regexp is used to parse args.
1076 - expect_zero: one of True, False or None.
1077 - True: will check for result.startswith('0') first and will ignore
1078 the trace line completely otherwise. This is important because for
1079 many functions, the regexp will not process if the call failed.
1080 - False: will check for not result.startswith(('?', '-1')) for the
1081 same reason than with True.
1082 - None: ignore result.
1083 """
1084 def meta_hook(function):
1085 assert function.__name__.startswith('handle_')
1086 def hook(self, args, result):
1087 if expect_zero is True and not result.startswith('0'):
1088 return
1089 if expect_zero is False and result.startswith(('?', '-1')):
1090 return
1091 match = re.match(regexp, args)
1092 if not match:
1093 raise TracingFailure(
1094 'Failed to parse %s(%s) = %s' %
1095 (function.__name__[len('handle_'):], args, result),
1096 None, None, None)
1097 return function(self, match.groups(), result)
1098 return hook
1099 return meta_hook
1100
1101 class RelativePath(object):
1102 """A late-bound relative path."""
1103 def __init__(self, parent, value):
1104 self.parent = parent
1105 self.value = value
1106
1107 def render(self):
1108 """Returns the current directory this instance is representing.
1109
1110 This function is used to return the late-bound value.
1111 """
1112 if self.value and self.value.startswith(u'/'):
1113 # An absolute path.
1114 return self.value
1115 parent = self.parent.render() if self.parent else u'<None>'
1116 if self.value:
1117 return os.path.normpath(os.path.join(parent, self.value))
1118 return parent
1119
1120 def __unicode__(self):
1121 """Acts as a string whenever needed."""
1122 return unicode(self.render())
1123
1124 def __str__(self):
1125 """Acts as a string whenever needed."""
1126 return str(self.render())
1127
1128 def __init__(self, root, pid):
1129 """Keeps enough information to be able to guess the original process
1130 root.
1131
1132 strace doesn't store which process was the initial process. So more
1133 information needs to be kept so the graph can be reconstructed from the
1134 flat map.
1135 """
1136 logging.info('%s(%d)' % (self.__class__.__name__, pid))
1137 super(Strace.Context.Process, self).__init__(root.blacklist, pid, None)
1138 assert isinstance(root, ApiBase.Context)
1139 self._root = weakref.ref(root)
1140 # The dict key is the function name of the pending call, like 'open'
1141 # or 'execve'.
1142 self._pending_calls = {}
1143 self._line_number = 0
1144 # Current directory when the process started.
1145 self.initial_cwd = self.RelativePath(self._root(), None)
1146 self.parentid = None
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001147 self._done = False
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001148
1149 def get_cwd(self):
1150 """Returns the best known value of cwd."""
1151 return self.cwd or self.initial_cwd
1152
1153 def render(self):
1154 """Returns the string value of the RelativePath() object.
1155
1156 Used by RelativePath. Returns the initial directory and not the
1157 current one since the current directory 'cwd' validity is time-limited.
1158
1159 The validity is only guaranteed once all the logs are processed.
1160 """
1161 return self.initial_cwd.render()
1162
1163 def on_line(self, line):
1164 self._line_number += 1
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001165 if self._done:
1166 raise TracingFailure(
1167 'Found a trace for a terminated process',
1168 None, None, None)
1169
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001170 if self.RE_SIGNAL.match(line):
1171 # Ignore signals.
1172 return
1173
1174 try:
1175 match = self.RE_KILLED.match(line)
1176 if match:
1177 # Converts a '+++ killed by Foo +++' trace into an exit_group().
1178 self.handle_exit_group(match.group(1), None)
1179 return
1180
1181 match = self.RE_PROCESS_EXITED.match(line)
1182 if match:
1183 # Converts a '+++ exited with 1 +++' trace into an exit_group()
1184 self.handle_exit_group(match.group(1), None)
1185 return
1186
1187 match = self.RE_UNFINISHED.match(line)
1188 if match:
1189 if match.group(1) in self._pending_calls:
1190 raise TracingFailure(
1191 'Found two unfinished calls for the same function',
1192 None, None, None,
1193 self._pending_calls)
1194 self._pending_calls[match.group(1)] = (
1195 match.group(1) + match.group(2))
1196 return
1197
maruel@chromium.org4e506502012-10-18 15:02:58 +00001198 match = (
1199 self.RE_UNFINISHED_EXIT.match(line) or
1200 self.RE_INTERRUPTED_HARD.match(line))
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001201 if match:
1202 # The process died. No other line can be processed afterward.
1203 self._done = True
1204 return
1205
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001206 match = self.RE_UNAVAILABLE.match(line)
1207 if match:
1208 # This usually means a process was killed and a pending call was
1209 # canceled.
1210 # TODO(maruel): Look up the last exit_group() trace just above and
1211 # make sure any self._pending_calls[anything] is properly flushed.
1212 return
1213
1214 match = self.RE_RESUMED.match(line)
1215 if match:
1216 if match.group(1) not in self._pending_calls:
1217 raise TracingFailure(
1218 'Found a resumed call that was not logged as unfinished',
1219 None, None, None,
1220 self._pending_calls)
1221 pending = self._pending_calls.pop(match.group(1))
1222 # Reconstruct the line.
1223 line = pending + match.group(2)
1224
1225 match = self.RE_HEADER.match(line)
1226 if not match:
1227 raise TracingFailure(
1228 'Found an invalid line: %s' % line,
1229 None, None, None)
1230 if match.group(1) == self.UNNAMED_FUNCTION:
1231 return
1232
1233 # It's a valid line, handle it.
1234 handler = getattr(self, 'handle_%s' % match.group(1), None)
1235 if not handler:
1236 self._handle_unknown(match.group(1), match.group(2), match.group(3))
1237 return handler(match.group(2), match.group(3))
1238 except TracingFailure, e:
1239 # Hack in the values since the handler could be a static function.
1240 e.pid = self.pid
1241 e.line = line
1242 e.line_number = self._line_number
1243 # Re-raise the modified exception.
1244 raise
1245 except (KeyError, NotImplementedError, ValueError), e:
1246 raise TracingFailure(
1247 'Trace generated a %s exception: %s' % (
1248 e.__class__.__name__, str(e)),
1249 self.pid,
1250 self._line_number,
1251 line,
1252 e)
1253
1254 @parse_args(r'^\"(.+?)\", [FKORWX_|]+$', True)
1255 def handle_access(self, args, _result):
1256 self._handle_file(args[0], True)
1257
1258 @parse_args(r'^\"(.+?)\"$', True)
1259 def handle_chdir(self, args, _result):
1260 """Updates cwd."""
1261 self.cwd = self.RelativePath(self, args[0])
1262 logging.debug('handle_chdir(%d, %s)' % (self.pid, self.cwd))
1263
1264 def handle_clone(self, _args, result):
1265 """Transfers cwd."""
1266 if result.startswith(('?', '-1')):
1267 # The call failed.
1268 return
1269 # Update the other process right away.
1270 childpid = int(result)
1271 child = self._root().get_or_set_proc(childpid)
1272 if child.parentid is not None or childpid in self.children:
1273 raise TracingFailure(
1274 'Found internal inconsitency in process lifetime detection '
1275 'during a clone() call',
1276 None, None, None)
1277
1278 # Copy the cwd object.
1279 child.initial_cwd = self.get_cwd()
1280 child.parentid = self.pid
1281 # It is necessary because the logs are processed out of order.
1282 self.children.append(child)
1283
1284 def handle_close(self, _args, _result):
1285 pass
1286
1287 def handle_chmod(self, _args, _result):
1288 pass
1289
1290 def handle_creat(self, _args, _result):
1291 # Ignore files created, since they didn't need to exist.
1292 pass
1293
1294 @parse_args(r'^\"(.+?)\", \[(.+)\], \[\/\* \d+ vars? \*\/\]$', True)
1295 def handle_execve(self, args, _result):
1296 # Even if in practice execve() doesn't returns when it succeeds, strace
1297 # still prints '0' as the result.
1298 filepath = args[0]
1299 self._handle_file(filepath, False)
1300 self.executable = self.RelativePath(self.get_cwd(), filepath)
1301 self.command = strace_process_quoted_arguments(args[1])
1302
1303 def handle_exit_group(self, _args, _result):
1304 """Removes cwd."""
1305 self.cwd = None
1306
1307 def handle_fork(self, args, result):
1308 self._handle_unknown('fork', args, result)
1309
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001310 def handle_futex(self, _args, _result):
1311 pass
1312
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001313 def handle_getcwd(self, _args, _result):
1314 pass
1315
1316 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1317 def handle_link(self, args, _result):
1318 self._handle_file(args[0], False)
1319 self._handle_file(args[1], False)
1320
1321 @parse_args(r'\"(.+?)\", \{.+?, \.\.\.\}', True)
1322 def handle_lstat(self, args, _result):
1323 self._handle_file(args[0], True)
1324
1325 def handle_mkdir(self, _args, _result):
1326 pass
1327
1328 @parse_args(r'^\"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', False)
1329 def handle_open(self, args, _result):
1330 if 'O_DIRECTORY' in args[1]:
1331 return
1332 self._handle_file(args[0], False)
1333
1334 @parse_args(r'^(\d+|AT_FDCWD), \"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', False)
1335 def handle_openat(self, args, _result):
1336 if 'O_DIRECTORY' in args[2]:
1337 return
1338 if args[1] == 'AT_FDCWD':
1339 self._handle_file(args[1], False)
1340 else:
1341 # TODO(maruel): Implement relative open if necessary instead of the
1342 # AT_FDCWD flag, let's hope not since this means tracking all active
1343 # directory handles.
1344 raise Exception('Relative open via openat not implemented.')
1345
1346 @parse_args(r'^\"(.+?)\", \".+?\"(\.\.\.)?, \d+$', False)
1347 def handle_readlink(self, args, _result):
1348 self._handle_file(args[0], False)
1349
1350 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1351 def handle_rename(self, args, _result):
1352 self._handle_file(args[0], False)
1353 self._handle_file(args[1], False)
1354
1355 def handle_rmdir(self, _args, _result):
1356 pass
1357
1358 def handle_setxattr(self, _args, _result):
1359 pass
1360
1361 @parse_args(r'\"(.+?)\", \{.+?, \.\.\.\}', True)
1362 def handle_stat(self, args, _result):
1363 self._handle_file(args[0], True)
1364
1365 def handle_symlink(self, _args, _result):
1366 pass
1367
1368 def handle_unlink(self, _args, _result):
1369 # In theory, the file had to be created anyway.
1370 pass
1371
1372 def handle_statfs(self, _args, _result):
1373 pass
1374
1375 def handle_vfork(self, args, result):
1376 self._handle_unknown('vfork', args, result)
1377
1378 @staticmethod
1379 def _handle_unknown(function, args, result):
1380 raise TracingFailure(
1381 'Unexpected/unimplemented trace %s(%s)= %s' %
1382 (function, args, result),
1383 None, None, None)
1384
1385 def _handle_file(self, filepath, touch_only):
1386 filepath = self.RelativePath(self.get_cwd(), filepath)
1387 #assert not touch_only, unicode(filepath)
1388 self.add_file(filepath, touch_only)
1389
1390 def __init__(self, blacklist, initial_cwd):
1391 super(Strace.Context, self).__init__(blacklist)
1392 self.initial_cwd = initial_cwd
1393
1394 def render(self):
1395 """Returns the string value of the initial cwd of the root process.
1396
1397 Used by RelativePath.
1398 """
1399 return self.initial_cwd
1400
1401 def on_line(self, pid, line):
1402 """Transfers control into the Process.on_line() function."""
1403 self.get_or_set_proc(pid).on_line(line.strip())
1404
1405 def to_results(self):
1406 """Finds back the root process and verify consistency."""
1407 # TODO(maruel): Absolutely unecessary, fix me.
1408 root = [p for p in self._process_lookup.itervalues() if not p.parentid]
1409 if len(root) != 1:
1410 raise TracingFailure(
1411 'Found internal inconsitency in process lifetime detection '
1412 'while finding the root process',
1413 None,
1414 None,
1415 None,
1416 sorted(p.pid for p in root))
1417 self.root_process = root[0]
1418 process = self.root_process.to_results_process()
1419 if sorted(self._process_lookup) != sorted(p.pid for p in process.all):
1420 raise TracingFailure(
1421 'Found internal inconsitency in process lifetime detection '
1422 'while looking for len(tree) == len(list)',
1423 None,
1424 None,
1425 None,
1426 sorted(self._process_lookup),
1427 sorted(p.pid for p in process.all))
1428 return Results(process)
1429
1430 def get_or_set_proc(self, pid):
1431 """Returns the Context.Process instance for this pid or creates a new one.
1432 """
1433 if not pid or not isinstance(pid, int):
1434 raise TracingFailure(
1435 'Unpexpected value for pid: %r' % pid,
1436 pid,
1437 None,
1438 None,
1439 pid)
1440 if pid not in self._process_lookup:
1441 self._process_lookup[pid] = self.Process(self, pid)
1442 return self._process_lookup[pid]
1443
1444 @classmethod
1445 def traces(cls):
1446 """Returns the list of all handled traces to pass this as an argument to
1447 strace.
1448 """
1449 prefix = 'handle_'
1450 return [i[len(prefix):] for i in dir(cls.Process) if i.startswith(prefix)]
1451
1452 class Tracer(ApiBase.Tracer):
1453 MAX_LEN = 256
1454
1455 def trace(self, cmd, cwd, tracename, output):
1456 """Runs strace on an executable."""
1457 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
1458 assert os.path.isabs(cmd[0]), cmd[0]
1459 assert os.path.isabs(cwd), cwd
1460 assert os.path.normpath(cwd) == cwd, cwd
1461 with self._lock:
1462 if not self._initialized:
1463 raise TracingFailure(
1464 'Called Tracer.trace() on an unitialized object',
1465 None, None, None, tracename)
1466 assert tracename not in (i['trace'] for i in self._traces)
1467 stdout = stderr = None
1468 if output:
1469 stdout = subprocess.PIPE
1470 stderr = subprocess.STDOUT
1471 # Ensure all file related APIs are hooked.
1472 traces = ','.join(Strace.Context.traces() + ['file'])
1473 trace_cmd = [
1474 'strace',
1475 '-ff',
1476 '-s', '%d' % self.MAX_LEN,
1477 '-e', 'trace=%s' % traces,
1478 '-o', self._logname + '.' + tracename,
1479 ]
1480 child = subprocess.Popen(
1481 trace_cmd + cmd,
1482 cwd=cwd,
1483 stdin=subprocess.PIPE,
1484 stdout=stdout,
1485 stderr=stderr)
1486 out = child.communicate()[0]
1487 # TODO(maruel): Walk the logs and figure out the root process would
1488 # simplify parsing the logs a *lot*.
1489 with self._lock:
1490 assert tracename not in (i['trace'] for i in self._traces)
1491 self._traces.append(
1492 {
1493 'cmd': cmd,
1494 'cwd': cwd,
1495 # The pid of strace process, not very useful.
1496 'pid': child.pid,
1497 'trace': tracename,
1498 'output': out,
1499 })
1500 return child.returncode, out
1501
1502 @staticmethod
1503 def clean_trace(logname):
1504 if os.path.isfile(logname):
1505 os.remove(logname)
1506 # Also delete any pid specific file from previous traces.
1507 for i in glob.iglob(logname + '.*'):
1508 if i.rsplit('.', 1)[1].isdigit():
1509 os.remove(i)
1510
1511 @classmethod
1512 def parse_log(cls, logname, blacklist):
1513 logging.info('parse_log(%s, %s)' % (logname, blacklist))
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00001514 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001515 data = read_json(logname)
1516 out = []
1517 for item in data['traces']:
1518 result = {
1519 'trace': item['trace'],
1520 'output': item['output'],
1521 }
1522 try:
1523 context = cls.Context(blacklist, item['cwd'])
1524 for pidfile in glob.iglob('%s.%s.*' % (logname, item['trace'])):
1525 pid = pidfile.rsplit('.', 1)[1]
1526 if pid.isdigit():
1527 pid = int(pid)
1528 # TODO(maruel): Load as utf-8
1529 for line in open(pidfile, 'rb'):
1530 context.on_line(pid, line)
1531 result['results'] = context.to_results()
maruel@chromium.org424f5692012-10-15 15:42:38 +00001532 except TracingFailure:
1533 result['exception'] = sys.exc_info()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001534 out.append(result)
1535 return out
1536
1537
1538class Dtrace(ApiBase):
1539 """Uses DTrace framework through dtrace. Requires root access.
1540
1541 Implies Mac OSX.
1542
1543 dtruss can't be used because it has compatibility issues with python.
1544
1545 Also, the pid->cwd handling needs to be done manually since OSX has no way to
1546 get the absolute path of the 'cwd' dtrace variable from the probe.
1547
1548 Also, OSX doesn't populate curpsinfo->pr_psargs properly, see
1549 https://discussions.apple.com/thread/1980539. So resort to handling execve()
1550 manually.
1551
1552 errno is not printed in the log since this implementation currently only cares
1553 about files that were successfully opened.
1554 """
1555 class Context(ApiBase.Context):
1556 # Format: index pid function(args)
1557 RE_HEADER = re.compile(r'^\d+ (\d+) ([a-zA-Z_\-]+)\((.*?)\)$')
1558
1559 # Arguments parsing.
1560 RE_DTRACE_BEGIN = re.compile(r'^\"(.+?)\"$')
1561 RE_CHDIR = re.compile(r'^\"(.+?)\"$')
1562 RE_EXECVE = re.compile(r'^\"(.+?)\", \[(\d+), (.+)\]$')
1563 RE_OPEN = re.compile(r'^\"(.+?)\", (0x[0-9a-z]+), (0x[0-9a-z]+)$')
1564 RE_PROC_START = re.compile(r'^(\d+), \"(.+?)\", (\d+)$')
1565 RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$')
1566
1567 O_DIRECTORY = 0x100000
1568
1569 class Process(ApiBase.Context.Process):
1570 def __init__(self, *args):
1571 super(Dtrace.Context.Process, self).__init__(*args)
1572 self.cwd = self.initial_cwd
1573
1574 def __init__(self, blacklist, tracer_pid, initial_cwd):
1575 logging.info(
1576 '%s(%d, %s)' % (self.__class__.__name__, tracer_pid, initial_cwd))
1577 super(Dtrace.Context, self).__init__(blacklist)
1578 # Process ID of the temporary script created by create_thunk().
1579 self._tracer_pid = tracer_pid
1580 self._initial_cwd = initial_cwd
1581 self._line_number = 0
1582
1583 def on_line(self, line):
1584 self._line_number += 1
1585 match = self.RE_HEADER.match(line)
1586 if not match:
1587 raise TracingFailure(
1588 'Found malformed line: %s' % line,
1589 None,
1590 self._line_number,
1591 line)
1592 fn = getattr(
1593 self,
1594 'handle_%s' % match.group(2).replace('-', '_'),
1595 self._handle_ignored)
1596 # It is guaranteed to succeed because of the regexp. Or at least I thought
1597 # it would.
1598 pid = int(match.group(1))
1599 try:
1600 return fn(pid, match.group(3))
1601 except TracingFailure, e:
1602 # Hack in the values since the handler could be a static function.
1603 e.pid = pid
1604 e.line = line
1605 e.line_number = self._line_number
1606 # Re-raise the modified exception.
1607 raise
1608 except (KeyError, NotImplementedError, ValueError), e:
1609 raise TracingFailure(
1610 'Trace generated a %s exception: %s' % (
1611 e.__class__.__name__, str(e)),
1612 pid,
1613 self._line_number,
1614 line,
1615 e)
1616
1617 def to_results(self):
1618 process = self.root_process.to_results_process()
1619 # Internal concistency check.
1620 if sorted(self._process_lookup) != sorted(p.pid for p in process.all):
1621 raise TracingFailure(
1622 'Found internal inconsitency in process lifetime detection '
1623 'while looking for len(tree) == len(list)',
1624 None,
1625 None,
1626 None,
1627 sorted(self._process_lookup),
1628 sorted(p.pid for p in process.all))
1629 return Results(process)
1630
1631 def handle_dtrace_BEGIN(self, _pid, args):
1632 if not self.RE_DTRACE_BEGIN.match(args):
1633 raise TracingFailure(
1634 'Found internal inconsitency in dtrace_BEGIN log line',
1635 None, None, None)
1636
1637 def handle_proc_start(self, pid, args):
1638 """Transfers cwd.
1639
1640 The dtrace script already takes care of only tracing the processes that
1641 are child of the traced processes so there is no need to verify the
1642 process hierarchy.
1643 """
1644 if pid in self._process_lookup:
1645 raise TracingFailure(
1646 'Found internal inconsitency in proc_start: %d started two times' %
1647 pid,
1648 None, None, None)
1649 match = self.RE_PROC_START.match(args)
1650 if not match:
1651 raise TracingFailure(
1652 'Failed to parse arguments: %s' % args,
1653 None, None, None)
1654 ppid = int(match.group(1))
1655 if ppid == self._tracer_pid and not self.root_process:
1656 proc = self.root_process = self.Process(
1657 self.blacklist, pid, self._initial_cwd)
1658 elif ppid in self._process_lookup:
1659 proc = self.Process(self.blacklist, pid, self._process_lookup[ppid].cwd)
1660 self._process_lookup[ppid].children.append(proc)
1661 else:
1662 # Another process tree, ignore.
1663 return
1664 self._process_lookup[pid] = proc
1665 logging.debug(
1666 'New child: %s -> %d cwd:%s' %
1667 (ppid, pid, unicode(proc.initial_cwd)))
1668
1669 def handle_proc_exit(self, pid, _args):
1670 """Removes cwd."""
1671 if pid in self._process_lookup:
1672 # self._tracer_pid is not traced itself and other traces run neither.
1673 self._process_lookup[pid].cwd = None
1674
1675 def handle_execve(self, pid, args):
1676 """Sets the process' executable.
1677
1678 TODO(maruel): Read command line arguments. See
1679 https://discussions.apple.com/thread/1980539 for an example.
1680 https://gist.github.com/1242279
1681
1682 Will have to put the answer at http://stackoverflow.com/questions/7556249.
1683 :)
1684 """
1685 if not pid in self._process_lookup:
1686 # Another process tree, ignore.
1687 return
1688 match = self.RE_EXECVE.match(args)
1689 if not match:
1690 raise TracingFailure(
1691 'Failed to parse arguments: %r' % args,
1692 None, None, None)
1693 proc = self._process_lookup[pid]
1694 proc.executable = match.group(1)
1695 proc.command = self.process_escaped_arguments(match.group(3))
1696 if int(match.group(2)) != len(proc.command):
1697 raise TracingFailure(
1698 'Failed to parse execve() arguments: %s' % args,
1699 None, None, None)
1700
1701 def handle_chdir(self, pid, args):
1702 """Updates cwd."""
1703 if pid not in self._process_lookup:
1704 # Another process tree, ignore.
1705 return
1706 cwd = self.RE_CHDIR.match(args).group(1)
1707 if not cwd.startswith('/'):
1708 cwd2 = os.path.join(self._process_lookup[pid].cwd, cwd)
1709 logging.debug('handle_chdir(%d, %s) -> %s' % (pid, cwd, cwd2))
1710 else:
1711 logging.debug('handle_chdir(%d, %s)' % (pid, cwd))
1712 cwd2 = cwd
1713 self._process_lookup[pid].cwd = cwd2
1714
1715 def handle_open_nocancel(self, pid, args):
1716 """Redirects to handle_open()."""
1717 return self.handle_open(pid, args)
1718
1719 def handle_open(self, pid, args):
1720 if pid not in self._process_lookup:
1721 # Another process tree, ignore.
1722 return
1723 match = self.RE_OPEN.match(args)
1724 if not match:
1725 raise TracingFailure(
1726 'Failed to parse arguments: %s' % args,
1727 None, None, None)
1728 flag = int(match.group(2), 16)
1729 if self.O_DIRECTORY & flag == self.O_DIRECTORY:
1730 # Ignore directories.
1731 return
1732 self._handle_file(pid, match.group(1))
1733
1734 def handle_rename(self, pid, args):
1735 if pid not in self._process_lookup:
1736 # Another process tree, ignore.
1737 return
1738 match = self.RE_RENAME.match(args)
1739 if not match:
1740 raise TracingFailure(
1741 'Failed to parse arguments: %s' % args,
1742 None, None, None)
1743 self._handle_file(pid, match.group(1))
1744 self._handle_file(pid, match.group(2))
1745
1746 def _handle_file(self, pid, filepath):
1747 if not filepath.startswith('/'):
1748 filepath = os.path.join(self._process_lookup[pid].cwd, filepath)
1749 # We can get '..' in the path.
1750 filepath = os.path.normpath(filepath)
1751 # Sadly, still need to filter out directories here;
1752 # saw open_nocancel(".", 0, 0) = 0 lines.
1753 if os.path.isdir(filepath):
1754 return
1755 self._process_lookup[pid].add_file(filepath, False)
1756
1757 def handle_ftruncate(self, pid, args):
1758 """Just used as a signal to kill dtrace, ignoring."""
1759 pass
1760
1761 @staticmethod
1762 def _handle_ignored(pid, args):
1763 """Is called for all the event traces that are not handled."""
1764 raise NotImplementedError('Please implement me')
1765
1766 @staticmethod
1767 def process_escaped_arguments(text):
1768 """Extracts escaped arguments on a string and return the arguments as a
1769 list.
1770
1771 Implemented as an automaton.
1772
1773 Example:
1774 With text = '\\001python2.7\\001-c\\001print(\\"hi\\")\\0', the
1775 function will return ['python2.7', '-c', 'print("hi")]
1776 """
1777 if not text.endswith('\\0'):
1778 raise ValueError('String is not null terminated: %r' % text, text)
1779 text = text[:-2]
1780
1781 def unescape(x):
1782 """Replaces '\\' with '\' and '\?' (where ? is anything) with ?."""
1783 out = []
1784 escaped = False
1785 for i in x:
1786 if i == '\\' and not escaped:
1787 escaped = True
1788 continue
1789 escaped = False
1790 out.append(i)
1791 return ''.join(out)
1792
1793 return [unescape(i) for i in text.split('\\001')]
1794
1795 class Tracer(ApiBase.Tracer):
1796 # pylint: disable=C0301
1797 #
1798 # To understand the following code, you'll want to take a look at:
1799 # http://developers.sun.com/solaris/articles/dtrace_quickref/dtrace_quickref.html
1800 # https://wikis.oracle.com/display/DTrace/Variables
1801 # http://docs.oracle.com/cd/E19205-01/820-4221/
1802 #
1803 # 0. Dump all the valid probes into a text file. It is important, you
1804 # want to redirect into a file and you don't want to constantly 'sudo'.
1805 # $ sudo dtrace -l > probes.txt
1806 #
1807 # 1. Count the number of probes:
1808 # $ wc -l probes.txt
1809 # 81823 # On OSX 10.7, including 1 header line.
1810 #
1811 # 2. List providers, intentionally skipping all the 'syspolicy10925' and the
1812 # likes and skipping the header with NR>1:
1813 # $ awk 'NR>1 { print $2 }' probes.txt | sort | uniq | grep -v '[[:digit:]]'
1814 # dtrace
1815 # fbt
1816 # io
1817 # ip
1818 # lockstat
1819 # mach_trap
1820 # proc
1821 # profile
1822 # sched
1823 # syscall
1824 # tcp
1825 # vminfo
1826 #
1827 # 3. List of valid probes:
1828 # $ grep syscall probes.txt | less
1829 # or use dtrace directly:
1830 # $ sudo dtrace -l -P syscall | less
1831 #
1832 # trackedpid is an associative array where its value can be 0, 1 or 2.
1833 # 0 is for untracked processes and is the default value for items not
1834 # in the associative array.
1835 # 1 is for tracked processes.
1836 # 2 is for the script created by create_thunk() only. It is not tracked
1837 # itself but all its decendants are.
1838 #
1839 # The script will kill itself only once waiting_to_die == 1 and
1840 # current_processes == 0, so that both getlogin() was called and that
1841 # all traced processes exited.
1842 #
1843 # TODO(maruel): Use cacheable predicates. See
1844 # https://wikis.oracle.com/display/DTrace/Performance+Considerations
1845 D_CODE = """
1846 dtrace:::BEGIN {
1847 waiting_to_die = 0;
1848 current_processes = 0;
1849 logindex = 0;
1850 printf("%d %d %s_%s(\\"%s\\")\\n",
1851 logindex, PID, probeprov, probename, SCRIPT);
1852 logindex++;
1853 }
1854
1855 proc:::start /trackedpid[ppid]/ {
1856 trackedpid[pid] = 1;
1857 current_processes += 1;
1858 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
1859 logindex, pid, probeprov, probename,
1860 ppid,
1861 execname,
1862 current_processes);
1863 logindex++;
1864 }
1865 /* Should use SCRIPT but there is no access to this variable at that
1866 * point. */
1867 proc:::start /ppid == PID && execname == "Python"/ {
1868 trackedpid[pid] = 2;
1869 current_processes += 1;
1870 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
1871 logindex, pid, probeprov, probename,
1872 ppid,
1873 execname,
1874 current_processes);
1875 logindex++;
1876 }
1877 proc:::exit /trackedpid[pid] &&
1878 current_processes == 1 &&
1879 waiting_to_die == 1/ {
1880 trackedpid[pid] = 0;
1881 current_processes -= 1;
1882 printf("%d %d %s_%s(%d)\\n",
1883 logindex, pid, probeprov, probename,
1884 current_processes);
1885 logindex++;
1886 exit(0);
1887 }
1888 proc:::exit /trackedpid[pid]/ {
1889 trackedpid[pid] = 0;
1890 current_processes -= 1;
1891 printf("%d %d %s_%s(%d)\\n",
1892 logindex, pid, probeprov, probename,
1893 current_processes);
1894 logindex++;
1895 }
1896
1897 /* Use an arcane function to detect when we need to die */
1898 syscall::ftruncate:entry /pid == PID && arg0 == FILE_ID/ {
1899 waiting_to_die = 1;
1900 printf("%d %d %s()\\n", logindex, pid, probefunc);
1901 logindex++;
1902 }
1903 syscall::ftruncate:entry /
1904 pid == PID && arg0 == FILE_ID && current_processes == 0/ {
1905 exit(0);
1906 }
1907
1908 syscall::open*:entry /trackedpid[pid] == 1/ {
1909 self->open_arg0 = arg0;
1910 self->open_arg1 = arg1;
1911 self->open_arg2 = arg2;
1912 }
1913 syscall::open*:return /trackedpid[pid] == 1 && errno == 0/ {
1914 this->open_arg0 = copyinstr(self->open_arg0);
1915 printf("%d %d %s(\\"%s\\", 0x%x, 0x%x)\\n",
1916 logindex, pid, probefunc,
1917 this->open_arg0,
1918 self->open_arg1,
1919 self->open_arg2);
1920 logindex++;
1921 this->open_arg0 = 0;
1922 }
1923 syscall::open*:return /trackedpid[pid] == 1/ {
1924 self->open_arg0 = 0;
1925 self->open_arg1 = 0;
1926 self->open_arg2 = 0;
1927 }
1928
1929 syscall::rename:entry /trackedpid[pid] == 1/ {
1930 self->rename_arg0 = arg0;
1931 self->rename_arg1 = arg1;
1932 }
1933 syscall::rename:return /trackedpid[pid] == 1 && errno == 0/ {
1934 this->rename_arg0 = copyinstr(self->rename_arg0);
1935 this->rename_arg1 = copyinstr(self->rename_arg1);
1936 printf("%d %d %s(\\"%s\\", \\"%s\\")\\n",
1937 logindex, pid, probefunc,
1938 this->rename_arg0,
1939 this->rename_arg1);
1940 logindex++;
1941 this->rename_arg0 = 0;
1942 this->rename_arg1 = 0;
1943 }
1944 syscall::rename:return /trackedpid[pid] == 1/ {
1945 self->rename_arg0 = 0;
1946 self->rename_arg1 = 0;
1947 }
1948
1949 /* Track chdir, it's painful because it is only receiving relative path.
1950 */
1951 syscall::chdir:entry /trackedpid[pid] == 1/ {
1952 self->chdir_arg0 = arg0;
1953 }
1954 syscall::chdir:return /trackedpid[pid] == 1 && errno == 0/ {
1955 this->chdir_arg0 = copyinstr(self->chdir_arg0);
1956 printf("%d %d %s(\\"%s\\")\\n",
1957 logindex, pid, probefunc,
1958 this->chdir_arg0);
1959 logindex++;
1960 this->chdir_arg0 = 0;
1961 }
1962 syscall::chdir:return /trackedpid[pid] == 1/ {
1963 self->chdir_arg0 = 0;
1964 }
1965 """
1966
1967 # execve-specific code, tends to throw a lot of exceptions.
1968 D_CODE_EXECVE = """
1969 /* Finally what we care about! */
1970 syscall::exec*:entry /trackedpid[pid]/ {
1971 self->exec_arg0 = copyinstr(arg0);
1972 /* Incrementally probe for a NULL in the argv parameter of execve() to
1973 * figure out argc. */
1974 /* TODO(maruel): Skip the remaining copyin() when a NULL pointer was
1975 * found. */
1976 self->exec_argc = 0;
1977 /* Probe for argc==1 */
1978 this->exec_argv = (user_addr_t*)copyin(
1979 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
1980 self->exec_argc = this->exec_argv[self->exec_argc] ?
1981 (self->exec_argc + 1) : self->exec_argc;
1982
1983 /* Probe for argc==2 */
1984 this->exec_argv = (user_addr_t*)copyin(
1985 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
1986 self->exec_argc = this->exec_argv[self->exec_argc] ?
1987 (self->exec_argc + 1) : self->exec_argc;
1988
1989 /* Probe for argc==3 */
1990 this->exec_argv = (user_addr_t*)copyin(
1991 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
1992 self->exec_argc = this->exec_argv[self->exec_argc] ?
1993 (self->exec_argc + 1) : self->exec_argc;
1994
1995 /* Probe for argc==4 */
1996 this->exec_argv = (user_addr_t*)copyin(
1997 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
1998 self->exec_argc = this->exec_argv[self->exec_argc] ?
1999 (self->exec_argc + 1) : self->exec_argc;
2000
2001 /* Copy the inputs strings since there is no guarantee they'll be
2002 * present after the call completed. */
2003 self->exec_argv0 = (self->exec_argc > 0) ?
2004 copyinstr(this->exec_argv[0]) : "";
2005 self->exec_argv1 = (self->exec_argc > 1) ?
2006 copyinstr(this->exec_argv[1]) : "";
2007 self->exec_argv2 = (self->exec_argc > 2) ?
2008 copyinstr(this->exec_argv[2]) : "";
2009 self->exec_argv3 = (self->exec_argc > 3) ?
2010 copyinstr(this->exec_argv[3]) : "";
2011 this->exec_argv = 0;
2012 }
2013 syscall::exec*:return /trackedpid[pid] && errno == 0/ {
2014 /* We need to join strings here, as using multiple printf() would
2015 * cause tearing when multiple threads/processes are traced.
2016 * Since it is impossible to escape a string and join it to another one,
2017 * like sprintf("%s%S", previous, more), use hackery.
2018 * Each of the elements are split with a \\1. \\0 cannot be used because
2019 * it is simply ignored. This will conflict with any program putting a
2020 * \\1 in their execve() string but this should be "rare enough" */
2021 this->args = "";
2022 /* Process exec_argv[0] */
2023 this->args = strjoin(
2024 this->args, (self->exec_argc > 0) ? self->exec_argv0 : "");
2025
2026 /* Process exec_argv[1] */
2027 this->args = strjoin(
2028 this->args, (self->exec_argc > 1) ? "\\1" : "");
2029 this->args = strjoin(
2030 this->args, (self->exec_argc > 1) ? self->exec_argv1 : "");
2031
2032 /* Process exec_argv[2] */
2033 this->args = strjoin(
2034 this->args, (self->exec_argc > 2) ? "\\1" : "");
2035 this->args = strjoin(
2036 this->args, (self->exec_argc > 2) ? self->exec_argv2 : "");
2037
2038 /* Process exec_argv[3] */
2039 this->args = strjoin(
2040 this->args, (self->exec_argc > 3) ? "\\1" : "");
2041 this->args = strjoin(
2042 this->args, (self->exec_argc > 3) ? self->exec_argv3 : "");
2043
2044 /* Prints self->exec_argc to permits verifying the internal
2045 * consistency since this code is quite fishy. */
2046 printf("%d %d %s(\\"%s\\", [%d, %S])\\n",
2047 logindex, pid, probefunc,
2048 self->exec_arg0,
2049 self->exec_argc,
2050 this->args);
2051 logindex++;
2052 this->args = 0;
2053 }
2054 syscall::exec*:return /trackedpid[pid]/ {
2055 self->exec_arg0 = 0;
2056 self->exec_argc = 0;
2057 self->exec_argv0 = 0;
2058 self->exec_argv1 = 0;
2059 self->exec_argv2 = 0;
2060 self->exec_argv3 = 0;
2061 }
2062 """
2063
2064 # Code currently not used.
2065 D_EXTRANEOUS = """
2066 /* This is a good learning experience, since it traces a lot of things
2067 * related to the process and child processes.
2068 * Warning: it generates a gigantic log. For example, tracing
2069 * "data/trace_inputs/child1.py --child" generates a 2mb log and takes
2070 * several minutes to execute.
2071 */
2072 /*
2073 mach_trap::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2074 printf("%d %d %s_%s() = %d\\n",
2075 logindex, pid, probeprov, probefunc, errno);
2076 logindex++;
2077 }
2078 proc::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2079 printf("%d %d %s_%s() = %d\\n",
2080 logindex, pid, probeprov, probefunc, errno);
2081 logindex++;
2082 }
2083 sched::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2084 printf("%d %d %s_%s() = %d\\n",
2085 logindex, pid, probeprov, probefunc, errno);
2086 logindex++;
2087 }
2088 syscall::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2089 printf("%d %d %s_%s() = %d\\n",
2090 logindex, pid, probeprov, probefunc, errno);
2091 logindex++;
2092 }
2093 vminfo::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2094 printf("%d %d %s_%s() = %d\\n",
2095 logindex, pid, probeprov, probefunc, errno);
2096 logindex++;
2097 }
2098 */
2099 /* TODO(maruel): *stat* functions and friends
2100 syscall::access:return,
2101 syscall::chdir:return,
2102 syscall::chflags:return,
2103 syscall::chown:return,
2104 syscall::chroot:return,
2105 syscall::getattrlist:return,
2106 syscall::getxattr:return,
2107 syscall::lchown:return,
2108 syscall::lstat64:return,
2109 syscall::lstat:return,
2110 syscall::mkdir:return,
2111 syscall::pathconf:return,
2112 syscall::readlink:return,
2113 syscall::removexattr:return,
2114 syscall::setxattr:return,
2115 syscall::stat64:return,
2116 syscall::stat:return,
2117 syscall::truncate:return,
2118 syscall::unlink:return,
2119 syscall::utimes:return,
2120 */
2121 """
2122
2123 def __init__(self, logname):
2124 """Starts the log collection with dtrace.
2125
2126 Requires root access or chmod 4555 on dtrace. dtrace is asynchronous so
2127 this needs to wait for dtrace to be "warmed up".
2128 """
2129 super(Dtrace.Tracer, self).__init__(logname)
2130 self._script = create_thunk()
2131 # This unique dummy temp file is used to signal the dtrace script that it
2132 # should stop as soon as all the child processes are done. A bit hackish
2133 # but works fine enough.
2134 self._dummy_file_id, self._dummy_file_name = tempfile.mkstemp(
2135 prefix='trace_signal_file')
2136
2137 # Note: do not use the -p flag. It's useless if the initial process quits
2138 # too fast, resulting in missing traces from the grand-children. The D
2139 # code manages the dtrace lifetime itself.
2140 trace_cmd = [
2141 'sudo',
2142 'dtrace',
2143 # Use a larger buffer if getting 'out of scratch space' errors.
2144 # Ref: https://wikis.oracle.com/display/DTrace/Options+and+Tunables
2145 '-b', '10m',
2146 '-x', 'dynvarsize=10m',
2147 #'-x', 'dtrace_global_maxsize=1m',
2148 '-x', 'evaltime=exec',
2149 '-o', '/dev/stderr',
2150 '-q',
2151 '-n', self._get_dtrace_code(),
2152 ]
2153 with open(self._logname + '.log', 'wb') as logfile:
2154 self._dtrace = subprocess.Popen(
2155 trace_cmd, stdout=logfile, stderr=subprocess.STDOUT)
2156 logging.debug('Started dtrace pid: %d' % self._dtrace.pid)
2157
2158 # Reads until one line is printed, which signifies dtrace is up and ready.
2159 with open(self._logname + '.log', 'rb') as logfile:
2160 while 'dtrace_BEGIN' not in logfile.readline():
2161 if self._dtrace.poll() is not None:
2162 # Do a busy wait. :/
2163 break
2164 logging.debug('dtrace started')
2165
2166 def _get_dtrace_code(self):
2167 """Setups the D code to implement child process tracking.
2168
2169 Injects the cookie in the script so it knows when to stop.
2170
2171 The script will detect any instance of the script created with
2172 create_thunk() and will start tracing it.
2173 """
2174 return (
2175 'inline int PID = %d;\n'
2176 'inline string SCRIPT = "%s";\n'
2177 'inline int FILE_ID = %d;\n'
2178 '\n'
2179 '%s') % (
2180 os.getpid(),
2181 self._script,
2182 self._dummy_file_id,
2183 self.D_CODE) + self.D_CODE_EXECVE
2184
2185 def trace(self, cmd, cwd, tracename, output):
2186 """Runs dtrace on an executable.
2187
2188 This dtruss is broken when it starts the process itself or when tracing
2189 child processes, this code starts a wrapper process
2190 generated with create_thunk() which starts the executable to trace.
2191 """
2192 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
2193 assert os.path.isabs(cmd[0]), cmd[0]
2194 assert os.path.isabs(cwd), cwd
2195 assert os.path.normpath(cwd) == cwd, cwd
2196 with self._lock:
2197 if not self._initialized:
2198 raise TracingFailure(
2199 'Called Tracer.trace() on an unitialized object',
2200 None, None, None, tracename)
2201 assert tracename not in (i['trace'] for i in self._traces)
2202
2203 # Starts the script wrapper to start the child process. This signals the
2204 # dtrace script that this process is to be traced.
2205 stdout = stderr = None
2206 if output:
2207 stdout = subprocess.PIPE
2208 stderr = subprocess.STDOUT
2209 child_cmd = [
2210 sys.executable,
2211 self._script,
2212 tracename,
2213 ]
2214 # Call a dummy function so that dtrace knows I'm about to launch a process
2215 # that needs to be traced.
2216 # Yummy.
2217 child = subprocess.Popen(
2218 child_cmd + fix_python_path(cmd),
2219 stdin=subprocess.PIPE,
2220 stdout=stdout,
2221 stderr=stderr,
2222 cwd=cwd)
2223 logging.debug('Started child pid: %d' % child.pid)
2224
2225 out = child.communicate()[0]
2226 # This doesn't mean tracing is done, one of the grand-child process may
2227 # still be alive. It will be tracked with the dtrace script.
2228
2229 with self._lock:
2230 assert tracename not in (i['trace'] for i in self._traces)
2231 self._traces.append(
2232 {
2233 'cmd': cmd,
2234 'cwd': cwd,
2235 # The pid of strace process, not very useful.
2236 'pid': child.pid,
2237 'trace': tracename,
2238 'output': out,
2239 })
2240 return child.returncode, out
2241
2242 def close(self, timeout=None):
2243 """Terminates dtrace."""
2244 logging.debug('close(%s)' % timeout)
2245 try:
2246 try:
2247 super(Dtrace.Tracer, self).close(timeout)
2248 # Signal dtrace that it should stop now.
maruel@chromium.orgf43e68b2012-10-15 20:23:10 +00002249 # ftruncate doesn't exist on Windows.
2250 os.ftruncate(self._dummy_file_id, 0) # pylint: disable=E1101
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002251 if timeout:
2252 start = time.time()
2253 # Use polling. :/
2254 while (self._dtrace.poll() is None and
2255 (time.time() - start) < timeout):
2256 time.sleep(0.1)
2257 self._dtrace.kill()
2258 self._dtrace.wait()
2259 finally:
2260 # Make sure to kill it in any case.
2261 if self._dtrace.poll() is None:
2262 try:
2263 self._dtrace.kill()
2264 self._dtrace.wait()
2265 except OSError:
2266 pass
2267
2268 if self._dtrace.returncode != 0:
2269 # Warn about any dtrace failure but basically ignore it.
2270 print 'dtrace failure: %s' % self._dtrace.returncode
2271 finally:
2272 os.close(self._dummy_file_id)
2273 os.remove(self._dummy_file_name)
2274 os.remove(self._script)
2275
2276 def post_process_log(self):
2277 """Sorts the log back in order when each call occured.
2278
2279 dtrace doesn't save the buffer in strict order since it keeps one buffer
2280 per CPU.
2281 """
2282 super(Dtrace.Tracer, self).post_process_log()
2283 logname = self._logname + '.log'
2284 with open(logname, 'rb') as logfile:
2285 lines = [l for l in logfile if l.strip()]
2286 errors = [l for l in lines if l.startswith('dtrace:')]
2287 if errors:
2288 raise TracingFailure(
2289 'Found errors in the trace: %s' % '\n'.join(errors),
2290 None, None, None, logname)
2291 try:
2292 lines = sorted(lines, key=lambda l: int(l.split(' ', 1)[0]))
2293 except ValueError:
2294 raise TracingFailure(
2295 'Found errors in the trace: %s' % '\n'.join(
2296 l for l in lines if l.split(' ', 1)[0].isdigit()),
2297 None, None, None, logname)
2298 with open(logname, 'wb') as logfile:
2299 logfile.write(''.join(lines))
2300
2301 @staticmethod
2302 def clean_trace(logname):
2303 for ext in ('', '.log'):
2304 if os.path.isfile(logname + ext):
2305 os.remove(logname + ext)
2306
2307 @classmethod
2308 def parse_log(cls, logname, blacklist):
2309 logging.info('parse_log(%s, ...)' % logname)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00002310 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002311
2312 def blacklist_more(filepath):
2313 # All the HFS metadata is in the form /.vol/...
2314 return blacklist(filepath) or re.match(r'^\/\.vol\/.+$', filepath)
2315
2316 data = read_json(logname)
2317 out = []
2318 for item in data['traces']:
2319 context = cls.Context(blacklist_more, item['pid'], item['cwd'])
2320 for line in open(logname + '.log', 'rb'):
2321 context.on_line(line)
2322 out.append(
2323 {
2324 'results': context.to_results(),
2325 'trace': item['trace'],
2326 'output': item['output'],
2327 })
2328 return out
2329
2330
2331class LogmanTrace(ApiBase):
2332 """Uses the native Windows ETW based tracing functionality to trace a child
2333 process.
2334
2335 Caveat: this implementations doesn't track cwd or initial_cwd. It is because
2336 the Windows Kernel doesn't have a concept of 'current working directory' at
2337 all. A Win32 process has a map of current directories, one per drive letter
2338 and it is managed by the user mode kernel32.dll. In kernel, a file is always
2339 opened relative to another file_object or as an absolute path. All the current
2340 working directory logic is done in user mode.
2341 """
2342 class Context(ApiBase.Context):
2343 """Processes a ETW log line and keeps the list of existent and non
2344 existent files accessed.
2345
2346 Ignores directories.
2347 """
2348 # These indexes are for the stripped version in json.
2349 EVENT_NAME = 0
2350 TYPE = 1
2351 PID = 2
2352 TID = 3
2353 PROCESSOR_ID = 4
2354 TIMESTAMP = 5
2355 USER_DATA = 6
2356
2357 class Process(ApiBase.Context.Process):
2358 def __init__(self, *args):
2359 super(LogmanTrace.Context.Process, self).__init__(*args)
2360 # Handle file objects that succeeded.
2361 self.file_objects = {}
2362
2363 def __init__(self, blacklist, tracer_pid):
2364 logging.info('%s(%d)' % (self.__class__.__name__, tracer_pid))
2365 super(LogmanTrace.Context, self).__init__(blacklist)
2366 self._drive_map = DosDriveMap()
2367 # Threads mapping to the corresponding process id.
2368 self._threads_active = {}
2369 # Process ID of the tracer, e.g. tracer_inputs.py
2370 self._tracer_pid = tracer_pid
2371 self._line_number = 0
2372
2373 def on_line(self, line):
2374 """Processes a json Event line."""
2375 self._line_number += 1
2376 try:
2377 # By Opcode
2378 handler = getattr(
2379 self,
2380 'handle_%s_%s' % (line[self.EVENT_NAME], line[self.TYPE]),
2381 None)
2382 if not handler:
2383 raise TracingFailure(
2384 'Unexpected event %s_%s' % (
2385 line[self.EVENT_NAME], line[self.TYPE]),
2386 None, None, None)
2387 handler(line)
2388 except TracingFailure, e:
2389 # Hack in the values since the handler could be a static function.
2390 e.pid = line[self.PID]
2391 e.line = line
2392 e.line_number = self._line_number
2393 # Re-raise the modified exception.
2394 raise
2395 except (KeyError, NotImplementedError, ValueError), e:
2396 raise TracingFailure(
2397 'Trace generated a %s exception: %s' % (
2398 e.__class__.__name__, str(e)),
2399 line[self.PID],
2400 self._line_number,
2401 line,
2402 e)
2403
2404 def to_results(self):
2405 if not self.root_process:
2406 raise TracingFailure(
2407 'Failed to detect the initial process',
2408 None, None, None)
2409 process = self.root_process.to_results_process()
2410 return Results(process)
2411
2412 def _thread_to_process(self, tid):
2413 """Finds the process from the thread id."""
2414 tid = int(tid, 16)
2415 pid = self._threads_active.get(tid)
2416 if not pid or not self._process_lookup.get(pid):
2417 return
2418 return self._process_lookup[pid]
2419
2420 @classmethod
2421 def handle_EventTrace_Header(cls, line):
2422 """Verifies no event was dropped, e.g. no buffer overrun occured."""
2423 BUFFER_SIZE = cls.USER_DATA
2424 #VERSION = cls.USER_DATA + 1
2425 #PROVIDER_VERSION = cls.USER_DATA + 2
2426 #NUMBER_OF_PROCESSORS = cls.USER_DATA + 3
2427 #END_TIME = cls.USER_DATA + 4
2428 #TIMER_RESOLUTION = cls.USER_DATA + 5
2429 #MAX_FILE_SIZE = cls.USER_DATA + 6
2430 #LOG_FILE_MODE = cls.USER_DATA + 7
2431 #BUFFERS_WRITTEN = cls.USER_DATA + 8
2432 #START_BUFFERS = cls.USER_DATA + 9
2433 #POINTER_SIZE = cls.USER_DATA + 10
2434 EVENTS_LOST = cls.USER_DATA + 11
2435 #CPU_SPEED = cls.USER_DATA + 12
2436 #LOGGER_NAME = cls.USER_DATA + 13
2437 #LOG_FILE_NAME = cls.USER_DATA + 14
2438 #BOOT_TIME = cls.USER_DATA + 15
2439 #PERF_FREQ = cls.USER_DATA + 16
2440 #START_TIME = cls.USER_DATA + 17
2441 #RESERVED_FLAGS = cls.USER_DATA + 18
2442 #BUFFERS_LOST = cls.USER_DATA + 19
2443 #SESSION_NAME_STRING = cls.USER_DATA + 20
2444 #LOG_FILE_NAME_STRING = cls.USER_DATA + 21
2445 if line[EVENTS_LOST] != '0':
2446 raise TracingFailure(
2447 ( '%s events were lost during trace, please increase the buffer '
2448 'size from %s') % (line[EVENTS_LOST], line[BUFFER_SIZE]),
2449 None, None, None)
2450
2451 def handle_FileIo_Cleanup(self, line):
2452 """General wisdom: if a file is closed, it's because it was opened.
2453
2454 Note that FileIo_Close is not used since if a file was opened properly but
2455 not closed before the process exits, only Cleanup will be logged.
2456 """
2457 #IRP = self.USER_DATA
2458 TTID = self.USER_DATA + 1 # Thread ID, that's what we want.
2459 FILE_OBJECT = self.USER_DATA + 2
2460 #FILE_KEY = self.USER_DATA + 3
2461 proc = self._thread_to_process(line[TTID])
2462 if not proc:
2463 # Not a process we care about.
2464 return
2465 file_object = line[FILE_OBJECT]
2466 if file_object in proc.file_objects:
2467 proc.add_file(proc.file_objects.pop(file_object), False)
2468
2469 def handle_FileIo_Create(self, line):
2470 """Handles a file open.
2471
2472 All FileIo events are described at
2473 http://msdn.microsoft.com/library/windows/desktop/aa363884.aspx
2474 for some value of 'description'.
2475
2476 " (..) process and thread id values of the IO events (..) are not valid "
2477 http://msdn.microsoft.com/magazine/ee358703.aspx
2478
2479 The FileIo.Create event doesn't return if the CreateFile() call
2480 succeeded, so keep track of the file_object and check that it is
2481 eventually closed with FileIo_Cleanup.
2482 """
2483 #IRP = self.USER_DATA
2484 TTID = self.USER_DATA + 1 # Thread ID, that's what we want.
2485 FILE_OBJECT = self.USER_DATA + 2
2486 #CREATE_OPTIONS = self.USER_DATA + 3
2487 #FILE_ATTRIBUTES = self.USER_DATA + 4
2488 #self.USER_DATA + SHARE_ACCESS = 5
2489 OPEN_PATH = self.USER_DATA + 6
2490
2491 proc = self._thread_to_process(line[TTID])
2492 if not proc:
2493 # Not a process we care about.
2494 return
2495
2496 match = re.match(r'^\"(.+)\"$', line[OPEN_PATH])
2497 raw_path = match.group(1)
2498 # Ignore directories and bare drive right away.
2499 if raw_path.endswith(os.path.sep):
2500 return
2501 filepath = self._drive_map.to_win32(raw_path)
2502 # Ignore bare drive right away. Some may still fall through with format
2503 # like '\\?\X:'
2504 if len(filepath) == 2:
2505 return
2506 file_object = line[FILE_OBJECT]
2507 if os.path.isdir(filepath):
2508 # There is no O_DIRECTORY equivalent on Windows. The closed is
2509 # FILE_FLAG_BACKUP_SEMANTICS but it's not exactly right either. So
2510 # simply discard directories are they are found.
2511 return
2512 # Override any stale file object
2513 proc.file_objects[file_object] = filepath
2514
2515 def handle_FileIo_Rename(self, line):
2516 # TODO(maruel): Handle?
2517 pass
2518
2519 def handle_Process_End(self, line):
2520 pid = line[self.PID]
2521 if self._process_lookup.get(pid):
2522 logging.info('Terminated: %d' % pid)
2523 self._process_lookup[pid] = None
2524 else:
2525 logging.debug('Terminated: %d' % pid)
2526
2527 def handle_Process_Start(self, line):
2528 """Handles a new child process started by PID."""
2529 #UNIQUE_PROCESS_KEY = self.USER_DATA
2530 PROCESS_ID = self.USER_DATA + 1
2531 #PARENT_PID = self.USER_DATA + 2
2532 #SESSION_ID = self.USER_DATA + 3
2533 #EXIT_STATUS = self.USER_DATA + 4
2534 #DIRECTORY_TABLE_BASE = self.USER_DATA + 5
2535 #USER_SID = self.USER_DATA + 6
2536 IMAGE_FILE_NAME = self.USER_DATA + 7
2537 COMMAND_LINE = self.USER_DATA + 8
2538
2539 ppid = line[self.PID]
2540 pid = int(line[PROCESS_ID], 16)
2541 logging.debug(
2542 'New process %d->%d (%s) %s' %
2543 (ppid, pid, line[IMAGE_FILE_NAME], line[COMMAND_LINE]))
2544
2545 if ppid == self._tracer_pid:
2546 # Need to ignore processes we don't know about because the log is
2547 # system-wide. self._tracer_pid shall start only one process.
2548 if self.root_process:
2549 raise TracingFailure(
2550 ( 'Parent process is _tracer_pid(%d) but root_process(%d) is '
2551 'already set') % (self._tracer_pid, self.root_process.pid),
2552 None, None, None)
2553 proc = self.Process(self.blacklist, pid, None)
2554 self.root_process = proc
2555 ppid = None
2556 elif self._process_lookup.get(ppid):
2557 proc = self.Process(self.blacklist, pid, None)
2558 self._process_lookup[ppid].children.append(proc)
2559 else:
2560 # Ignore
2561 return
2562 self._process_lookup[pid] = proc
2563
2564 if (not line[IMAGE_FILE_NAME].startswith('"') or
2565 not line[IMAGE_FILE_NAME].endswith('"')):
2566 raise TracingFailure(
2567 'Command line is not properly quoted: %s' % line[IMAGE_FILE_NAME],
2568 None, None, None)
2569
2570 # TODO(maruel): Process escapes.
2571 if (not line[COMMAND_LINE].startswith('"') or
2572 not line[COMMAND_LINE].endswith('"')):
2573 raise TracingFailure(
2574 'Command line is not properly quoted: %s' % line[COMMAND_LINE],
2575 None, None, None)
2576 proc.command = CommandLineToArgvW(line[COMMAND_LINE][1:-1])
2577 proc.executable = line[IMAGE_FILE_NAME][1:-1]
2578 # proc.command[0] may be the absolute path of 'executable' but it may be
2579 # anything else too. If it happens that command[0] ends with executable,
2580 # use it, otherwise defaults to the base name.
2581 cmd0 = proc.command[0].lower()
2582 if not cmd0.endswith('.exe'):
2583 # TODO(maruel): That's not strictly true either.
2584 cmd0 += '.exe'
2585 if cmd0.endswith(proc.executable) and os.path.isfile(cmd0):
2586 # Fix the path.
2587 cmd0 = cmd0.replace('/', os.path.sep)
2588 cmd0 = os.path.normpath(cmd0)
2589 proc.executable = get_native_path_case(cmd0)
2590 logging.info(
2591 'New child: %s -> %d %s' % (ppid, pid, proc.executable))
2592
2593 def handle_Thread_End(self, line):
2594 """Has the same parameters as Thread_Start."""
2595 tid = int(line[self.TID], 16)
2596 self._threads_active.pop(tid, None)
2597
2598 def handle_Thread_Start(self, line):
2599 """Handles a new thread created.
2600
2601 Do not use self.PID here since a process' initial thread is created by
2602 the parent process.
2603 """
2604 PROCESS_ID = self.USER_DATA
2605 TTHREAD_ID = self.USER_DATA + 1
2606 #STACK_BASE = self.USER_DATA + 2
2607 #STACK_LIMIT = self.USER_DATA + 3
2608 #USER_STACK_BASE = self.USER_DATA + 4
2609 #USER_STACK_LIMIT = self.USER_DATA + 5
2610 #AFFINITY = self.USER_DATA + 6
2611 #WIN32_START_ADDR = self.USER_DATA + 7
2612 #TEB_BASE = self.USER_DATA + 8
2613 #SUB_PROCESS_TAG = self.USER_DATA + 9
2614 #BASE_PRIORITY = self.USER_DATA + 10
2615 #PAGE_PRIORITY = self.USER_DATA + 11
2616 #IO_PRIORITY = self.USER_DATA + 12
2617 #THREAD_FLAGS = self.USER_DATA + 13
2618 # Do not use self.PID here since a process' initial thread is created by
2619 # the parent process.
2620 pid = int(line[PROCESS_ID], 16)
2621 tid = int(line[TTHREAD_ID], 16)
2622 logging.debug('New thread pid:%d, tid:%d' % (pid, tid))
2623 self._threads_active[tid] = pid
2624
2625 @classmethod
2626 def supported_events(cls):
2627 """Returns all the procesed events."""
2628 out = []
2629 for member in dir(cls):
2630 match = re.match(r'^handle_([A-Za-z]+)_([A-Za-z]+)$', member)
2631 if match:
2632 out.append(match.groups())
2633 return out
2634
2635 class Tracer(ApiBase.Tracer):
2636 # The basic headers.
2637 EXPECTED_HEADER = [
2638 u'Event Name',
2639 u'Type',
2640 u'Event ID',
2641 u'Version',
2642 u'Channel',
2643 u'Level', # 5
2644 u'Opcode',
2645 u'Task',
2646 u'Keyword',
2647 u'PID',
2648 u'TID', # 10
2649 u'Processor Number',
2650 u'Instance ID',
2651 u'Parent Instance ID',
2652 u'Activity ID',
2653 u'Related Activity ID', # 15
2654 u'Clock-Time',
2655 u'Kernel(ms)', # Both have a resolution of ~15ms which makes them
2656 u'User(ms)', # pretty much useless.
2657 u'User Data', # Extra arguments that are event-specific.
2658 ]
2659 # Only the useful headers common to all entries are listed there. Any column
2660 # at 19 or higher is dependent on the specific event.
2661 EVENT_NAME = 0
2662 TYPE = 1
2663 PID = 9
2664 TID = 10
2665 PROCESSOR_ID = 11
2666 TIMESTAMP = 16
2667 NULL_GUID = '{00000000-0000-0000-0000-000000000000}'
2668 USER_DATA = 19
2669
2670 def __init__(self, logname):
2671 """Starts the log collection.
2672
2673 Requires administrative access. logman.exe is synchronous so no need for a
2674 "warmup" call. 'Windows Kernel Trace' is *localized* so use its GUID
2675 instead. The GUID constant name is SystemTraceControlGuid. Lovely.
2676
2677 One can get the list of potentially interesting providers with:
2678 "logman query providers | findstr /i file"
2679 """
2680 super(LogmanTrace.Tracer, self).__init__(logname)
2681 self._script = create_thunk()
2682 cmd_start = [
2683 'logman.exe',
2684 'start',
2685 'NT Kernel Logger',
2686 '-p', '{9e814aad-3204-11d2-9a82-006008a86939}',
2687 # splitio,fileiocompletion,syscall,file,cswitch,img
2688 '(process,fileio,thread)',
2689 '-o', self._logname + '.etl',
2690 '-ets', # Send directly to kernel
2691 # Values extracted out of thin air.
2692 # Event Trace Session buffer size in kb.
2693 '-bs', '10240',
2694 # Number of Event Trace Session buffers.
2695 '-nb', '16', '256',
2696 ]
2697 logging.debug('Running: %s' % cmd_start)
2698 try:
2699 subprocess.check_call(
2700 cmd_start,
2701 stdin=subprocess.PIPE,
2702 stdout=subprocess.PIPE,
2703 stderr=subprocess.STDOUT)
2704 except subprocess.CalledProcessError, e:
2705 if e.returncode == -2147024891:
2706 print >> sys.stderr, 'Please restart with an elevated admin prompt'
2707 elif e.returncode == -2144337737:
2708 print >> sys.stderr, (
2709 'A kernel trace was already running, stop it and try again')
2710 raise
2711
2712 def trace(self, cmd, cwd, tracename, output):
2713 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
2714 assert os.path.isabs(cmd[0]), cmd[0]
2715 assert os.path.isabs(cwd), cwd
2716 assert os.path.normpath(cwd) == cwd, cwd
2717 with self._lock:
2718 if not self._initialized:
2719 raise TracingFailure(
2720 'Called Tracer.trace() on an unitialized object',
2721 None, None, None, tracename)
2722 assert tracename not in (i['trace'] for i in self._traces)
2723
2724 # Use "logman -?" for help.
2725
2726 stdout = stderr = None
2727 if output:
2728 stdout = subprocess.PIPE
2729 stderr = subprocess.STDOUT
2730
2731 # Run the child process.
2732 logging.debug('Running: %s' % cmd)
2733 # Use the temporary script generated with create_thunk() so we have a
2734 # clear pid owner. Since trace_inputs.py can be used as a library and
2735 # could trace multiple processes simultaneously, it makes it more complex
2736 # if the executable to be traced is executed directly here. It also solves
2737 # issues related to logman.exe that needs to be executed to control the
2738 # kernel trace.
2739 child_cmd = [
2740 sys.executable,
2741 self._script,
2742 tracename,
2743 ]
2744 child = subprocess.Popen(
2745 child_cmd + fix_python_path(cmd),
2746 cwd=cwd,
2747 stdin=subprocess.PIPE,
2748 stdout=stdout,
2749 stderr=stderr)
2750 logging.debug('Started child pid: %d' % child.pid)
2751 out = child.communicate()[0]
2752 # This doesn't mean all the grand-children are done. Sadly, we don't have
2753 # a good way to determine that.
2754
2755 with self._lock:
2756 assert tracename not in (i['trace'] for i in self._traces)
2757 self._traces.append({
maruel@chromium.orgf43e68b2012-10-15 20:23:10 +00002758 'cmd': cmd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002759 'cwd': cwd,
2760 'pid': child.pid,
2761 'trace': tracename,
2762 'output': out,
2763 })
2764
2765 return child.returncode, out
2766
2767 def close(self, _timeout=None):
2768 """Stops the kernel log collection and converts the traces to text
2769 representation.
2770 """
2771 with self._lock:
2772 if not self._initialized:
2773 raise TracingFailure(
2774 'Called Tracer.close() on an unitialized object',
2775 None, None, None)
2776 os.remove(self._script)
2777 # Save metadata, add 'format' key..
2778 data = {
2779 'format': 'csv',
2780 'traces': self._traces,
2781 }
2782 write_json(self._logname, data, False)
2783
2784 cmd_stop = [
2785 'logman.exe',
2786 'stop',
2787 'NT Kernel Logger',
2788 '-ets', # Sends the command directly to the kernel.
2789 ]
2790 logging.debug('Running: %s' % cmd_stop)
2791 subprocess.check_call(
2792 cmd_stop,
2793 stdin=subprocess.PIPE,
2794 stdout=subprocess.PIPE,
2795 stderr=subprocess.STDOUT)
2796 self._initialized = False
2797
2798 def post_process_log(self):
2799 """Converts the .etl file into .csv then into .json."""
2800 super(LogmanTrace.Tracer, self).post_process_log()
2801 logformat = 'csv'
2802 self._convert_log(logformat)
2803
2804 if logformat == 'csv_utf16':
2805 def load_file():
2806 def utf_8_encoder(unicode_csv_data):
2807 """Encodes the unicode object as utf-8 encoded str instance"""
2808 for line in unicode_csv_data:
2809 yield line.encode('utf-8')
2810
2811 def unicode_csv_reader(unicode_csv_data, **kwargs):
2812 """Encodes temporarily as UTF-8 since csv module doesn't do unicode.
2813 """
2814 csv_reader = csv.reader(utf_8_encoder(unicode_csv_data), **kwargs)
2815 for row in csv_reader:
2816 # Decode str utf-8 instances back to unicode instances, cell by
2817 # cell:
2818 yield [cell.decode('utf-8') for cell in row]
2819
2820 # The CSV file is UTF-16 so use codecs.open() to load the file into
2821 # the python internal unicode format (utf-8). Then explicitly
2822 # re-encode as utf8 as str instances so csv can parse it fine. Then
2823 # decode the utf-8 str back into python unicode instances. This
2824 # sounds about right.
2825 for line in unicode_csv_reader(
2826 codecs.open(self._logname + '.' + logformat, 'r', 'utf-16')):
2827 # line is a list of unicode objects
2828 # So much white space!
2829 yield [i.strip() for i in line]
2830
2831 elif logformat == 'csv':
2832 def load_file():
2833 def ansi_csv_reader(ansi_csv_data, **kwargs):
2834 """Loads an 'ANSI' code page and returns unicode() objects."""
2835 assert sys.getfilesystemencoding() == 'mbcs'
2836 encoding = get_current_encoding()
2837 for row in csv.reader(ansi_csv_data, **kwargs):
2838 # Decode str 'ansi' instances to unicode instances, cell by cell:
2839 yield [cell.decode(encoding) for cell in row]
2840
2841 # The fastest and smallest format but only supports 'ANSI' file paths.
2842 # E.g. the filenames are encoding in the 'current' encoding.
2843 for line in ansi_csv_reader(open(self._logname + '.' + logformat)):
2844 # line is a list of unicode objects.
2845 yield [i.strip() for i in line]
2846
2847 supported_events = LogmanTrace.Context.supported_events()
2848
2849 def trim(generator):
2850 for index, line in enumerate(generator):
2851 if not index:
2852 if line != self.EXPECTED_HEADER:
2853 raise TracingFailure(
2854 'Found malformed header: %s' % ' '.join(line),
2855 None, None, None)
2856 continue
2857 # As you can see, the CSV is full of useful non-redundant information:
2858 if (line[2] != '0' or # Event ID
2859 line[3] not in ('2', '3') or # Version
2860 line[4] != '0' or # Channel
2861 line[5] != '0' or # Level
2862 line[7] != '0' or # Task
2863 line[8] != '0x0000000000000000' or # Keyword
2864 line[12] != '' or # Instance ID
2865 line[13] != '' or # Parent Instance ID
2866 line[14] != self.NULL_GUID or # Activity ID
2867 line[15] != ''): # Related Activity ID
2868 raise TracingFailure(
2869 'Found unexpected values in line: %s' % ' '.join(line),
2870 None, None, None)
2871
2872 if (line[self.EVENT_NAME], line[self.TYPE]) not in supported_events:
2873 continue
2874
2875 # Convert the PID in-place from hex.
2876 line[self.PID] = int(line[self.PID], 16)
2877
2878 yield [
2879 line[self.EVENT_NAME],
2880 line[self.TYPE],
2881 line[self.PID],
2882 line[self.TID],
2883 line[self.PROCESSOR_ID],
2884 line[self.TIMESTAMP],
2885 ] + line[self.USER_DATA:]
2886
2887 write_json('%s.json' % self._logname, list(trim(load_file())), True)
2888
2889 def _convert_log(self, logformat):
2890 """Converts the ETL trace to text representation.
2891
2892 Normally, 'csv' is sufficient. If complex scripts are used (like eastern
2893 languages), use 'csv_utf16'. If localization gets in the way, use 'xml'.
2894
2895 Arguments:
2896 - logformat: Text format to be generated, csv, csv_utf16 or xml.
2897
2898 Use "tracerpt -?" for help.
2899 """
2900 LOCALE_INVARIANT = 0x7F
2901 windll.kernel32.SetThreadLocale(LOCALE_INVARIANT)
2902 cmd_convert = [
2903 'tracerpt.exe',
2904 '-l', self._logname + '.etl',
2905 '-o', self._logname + '.' + logformat,
2906 '-gmt', # Use UTC
2907 '-y', # No prompt
2908 # Use -of XML to get the header of each items after column 19, e.g. all
2909 # the actual headers of 'User Data'.
2910 ]
2911
2912 if logformat == 'csv':
2913 # tracerpt localizes the 'Type' column, for major brainfuck
2914 # entertainment. I can't imagine any sane reason to do that.
2915 cmd_convert.extend(['-of', 'CSV'])
2916 elif logformat == 'csv_utf16':
2917 # This causes it to use UTF-16, which doubles the log size but ensures
2918 # the log is readable for non-ASCII characters.
2919 cmd_convert.extend(['-of', 'CSV', '-en', 'Unicode'])
2920 elif logformat == 'xml':
2921 cmd_convert.extend(['-of', 'XML'])
2922 else:
2923 raise ValueError('Unexpected log format \'%s\'' % logformat)
2924 logging.debug('Running: %s' % cmd_convert)
2925 # This can takes tens of minutes for large logs.
2926 # Redirects all output to stderr.
2927 subprocess.check_call(
2928 cmd_convert,
2929 stdin=subprocess.PIPE,
2930 stdout=sys.stderr,
2931 stderr=sys.stderr)
2932
2933 @staticmethod
2934 def clean_trace(logname):
2935 for ext in ('', '.csv', '.etl', '.json', '.xml'):
2936 if os.path.isfile(logname + ext):
2937 os.remove(logname + ext)
2938
2939 @classmethod
2940 def parse_log(cls, logname, blacklist):
2941 logging.info('parse_log(%s, %s)' % (logname, blacklist))
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00002942 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002943
2944 def blacklist_more(filepath):
2945 # All the NTFS metadata is in the form x:\$EXTEND or stuff like that.
2946 return blacklist(filepath) or re.match(r'[A-Z]\:\\\$EXTEND', filepath)
2947
2948 data = read_json(logname)
2949 lines = read_json(logname + '.json')
2950 out = []
2951 for item in data['traces']:
2952 context = cls.Context(blacklist_more, item['pid'])
2953 for line in lines:
2954 context.on_line(line)
2955 out.append(
2956 {
2957 'results': context.to_results(),
2958 'trace': item['trace'],
2959 'output': item['output'],
2960 })
2961 return out
2962
2963
2964def get_api():
2965 """Returns the correct implementation for the current OS."""
2966 if sys.platform == 'cygwin':
2967 raise NotImplementedError(
2968 'Not implemented for cygwin, start the script from Win32 python')
2969 flavors = {
2970 'win32': LogmanTrace,
2971 'darwin': Dtrace,
2972 'sunos5': Dtrace,
2973 'freebsd7': Dtrace,
2974 'freebsd8': Dtrace,
2975 }
2976 # Defaults to strace.
2977 return flavors.get(sys.platform, Strace)()
2978
2979
2980def extract_directories(root_dir, files, blacklist):
2981 """Detects if all the files in a directory are in |files| and if so, replace
2982 the individual files by a Results.Directory instance.
2983
2984 Takes a list of Results.File instances and returns a shorter list of
2985 Results.File and Results.Directory instances.
2986
2987 Arguments:
2988 - root_dir: Optional base directory that shouldn't be search further.
2989 - files: list of Results.File instances.
2990 - blacklist: regexp of files to ignore, for example r'.+\.pyc'.
2991 """
2992 logging.info(
2993 'extract_directories(%s, %d files, ...)' % (root_dir, len(files)))
2994 assert not (root_dir or '').endswith(os.path.sep), root_dir
2995 assert not root_dir or (get_native_path_case(root_dir) == root_dir)
2996 assert not any(isinstance(f, Results.Directory) for f in files)
2997 # Remove non existent files.
2998 files = [f for f in files if f.existent]
2999 if not files:
3000 return files
3001 # All files must share the same root, which can be None.
3002 assert len(set(f.root for f in files)) == 1, set(f.root for f in files)
3003
3004 # Creates a {directory: {filename: File}} mapping, up to root.
3005 buckets = {}
3006 if root_dir:
3007 buckets[root_dir] = {}
3008 for fileobj in files:
3009 path = fileobj.full_path
3010 directory = os.path.dirname(path)
3011 assert directory
3012 # Do not use os.path.basename() so trailing os.path.sep is kept.
3013 basename = path[len(directory)+1:]
3014 files_in_directory = buckets.setdefault(directory, {})
3015 files_in_directory[basename] = fileobj
3016 # Add all the directories recursively up to root.
3017 while True:
3018 old_d = directory
3019 directory = os.path.dirname(directory)
3020 if directory + os.path.sep == root_dir or directory == old_d:
3021 break
3022 buckets.setdefault(directory, {})
3023
3024 root_prefix = len(root_dir) + 1 if root_dir else 0
3025 for directory in sorted(buckets, reverse=True):
3026 actual = set(f for f in os.listdir(directory) if not blacklist(f))
3027 expected = set(buckets[directory])
3028 if not (actual - expected):
3029 parent = os.path.dirname(directory)
3030 buckets[parent][os.path.basename(directory)] = Results.Directory(
3031 root_dir,
3032 directory[root_prefix:],
3033 False,
3034 sum(f.size for f in buckets[directory].itervalues()),
3035 sum(f.nb_files for f in buckets[directory].itervalues()))
3036 # Remove the whole bucket.
3037 del buckets[directory]
3038
3039 # Reverse the mapping with what remains. The original instances are returned,
3040 # so the cached meta data is kept.
3041 files = sum((x.values() for x in buckets.itervalues()), [])
3042 return sorted(files, key=lambda x: x.path)
3043
3044
3045def trace(logfile, cmd, cwd, api, output):
3046 """Traces an executable. Returns (returncode, output) from api.
3047
3048 Arguments:
3049 - logfile: file to write to.
3050 - cmd: command to run.
3051 - cwd: current directory to start the process in.
3052 - api: a tracing api instance.
3053 - output: if True, returns output, otherwise prints it at the console.
3054 """
3055 cmd = fix_python_path(cmd)
3056 api.clean_trace(logfile)
3057 with api.get_tracer(logfile) as tracer:
3058 return tracer.trace(cmd, cwd, 'default', output)
3059
3060
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003061def CMDclean(args):
3062 """Cleans up traces."""
3063 parser = OptionParserTraceInputs(command='clean')
3064 options, args = parser.parse_args(args)
3065 api = get_api()
3066 api.clean_trace(options.log)
3067 return 0
3068
3069
3070def CMDtrace(args):
3071 """Traces an executable."""
3072 parser = OptionParserTraceInputs(command='trace')
3073 parser.allow_interspersed_args = False
3074 parser.add_option(
3075 '-q', '--quiet', action='store_true',
3076 help='Redirects traced executable output to /dev/null')
3077 options, args = parser.parse_args(args)
3078
3079 if not args:
3080 parser.error('Please provide a command to run')
3081
3082 if not os.path.isabs(args[0]) and os.access(args[0], os.X_OK):
3083 args[0] = os.path.abspath(args[0])
3084
3085 api = get_api()
3086 return trace(options.log, args, os.getcwd(), api, options.quiet)[0]
3087
3088
3089def CMDread(args):
3090 """Reads the logs and prints the result."""
3091 parser = OptionParserTraceInputs(command='read')
3092 parser.add_option(
3093 '-V', '--variable',
3094 nargs=2,
3095 action='append',
3096 dest='variables',
3097 metavar='VAR_NAME directory',
3098 default=[],
3099 help=('Variables to replace relative directories against. Example: '
3100 '"-v \'$HOME\' \'/home/%s\'" will replace all occurence of your '
3101 'home dir with $HOME') % getpass.getuser())
3102 parser.add_option(
3103 '--root-dir',
3104 help='Root directory to base everything off it. Anything outside of this '
3105 'this directory will not be reported')
3106 parser.add_option(
3107 '-j', '--json', action='store_true',
3108 help='Outputs raw result data as json')
3109 parser.add_option(
3110 '-b', '--blacklist', action='append', default=[],
3111 help='List of regexp to use as blacklist filter')
3112 options, args = parser.parse_args(args)
3113
3114 if options.root_dir:
3115 options.root_dir = os.path.abspath(options.root_dir)
3116
3117 variables = dict(options.variables)
3118 api = get_api()
3119 def blacklist(f):
3120 return any(re.match(b, f) for b in options.blacklist)
3121 data = api.parse_log(options.log, blacklist)
3122 # Process each trace.
3123 output_as_json = []
3124 for item in data:
3125 if 'exception' in item:
maruel@chromium.org424f5692012-10-15 15:42:38 +00003126 # Do not abort the other traces.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003127 print >> sys.stderr, (
maruel@chromium.org424f5692012-10-15 15:42:38 +00003128 'Trace %s: Got an exception: %s' % (
3129 item['trace'], item['exception'][1]))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003130 continue
3131 results = item['results']
3132 if options.root_dir:
3133 results = results.strip_root(options.root_dir)
3134
3135 if options.json:
3136 output_as_json.append(results.flatten())
3137 else:
3138 simplified = extract_directories(
3139 options.root_dir, results.files, blacklist)
3140 simplified = [f.replace_variables(variables) for f in simplified]
3141 if len(data) > 1:
3142 print('Trace: %s' % item['trace'])
3143 print('Total: %d' % len(results.files))
3144 print('Non existent: %d' % len(results.non_existent))
3145 for f in results.non_existent:
3146 print(' %s' % f.path)
3147 print(
3148 'Interesting: %d reduced to %d' % (
3149 len(results.existent), len(simplified)))
3150 for f in simplified:
3151 print(' %s' % f.path)
3152
3153 if options.json:
3154 write_json(sys.stdout, output_as_json, False)
3155 return 0
3156
3157
3158class OptionParserWithLogging(optparse.OptionParser):
3159 """Adds --verbose option."""
3160 def __init__(self, verbose=0, **kwargs):
3161 optparse.OptionParser.__init__(self, **kwargs)
3162 self.add_option(
3163 '-v', '--verbose',
3164 action='count',
3165 default=verbose,
3166 help='Use multiple times to increase verbosity')
3167
3168 def parse_args(self, *args, **kwargs):
3169 options, args = optparse.OptionParser.parse_args(self, *args, **kwargs)
3170 levels = [logging.ERROR, logging.INFO, logging.DEBUG]
3171 logging.basicConfig(
3172 level=levels[min(len(levels)-1, options.verbose)],
3173 format='%(levelname)5s %(module)15s(%(lineno)3d): %(message)s')
3174 return options, args
3175
3176
3177class OptionParserWithNiceDescription(OptionParserWithLogging):
3178 """Generates the description with the command's docstring."""
3179 def __init__(self, **kwargs):
3180 """Sets 'description' and 'usage' if not already specified."""
3181 command = kwargs.pop('command', 'help')
3182 kwargs.setdefault(
3183 'description',
3184 re.sub('[\r\n ]{2,}', ' ', get_command_handler(command).__doc__))
3185 kwargs.setdefault('usage', '%%prog %s [options]' % command)
3186 OptionParserWithLogging.__init__(self, **kwargs)
3187
3188
3189class OptionParserTraceInputs(OptionParserWithNiceDescription):
3190 """Adds automatic --log handling."""
3191 def __init__(self, **kwargs):
3192 OptionParserWithNiceDescription.__init__(self, **kwargs)
3193 self.add_option(
3194 '-l', '--log', help='Log file to generate or read, required')
3195
3196 def parse_args(self, *args, **kwargs):
3197 """Makes sure the paths make sense.
3198
3199 On Windows, / and \ are often mixed together in a path.
3200 """
3201 options, args = OptionParserWithNiceDescription.parse_args(
3202 self, *args, **kwargs)
3203 if not options.log:
3204 self.error('Must supply a log file with -l')
3205 options.log = os.path.abspath(options.log)
3206 return options, args
3207
3208
3209def extract_documentation():
3210 """Returns a dict {command: description} for each of documented command."""
3211 commands = (
3212 fn[3:]
3213 for fn in dir(sys.modules['__main__'])
3214 if fn.startswith('CMD') and get_command_handler(fn[3:]).__doc__)
3215 return dict((fn, get_command_handler(fn).__doc__) for fn in commands)
3216
3217
3218def CMDhelp(args):
3219 """Prints list of commands or help for a specific command."""
3220 doc = extract_documentation()
3221 # Calculates the optimal offset.
3222 offset = max(len(cmd) for cmd in doc)
3223 format_str = ' %-' + str(offset + 2) + 's %s'
3224 # Generate a one-liner documentation of each commands.
3225 commands_description = '\n'.join(
3226 format_str % (cmd, doc[cmd].split('\n')[0]) for cmd in sorted(doc))
3227
3228 parser = OptionParserWithNiceDescription(
3229 usage='%prog <command> [options]',
3230 description='Commands are:\n%s\n' % commands_description)
3231 parser.format_description = lambda _: parser.description
3232
3233 # Strip out any -h or --help argument.
3234 _, args = parser.parse_args([i for i in args if not i in ('-h', '--help')])
3235 if len(args) == 1:
3236 if not get_command_handler(args[0]):
3237 parser.error('Unknown command %s' % args[0])
3238 # The command was "%prog help command", replaces ourself with
3239 # "%prog command --help" so help is correctly printed out.
3240 return main(args + ['--help'])
3241 elif args:
3242 parser.error('Unknown argument "%s"' % ' '.join(args))
3243 parser.print_help()
3244 return 0
3245
3246
3247def get_command_handler(name):
3248 """Returns the command handler or CMDhelp if it doesn't exist."""
3249 return getattr(sys.modules['__main__'], 'CMD%s' % name, None)
3250
3251
3252def main_impl(argv):
3253 command = get_command_handler(argv[0] if argv else 'help')
3254 if not command:
3255 return CMDhelp(argv)
3256 return command(argv[1:])
3257
3258def main(argv):
3259 try:
3260 main_impl(argv)
3261 except TracingFailure, e:
3262 sys.stderr.write('\nError: ')
3263 sys.stderr.write(str(e))
3264 sys.stderr.write('\n')
3265 return 1
3266
3267
3268if __name__ == '__main__':
3269 sys.exit(main(sys.argv[1:]))