blob: 5b23cfd1d6a336b19d6bc8271f380797a2387ff6 [file] [log] [blame]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001#!/usr/bin/env python
2# coding=utf-8
3# Copyright (c) 2012 The Chromium Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""Traces an executable and its child processes and extract the files accessed
8by them.
9
10The implementation uses OS-specific API. The native Kernel logger and the ETL
11interface is used on Windows. Dtrace is used on OSX. Strace is used otherwise.
12The OS-specific implementation is hidden in an 'API' interface.
13
14The results are embedded in a Results instance. The tracing is done in two
15phases, the first is to do the actual trace and generate an
16implementation-specific log file. Then the log file is parsed to extract the
17information, including the individual child processes and the files accessed
18from the log.
19"""
20
21import codecs
22import csv
23import getpass
24import glob
25import json
26import logging
maruel@chromium.org306e0e72012-11-02 18:22:03 +000027import unicodedata
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000028import optparse
29import os
30import re
31import subprocess
32import sys
33import tempfile
34import threading
35import time
36import weakref
37
38## OS-specific imports
39
40if sys.platform == 'win32':
41 from ctypes.wintypes import byref, create_unicode_buffer, c_int, c_wchar_p
42 from ctypes.wintypes import windll, FormatError # pylint: disable=E0611
43 from ctypes.wintypes import GetLastError # pylint: disable=E0611
44elif sys.platform == 'darwin':
45 import Carbon.File # pylint: disable=F0401
46 import MacOS # pylint: disable=F0401
47
48
49BASE_DIR = os.path.dirname(os.path.abspath(__file__))
50ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR))
51
52
53class TracingFailure(Exception):
54 """An exception occured during tracing."""
55 def __init__(self, description, pid, line_number, line, *args):
56 super(TracingFailure, self).__init__(
57 description, pid, line_number, line, *args)
58 self.description = description
59 self.pid = pid
60 self.line_number = line_number
61 self.line = line
62 self.extra = args
63
64 def __str__(self):
65 out = self.description
66 if self.pid:
67 out += '\npid: %d' % self.pid
68 if self.line_number:
69 out += '\nline: %d' % self.line_number
70 if self.line:
71 out += '\n%s' % self.line
72 if self.extra:
73 out += '\n' + ', '.join(map(str, filter(None, self.extra)))
74 return out
75
76
77## OS-specific functions
78
79if sys.platform == 'win32':
80 def QueryDosDevice(drive_letter):
81 """Returns the Windows 'native' path for a DOS drive letter."""
82 assert re.match(r'^[a-zA-Z]:$', drive_letter), drive_letter
maruel@chromium.org306e0e72012-11-02 18:22:03 +000083 assert isinstance(drive_letter, unicode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000084 # Guesswork. QueryDosDeviceW never returns the required number of bytes.
85 chars = 1024
maruel@chromium.org306e0e72012-11-02 18:22:03 +000086 drive_letter = drive_letter
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000087 p = create_unicode_buffer(chars)
88 if 0 == windll.kernel32.QueryDosDeviceW(drive_letter, p, chars):
89 err = GetLastError()
90 if err:
91 # pylint: disable=E0602
maruel@chromium.orgf37c2472012-11-02 20:06:13 +000092 msg = u'QueryDosDevice(%s): %s (%d)' % (
93 drive_letter, FormatError(err), err)
94 raise WindowsError(err, msg.encode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000095 return p.value
96
97
98 def GetShortPathName(long_path):
99 """Returns the Windows short path equivalent for a 'long' path."""
maruel@chromium.org68bb1482013-02-15 22:36:24 +0000100 assert isinstance(long_path, unicode), repr(long_path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000101 # Adds '\\\\?\\' when given an absolute path so the MAX_PATH (260) limit is
102 # not enforced.
103 if os.path.isabs(long_path) and not long_path.startswith('\\\\?\\'):
104 long_path = '\\\\?\\' + long_path
105 chars = windll.kernel32.GetShortPathNameW(long_path, None, 0)
106 if chars:
107 p = create_unicode_buffer(chars)
108 if windll.kernel32.GetShortPathNameW(long_path, p, chars):
109 return p.value
110
111 err = GetLastError()
112 if err:
113 # pylint: disable=E0602
maruel@chromium.orgf37c2472012-11-02 20:06:13 +0000114 msg = u'GetShortPathName(%s): %s (%d)' % (
115 long_path, FormatError(err), err)
116 raise WindowsError(err, msg.encode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000117
118
119 def GetLongPathName(short_path):
120 """Returns the Windows long path equivalent for a 'short' path."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000121 assert isinstance(short_path, unicode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000122 # Adds '\\\\?\\' when given an absolute path so the MAX_PATH (260) limit is
123 # not enforced.
124 if os.path.isabs(short_path) and not short_path.startswith('\\\\?\\'):
125 short_path = '\\\\?\\' + short_path
126 chars = windll.kernel32.GetLongPathNameW(short_path, None, 0)
127 if chars:
128 p = create_unicode_buffer(chars)
129 if windll.kernel32.GetLongPathNameW(short_path, p, chars):
130 return p.value
131
132 err = GetLastError()
133 if err:
134 # pylint: disable=E0602
maruel@chromium.orgf37c2472012-11-02 20:06:13 +0000135 msg = u'GetLongPathName(%s): %s (%d)' % (
136 short_path, FormatError(err), err)
137 raise WindowsError(err, msg.encode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000138
139
140 def get_current_encoding():
141 """Returns the 'ANSI' code page associated to the process."""
142 return 'cp%d' % int(windll.kernel32.GetACP())
143
144
145 class DosDriveMap(object):
146 """Maps \Device\HarddiskVolumeN to N: on Windows."""
147 # Keep one global cache.
148 _MAPPING = {}
149
150 def __init__(self):
151 """Lazy loads the cache."""
152 if not self._MAPPING:
153 # This is related to UNC resolver on windows. Ignore that.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000154 self._MAPPING[u'\\Device\\Mup'] = None
155 self._MAPPING[u'\\SystemRoot'] = os.environ[u'SystemRoot']
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000156
157 for letter in (chr(l) for l in xrange(ord('C'), ord('Z')+1)):
158 try:
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000159 letter = u'%s:' % letter
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000160 mapped = QueryDosDevice(letter)
161 if mapped in self._MAPPING:
162 logging.warn(
163 ('Two drives: \'%s\' and \'%s\', are mapped to the same disk'
164 '. Drive letters are a user-mode concept and the kernel '
165 'traces only have NT path, so all accesses will be '
166 'associated with the first drive letter, independent of the '
167 'actual letter used by the code') % (
168 self._MAPPING[mapped], letter))
169 else:
170 self._MAPPING[mapped] = letter
171 except WindowsError: # pylint: disable=E0602
172 pass
173
174 def to_win32(self, path):
175 """Converts a native NT path to Win32/DOS compatible path."""
176 match = re.match(r'(^\\Device\\[a-zA-Z0-9]+)(\\.*)?$', path)
177 if not match:
178 raise ValueError(
179 'Can\'t convert %s into a Win32 compatible path' % path,
180 path)
181 if not match.group(1) in self._MAPPING:
182 # Unmapped partitions may be accessed by windows for the
183 # fun of it while the test is running. Discard these.
184 return None
185 drive = self._MAPPING[match.group(1)]
186 if not drive or not match.group(2):
187 return drive
188 return drive + match.group(2)
189
190
191 def isabs(path):
192 """Accepts X: as an absolute path, unlike python's os.path.isabs()."""
193 return os.path.isabs(path) or len(path) == 2 and path[1] == ':'
194
195
196 def get_native_path_case(p):
197 """Returns the native path case for an existing file.
198
199 On Windows, removes any leading '\\?\'.
200 """
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000201 assert isinstance(p, unicode), repr(p)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000202 if not isabs(p):
203 raise ValueError(
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +0000204 'get_native_path_case(%r): Require an absolute path' % p, p)
205
maruel@chromium.org037758d2012-12-10 17:59:46 +0000206 # Make sure it is normalized to os.path.sep. Do not do it here to keep the
207 # function fast
208 assert '/' not in p, p
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +0000209 suffix = ''
210 count = p.count(':')
211 if count > 1:
212 # This means it has an alternate-data stream. There could be 3 ':', since
213 # it could be the $DATA datastream of an ADS. Split the whole ADS suffix
214 # off and add it back afterward. There is no way to know the native path
215 # case of an alternate data stream.
216 items = p.split(':')
217 p = ':'.join(items[0:2])
218 suffix = ''.join(':' + i for i in items[2:])
219
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000220 # Windows used to have an option to turn on case sensitivity on non Win32
221 # subsystem but that's out of scope here and isn't supported anymore.
222 # Go figure why GetShortPathName() is needed.
223 try:
224 out = GetLongPathName(GetShortPathName(p))
225 except OSError, e:
226 if e.args[0] in (2, 3, 5):
227 # The path does not exist. Try to recurse and reconstruct the path.
228 base = os.path.dirname(p)
229 rest = os.path.basename(p)
230 return os.path.join(get_native_path_case(base), rest)
231 raise
232 if out.startswith('\\\\?\\'):
233 out = out[4:]
234 # Always upper case the first letter since GetLongPathName() will return the
235 # drive letter in the case it was given.
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +0000236 return out[0].upper() + out[1:] + suffix
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000237
238
239 def CommandLineToArgvW(command_line):
240 """Splits a commandline into argv using CommandLineToArgvW()."""
241 # http://msdn.microsoft.com/library/windows/desktop/bb776391.aspx
242 size = c_int()
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000243 assert isinstance(command_line, unicode)
244 ptr = windll.shell32.CommandLineToArgvW(command_line, byref(size))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000245 try:
246 return [arg for arg in (c_wchar_p * size.value).from_address(ptr)]
247 finally:
248 windll.kernel32.LocalFree(ptr)
249
250
251elif sys.platform == 'darwin':
252
253
254 # On non-windows, keep the stdlib behavior.
255 isabs = os.path.isabs
256
257
258 def _find_item_native_case(root_path, item):
259 """Gets the native path case of a single item based at root_path.
260
261 There is no API to get the native path case of symlinks on OSX. So it
262 needs to be done the slow way.
263 """
264 item = item.lower()
265 for element in os.listdir(root_path):
266 if element.lower() == item:
267 return element
268
269
270 def _native_case(p):
271 """Gets the native path case. Warning: this function resolves symlinks."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000272 try:
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000273 rel_ref, _ = Carbon.File.FSPathMakeRef(p.encode('utf-8'))
274 # The OSX underlying code uses NFD but python strings are in NFC. This
275 # will cause issues with os.listdir() for example. Since the dtrace log
276 # *is* in NFC, normalize it here.
csharp@chromium.org5a2c3732013-03-07 14:44:56 +0000277 out = unicodedata.normalize(
278 'NFC', rel_ref.FSRefMakePath().decode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000279 if p.endswith(os.path.sep) and not out.endswith(os.path.sep):
280 return out + os.path.sep
281 return out
282 except MacOS.Error, e:
283 if e.args[0] in (-43, -120):
284 # The path does not exist. Try to recurse and reconstruct the path.
285 # -43 means file not found.
286 # -120 means directory not found.
287 base = os.path.dirname(p)
288 rest = os.path.basename(p)
csharp@chromium.org5a2c3732013-03-07 14:44:56 +0000289 return os.path.join(_native_case(base), rest)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000290 raise OSError(
291 e.args[0], 'Failed to get native path for %s' % p, p, e.args[1])
292
293
294 def _split_at_symlink_native(base_path, rest):
295 """Returns the native path for a symlink."""
296 base, symlink, rest = split_at_symlink(base_path, rest)
297 if symlink:
298 if not base_path:
299 base_path = base
300 else:
301 base_path = safe_join(base_path, base)
302 symlink = _find_item_native_case(base_path, symlink)
303 return base, symlink, rest
304
305
306 def get_native_path_case(path):
307 """Returns the native path case for an existing file.
308
309 Technically, it's only HFS+ on OSX that is case preserving and
310 insensitive. It's the default setting on HFS+ but can be changed.
311 """
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000312 assert isinstance(path, unicode), repr(path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000313 if not isabs(path):
314 raise ValueError(
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000315 'get_native_path_case(%r): Require an absolute path' % path, path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000316 if path.startswith('/dev'):
317 # /dev is not visible from Carbon, causing an exception.
318 return path
319
320 # Starts assuming there is no symlink along the path.
321 resolved = _native_case(path)
322 if resolved.lower() == path.lower():
323 # This code path is incredibly faster.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000324 logging.debug('get_native_path_case(%s) = %s' % (path, resolved))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000325 return resolved
326
327 # There was a symlink, process it.
328 base, symlink, rest = _split_at_symlink_native(None, path)
329 assert symlink, (path, base, symlink, rest, resolved)
330 prev = base
331 base = safe_join(_native_case(base), symlink)
332 assert len(base) > len(prev)
333 while rest:
334 prev = base
335 relbase, symlink, rest = _split_at_symlink_native(base, rest)
336 base = safe_join(base, relbase)
337 assert len(base) > len(prev), (prev, base, symlink)
338 if symlink:
339 base = safe_join(base, symlink)
340 assert len(base) > len(prev), (prev, base, symlink)
341 # Make sure no symlink was resolved.
342 assert base.lower() == path.lower(), (base, path)
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000343 logging.debug('get_native_path_case(%s) = %s' % (path, base))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000344 return base
345
346
347else: # OSes other than Windows and OSX.
348
349
350 # On non-windows, keep the stdlib behavior.
351 isabs = os.path.isabs
352
353
354 def get_native_path_case(path):
355 """Returns the native path case for an existing file.
356
357 On OSes other than OSX and Windows, assume the file system is
358 case-sensitive.
359
360 TODO(maruel): This is not strictly true. Implement if necessary.
361 """
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000362 assert isinstance(path, unicode), repr(path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000363 if not isabs(path):
364 raise ValueError(
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000365 'get_native_path_case(%r): Require an absolute path' % path, path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000366 # Give up on cygwin, as GetLongPathName() can't be called.
367 # Linux traces tends to not be normalized so use this occasion to normalize
368 # it. This function implementation already normalizes the path on the other
369 # OS so this needs to be done here to be coherent between OSes.
370 out = os.path.normpath(path)
371 if path.endswith(os.path.sep) and not out.endswith(os.path.sep):
372 return out + os.path.sep
373 return out
374
375
376if sys.platform != 'win32': # All non-Windows OSes.
377
378
379 def safe_join(*args):
380 """Joins path elements like os.path.join() but doesn't abort on absolute
381 path.
382
383 os.path.join('foo', '/bar') == '/bar'
384 but safe_join('foo', '/bar') == 'foo/bar'.
385 """
386 out = ''
387 for element in args:
388 if element.startswith(os.path.sep):
389 if out.endswith(os.path.sep):
390 out += element[1:]
391 else:
392 out += element
393 else:
394 if out.endswith(os.path.sep):
395 out += element
396 else:
397 out += os.path.sep + element
398 return out
399
400
401 def split_at_symlink(base_dir, relfile):
402 """Scans each component of relfile and cut the string at the symlink if
403 there is any.
404
405 Returns a tuple (base_path, symlink, rest), with symlink == rest == None if
406 not symlink was found.
407 """
408 if base_dir:
409 assert relfile
410 assert os.path.isabs(base_dir)
411 index = 0
412 else:
413 assert os.path.isabs(relfile)
414 index = 1
415
416 def at_root(rest):
417 if base_dir:
418 return safe_join(base_dir, rest)
419 return rest
420
421 while True:
422 try:
423 index = relfile.index(os.path.sep, index)
424 except ValueError:
425 index = len(relfile)
426 full = at_root(relfile[:index])
427 if os.path.islink(full):
428 # A symlink!
429 base = os.path.dirname(relfile[:index])
430 symlink = os.path.basename(relfile[:index])
431 rest = relfile[index:]
432 logging.debug(
433 'split_at_symlink(%s, %s) -> (%s, %s, %s)' %
434 (base_dir, relfile, base, symlink, rest))
435 return base, symlink, rest
436 if index == len(relfile):
437 break
438 index += 1
439 return relfile, None, None
440
441
442def fix_python_path(cmd):
443 """Returns the fixed command line to call the right python executable."""
444 out = cmd[:]
445 if out[0] == 'python':
446 out[0] = sys.executable
447 elif out[0].endswith('.py'):
448 out.insert(0, sys.executable)
449 return out
450
451
452def create_thunk():
453 handle, name = tempfile.mkstemp(prefix='trace_inputs_thunk', suffix='.py')
454 os.write(
455 handle,
456 (
457 'import subprocess\n'
458 'import sys\n'
459 'sys.exit(subprocess.call(sys.argv[2:]))\n'
460 ))
461 os.close(handle)
462 return name
463
464
465def strace_process_quoted_arguments(text):
466 """Extracts quoted arguments on a string and return the arguments as a list.
467
468 Implemented as an automaton. Supports incomplete strings in the form
469 '"foo"...'.
470
471 Example:
472 With text = '"foo", "bar"', the function will return ['foo', 'bar']
473
474 TODO(maruel): Implement escaping.
475 """
476 # All the possible states of the DFA.
477 ( NEED_QUOTE, # Begining of a new arguments.
478 INSIDE_STRING, # Inside an argument.
479 ESCAPED, # Found a '\' inside a quote. Treat the next char as-is.
480 NEED_COMMA_OR_DOT, # Right after the closing quote of an argument. Could be
481 # a serie of 3 dots or a comma.
482 NEED_SPACE, # Right after a comma
483 NEED_DOT_2, # Found a dot, need a second one.
484 NEED_DOT_3, # Found second dot, need a third one.
485 NEED_COMMA, # Found third dot, need a comma.
486 ) = range(8)
487
488 state = NEED_QUOTE
489 out = []
490 for index, char in enumerate(text):
491 if char == '"':
492 if state == NEED_QUOTE:
493 state = INSIDE_STRING
494 # A new argument was found.
495 out.append('')
496 elif state == INSIDE_STRING:
497 # The argument is now closed.
498 state = NEED_COMMA_OR_DOT
499 elif state == ESCAPED:
500 out[-1] += char
501 state = INSIDE_STRING
502 else:
503 raise ValueError(
504 'Can\'t process char at column %d for: %r' % (index, text),
505 index,
506 text)
507 elif char == ',':
508 if state in (NEED_COMMA_OR_DOT, NEED_COMMA):
509 state = NEED_SPACE
510 elif state == INSIDE_STRING:
511 out[-1] += char
512 elif state == ESCAPED:
513 out[-1] += char
514 state = INSIDE_STRING
515 else:
516 raise ValueError(
517 'Can\'t process char at column %d for: %r' % (index, text),
518 index,
519 text)
520 elif char == ' ':
521 if state == NEED_SPACE:
522 state = NEED_QUOTE
523 elif state == INSIDE_STRING:
524 out[-1] += char
525 elif state == ESCAPED:
526 out[-1] += char
527 state = INSIDE_STRING
528 else:
529 raise ValueError(
530 'Can\'t process char at column %d for: %r' % (index, text),
531 index,
532 text)
533 elif char == '.':
534 if state == NEED_COMMA_OR_DOT:
535 # The string is incomplete, this mean the strace -s flag should be
536 # increased.
537 state = NEED_DOT_2
538 elif state == NEED_DOT_2:
539 state = NEED_DOT_3
540 elif state == NEED_DOT_3:
541 state = NEED_COMMA
542 elif state == INSIDE_STRING:
543 out[-1] += char
544 elif state == ESCAPED:
545 out[-1] += char
546 state = INSIDE_STRING
547 else:
548 raise ValueError(
549 'Can\'t process char at column %d for: %r' % (index, text),
550 index,
551 text)
552 elif char == '\\':
553 if state == ESCAPED:
554 out[-1] += char
555 state = INSIDE_STRING
556 elif state == INSIDE_STRING:
557 state = ESCAPED
558 else:
559 raise ValueError(
560 'Can\'t process char at column %d for: %r' % (index, text),
561 index,
562 text)
563 else:
564 if state == INSIDE_STRING:
565 out[-1] += char
566 else:
567 raise ValueError(
568 'Can\'t process char at column %d for: %r' % (index, text),
569 index,
570 text)
571 if state not in (NEED_COMMA, NEED_COMMA_OR_DOT):
572 raise ValueError(
573 'String is incorrectly terminated: %r' % text,
574 text)
575 return out
576
577
578def read_json(filepath):
579 with open(filepath, 'r') as f:
580 return json.load(f)
581
582
583def write_json(filepath_or_handle, data, dense):
584 """Writes data into filepath or file handle encoded as json.
585
586 If dense is True, the json is packed. Otherwise, it is human readable.
587 """
588 if hasattr(filepath_or_handle, 'write'):
589 if dense:
590 filepath_or_handle.write(
591 json.dumps(data, sort_keys=True, separators=(',',':')))
592 else:
593 filepath_or_handle.write(json.dumps(data, sort_keys=True, indent=2))
594 else:
595 with open(filepath_or_handle, 'wb') as f:
596 if dense:
597 json.dump(data, f, sort_keys=True, separators=(',',':'))
598 else:
599 json.dump(data, f, sort_keys=True, indent=2)
600
601
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000602def assert_is_renderable(pseudo_string):
603 """Asserts the input is a valid object to be processed by render()."""
604 assert (
605 isinstance(pseudo_string, (None.__class__, unicode)) or
606 hasattr(pseudo_string, 'render')), repr(pseudo_string)
607
608
609def render(pseudo_string):
610 """Converts the pseudo-string to an unicode string."""
611 assert_is_renderable(pseudo_string)
612 if isinstance(pseudo_string, (None.__class__, unicode)):
613 return pseudo_string
614 return pseudo_string.render()
615
616
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000617class Results(object):
618 """Results of a trace session."""
619
620 class _TouchedObject(object):
621 """Something, a file or a directory, that was accessed."""
622 def __init__(self, root, path, tainted, size, nb_files):
623 logging.debug(
624 '%s(%s, %s, %s, %s, %s)' %
625 (self.__class__.__name__, root, path, tainted, size, nb_files))
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000626 assert_is_renderable(root)
627 assert_is_renderable(path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000628 self.root = root
629 self.path = path
630 self.tainted = tainted
631 self.nb_files = nb_files
632 # Can be used as a cache or a default value, depending on context.
633 self._size = size
634 # These are cache only.
635 self._real_path = None
636
637 # Check internal consistency.
638 assert path, path
639 assert tainted or bool(root) != bool(isabs(path)), (root, path)
640 assert tainted or (
641 not os.path.exists(self.full_path) or
642 (self.full_path == get_native_path_case(self.full_path))), (
643 tainted, self.full_path, get_native_path_case(self.full_path))
644
645 @property
646 def existent(self):
647 return self.size != -1
648
649 @property
650 def full_path(self):
651 if self.root:
652 return os.path.join(self.root, self.path)
653 return self.path
654
655 @property
656 def real_path(self):
657 """Returns the path with symlinks resolved."""
658 if not self._real_path:
659 self._real_path = os.path.realpath(self.full_path)
660 return self._real_path
661
662 @property
663 def size(self):
664 """File's size. -1 is not existent."""
665 if self._size is None and not self.tainted:
666 try:
667 self._size = os.stat(self.full_path).st_size
668 except OSError:
669 self._size = -1
670 return self._size
671
672 def flatten(self):
673 """Returns a dict representing this object.
674
675 A 'size' of 0 means the file was only touched and not read.
676 """
677 return {
678 'path': self.path,
679 'size': self.size,
680 }
681
682 def replace_variables(self, variables):
683 """Replaces the root of this File with one of the variables if it matches.
684
685 If a variable replacement occurs, the cloned object becomes tainted.
686 """
687 for variable, root_path in variables.iteritems():
688 if self.path.startswith(root_path):
689 return self._clone(
690 self.root, variable + self.path[len(root_path):], True)
691 # No need to clone, returns ourself.
692 return self
693
694 def strip_root(self, root):
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000695 """Returns a clone of itself with 'root' stripped off.
696
697 Note that the file is kept if it is either accessible from a symlinked
698 path that was used to access the file or through the real path.
699 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000700 # Check internal consistency.
701 assert self.tainted or (isabs(root) and root.endswith(os.path.sep)), root
702 if not self.full_path.startswith(root):
703 # Now try to resolve the symlinks to see if it can be reached this way.
704 # Only try *after* trying without resolving symlink.
705 if not self.real_path.startswith(root):
706 return None
707 path = self.real_path
708 else:
709 path = self.full_path
710 return self._clone(root, path[len(root):], self.tainted)
711
712 def _clone(self, new_root, new_path, tainted):
713 raise NotImplementedError(self.__class__.__name__)
714
715 class File(_TouchedObject):
716 """A file that was accessed. May not be present anymore.
717
718 If tainted is true, it means it is not a real path anymore as a variable
719 replacement occured.
720
maruel@chromium.orge5c17132012-11-21 18:18:46 +0000721 If only_touched is True, this means the file was probed for existence, and
722 it is existent, but was never _opened_. If only_touched is True, the file
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000723 must have existed.
724 """
725 def __init__(self, root, path, tainted, size):
726 super(Results.File, self).__init__(root, path, tainted, size, 1)
727
728 def _clone(self, new_root, new_path, tainted):
729 """Clones itself keeping meta-data."""
730 # Keep the self.size and self._real_path caches for performance reason. It
731 # is also important when the file becomes tainted (with a variable instead
732 # of the real path) since self.path is not an on-disk path anymore so
733 # out._size cannot be updated.
734 out = self.__class__(new_root, new_path, tainted, self.size)
735 out._real_path = self._real_path
736 return out
737
738 class Directory(_TouchedObject):
739 """A directory of files. Must exist."""
740 def __init__(self, root, path, tainted, size, nb_files):
741 """path='.' is a valid value and must be handled appropriately."""
742 assert not path.endswith(os.path.sep), path
743 super(Results.Directory, self).__init__(
744 root, path + os.path.sep, tainted, size, nb_files)
745 # For a Directory instance, self.size is not a cache, it's an actual value
746 # that is never modified and represents the total size of the files
747 # contained in this directory. It is possible that the directory is empty
748 # so that size == 0; this happens if there's only an invalid symlink in
749 # it.
750
751 def flatten(self):
752 out = super(Results.Directory, self).flatten()
753 out['nb_files'] = self.nb_files
754 return out
755
756 def _clone(self, new_root, new_path, tainted):
757 """Clones itself keeping meta-data."""
758 out = self.__class__(
759 new_root,
760 new_path.rstrip(os.path.sep),
761 tainted,
762 self.size,
763 self.nb_files)
764 out._real_path = self._real_path
765 return out
766
767 class Process(object):
768 """A process that was traced.
769
770 Contains references to the files accessed by this process and its children.
771 """
772 def __init__(self, pid, files, executable, command, initial_cwd, children):
773 logging.debug('Process(%s, %d, ...)' % (pid, len(files)))
774 self.pid = pid
775 self.files = sorted(files, key=lambda x: x.path)
776 self.children = children
777 self.executable = executable
778 self.command = command
779 self.initial_cwd = initial_cwd
780
781 # Check internal consistency.
782 assert len(set(f.path for f in self.files)) == len(self.files), sorted(
783 f.path for f in self.files)
784 assert isinstance(self.children, list)
785 assert isinstance(self.files, list)
786
787 @property
788 def all(self):
789 for child in self.children:
790 for i in child.all:
791 yield i
792 yield self
793
794 def flatten(self):
795 return {
796 'children': [c.flatten() for c in self.children],
797 'command': self.command,
798 'executable': self.executable,
799 'files': [f.flatten() for f in self.files],
800 'initial_cwd': self.initial_cwd,
801 'pid': self.pid,
802 }
803
804 def strip_root(self, root):
805 assert isabs(root) and root.endswith(os.path.sep), root
806 # Loads the files after since they are constructed as objects.
807 out = self.__class__(
808 self.pid,
809 filter(None, (f.strip_root(root) for f in self.files)),
810 self.executable,
811 self.command,
812 self.initial_cwd,
813 [c.strip_root(root) for c in self.children])
814 logging.debug(
815 'strip_root(%s) %d -> %d' % (root, len(self.files), len(out.files)))
816 return out
817
818 def __init__(self, process):
819 self.process = process
820 # Cache.
821 self._files = None
822
823 def flatten(self):
824 return {
825 'root': self.process.flatten(),
826 }
827
828 @property
829 def files(self):
830 if self._files is None:
831 self._files = sorted(
832 sum((p.files for p in self.process.all), []),
833 key=lambda x: x.path)
834 return self._files
835
836 @property
837 def existent(self):
838 return [f for f in self.files if f.existent]
839
840 @property
841 def non_existent(self):
842 return [f for f in self.files if not f.existent]
843
844 def strip_root(self, root):
845 """Returns a clone with all the files outside the directory |root| removed
846 and converts all the path to be relative paths.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000847
848 It keeps files accessible through the |root| directory or that have been
849 accessed through any symlink which points to the same directory.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000850 """
851 # Resolve any symlink
852 root = os.path.realpath(root)
853 root = get_native_path_case(root).rstrip(os.path.sep) + os.path.sep
854 logging.debug('strip_root(%s)' % root)
855 return Results(self.process.strip_root(root))
856
857
858class ApiBase(object):
859 """OS-agnostic API to trace a process and its children."""
860 class Context(object):
861 """Processes one log line at a time and keeps the list of traced processes.
862
863 The parsing is complicated by the fact that logs are traced out of order for
864 strace but in-order for dtrace and logman. In addition, on Windows it is
865 very frequent that processids are reused so a flat list cannot be used. But
866 at the same time, it is impossible to faithfully construct a graph when the
867 logs are processed out of order. So both a tree and a flat mapping are used,
868 the tree is the real process tree, while the flat mapping stores the last
869 valid process for the corresponding processid. For the strace case, the
870 tree's head is guessed at the last moment.
871 """
872 class Process(object):
873 """Keeps context for one traced child process.
874
875 Logs all the files this process touched. Ignores directories.
876 """
877 def __init__(self, blacklist, pid, initial_cwd):
878 # Check internal consistency.
879 assert isinstance(pid, int), repr(pid)
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000880 assert_is_renderable(initial_cwd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000881 self.pid = pid
882 # children are Process instances.
883 self.children = []
884 self.initial_cwd = initial_cwd
885 self.cwd = None
886 self.files = set()
887 self.only_touched = set()
888 self.executable = None
889 self.command = None
890 self._blacklist = blacklist
891
892 def to_results_process(self):
893 """Resolves file case sensitivity and or late-bound strings."""
894 # When resolving files, it's normal to get dupe because a file could be
895 # opened multiple times with different case. Resolve the deduplication
896 # here.
897 def fix_path(x):
898 """Returns the native file path case.
899
900 Converts late-bound strings.
901 """
902 if not x:
903 # Do not convert None instance to 'None'.
904 return x
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000905 x = render(x)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000906 if os.path.isabs(x):
907 # If the path is not absolute, which tends to happen occasionally on
908 # Windows, it is not possible to get the native path case so ignore
909 # that trace. It mostly happens for 'executable' value.
910 x = get_native_path_case(x)
911 return x
912
913 def fix_and_blacklist_path(x):
914 x = fix_path(x)
915 if not x:
916 return
917 # The blacklist needs to be reapplied, since path casing could
918 # influence blacklisting.
919 if self._blacklist(x):
920 return
921 return x
922
923 # Filters out directories. Some may have passed through.
924 files = set(f for f in map(fix_and_blacklist_path, self.files) if f)
925 only_touched = set(
926 f for f in map(fix_and_blacklist_path, self.only_touched) if f)
927 only_touched -= files
928
929 files = [
930 Results.File(None, f, False, None) for f in files
931 if not os.path.isdir(f)
932 ]
933 # Using 0 as size means the file's content is ignored since the file was
934 # never opened for I/O.
935 files.extend(
936 Results.File(None, f, False, 0) for f in only_touched
937 if not os.path.isdir(f)
938 )
939 return Results.Process(
940 self.pid,
941 files,
942 fix_path(self.executable),
943 self.command,
944 fix_path(self.initial_cwd),
945 [c.to_results_process() for c in self.children])
946
947 def add_file(self, filepath, touch_only):
948 """Adds a file if it passes the blacklist."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000949 if self._blacklist(render(filepath)):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000950 return
951 logging.debug('add_file(%d, %s, %s)' % (self.pid, filepath, touch_only))
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000952 # Note that filepath and not render(filepath) is added. It is
953 # because filepath could be something else than a string, like a
954 # RelativePath instance for dtrace logs.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000955 if touch_only:
956 self.only_touched.add(filepath)
957 else:
958 self.files.add(filepath)
959
960 def __init__(self, blacklist):
961 self.blacklist = blacklist
962 # Initial process.
963 self.root_process = None
964 # dict to accelerate process lookup, to not have to lookup the whole graph
965 # each time.
966 self._process_lookup = {}
967
968 class Tracer(object):
969 """During it's lifetime, the tracing subsystem is enabled."""
970 def __init__(self, logname):
971 self._logname = logname
972 self._lock = threading.Lock()
973 self._traces = []
974 self._initialized = True
975
976 def trace(self, cmd, cwd, tracename, output):
977 """Runs the OS-specific trace program on an executable.
978
979 Arguments:
980 - cmd: The command (a list) to run.
981 - cwd: Current directory to start the child process in.
982 - tracename: Name of the trace in the logname file.
983 - output: If False, redirects output to PIPEs.
984
985 Returns a tuple (resultcode, output) and updates the internal trace
986 entries.
987 """
988 # The implementation adds an item to self._traces.
989 raise NotImplementedError(self.__class__.__name__)
990
991 def close(self, _timeout=None):
992 """Saves the meta-data in the logname file.
993
994 For kernel-based tracing, stops the tracing subsystem.
995
996 Must not be used manually when using 'with' construct.
997 """
998 with self._lock:
999 assert self._initialized
1000 try:
1001 data = {
1002 'traces': self._traces,
1003 }
1004 write_json(self._logname, data, False)
1005 finally:
1006 self._initialized = False
1007
1008 def post_process_log(self):
1009 """Post-processes the log so it becomes faster to load afterward.
1010
1011 Must not be used manually when using 'with' construct.
1012 """
1013 assert not self._initialized, 'Must stop tracing first.'
1014
1015 def __enter__(self):
1016 """Enables 'with' statement."""
1017 return self
1018
1019 def __exit__(self, exc_type, exc_value, traceback):
1020 """Enables 'with' statement."""
1021 self.close()
1022 # If an exception was thrown, do not process logs.
1023 if not exc_type:
1024 self.post_process_log()
1025
1026 def get_tracer(self, logname):
1027 """Returns an ApiBase.Tracer instance.
1028
1029 Initializes the tracing subsystem, which is a requirement for kernel-based
1030 tracers. Only one tracer instance should be live at a time!
1031
1032 logname is the filepath to the json file that will contain the meta-data
1033 about the logs.
1034 """
1035 return self.Tracer(logname)
1036
1037 @staticmethod
1038 def clean_trace(logname):
1039 """Deletes an old log."""
1040 raise NotImplementedError()
1041
1042 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001043 def parse_log(cls, logname, blacklist, trace_name):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001044 """Processes trace logs and returns the files opened and the files that do
1045 not exist.
1046
1047 It does not track directories.
1048
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001049 Arguments:
1050 - logname: must be an absolute path.
1051 - blacklist: must be a lambda.
1052 - trace_name: optional trace to read, defaults to reading all traces.
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00001053
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001054 Most of the time, files that do not exist are temporary test files that
1055 should be put in /tmp instead. See http://crbug.com/116251.
1056
1057 Returns a list of dict with keys:
1058 - results: A Results instance.
1059 - trace: The corresponding tracename parameter provided to
1060 get_tracer().trace().
1061 - output: Output gathered during execution, if get_tracer().trace(...,
1062 output=False) was used.
1063 """
1064 raise NotImplementedError(cls.__class__.__name__)
1065
1066
1067class Strace(ApiBase):
1068 """strace implies linux."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001069 @staticmethod
1070 def load_filename(filename):
1071 """Parses a filename in a log."""
1072 assert isinstance(filename, str)
1073 out = ''
1074 i = 0
1075 while i < len(filename):
1076 c = filename[i]
1077 if c == '\\':
1078 out += chr(int(filename[i+1:i+4], 8))
1079 i += 4
1080 else:
1081 out += c
1082 i += 1
1083 # TODO(maruel): That's not necessarily true that the current code page is
1084 # utf-8.
1085 return out.decode('utf-8')
1086
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001087 class Context(ApiBase.Context):
1088 """Processes a strace log line and keeps the list of existent and non
1089 existent files accessed.
1090
1091 Ignores directories.
1092
1093 Uses late-binding to processes the cwd of each process. The problem is that
1094 strace generates one log file per process it traced but doesn't give any
1095 information about which process was started when and by who. So we don't
1096 even know which process is the initial one. So process the logs out of
1097 order and use late binding with RelativePath to be able to deduce the
1098 initial directory of each process once all the logs are parsed.
1099 """
1100 class Process(ApiBase.Context.Process):
1101 """Represents the state of a process.
1102
1103 Contains all the information retrieved from the pid-specific log.
1104 """
1105 # Function names are using ([a-z_0-9]+)
1106 # This is the most common format. function(args) = result
1107 RE_HEADER = re.compile(r'^([a-z_0-9]+)\((.+?)\)\s+= (.+)$')
1108 # An interrupted function call, only grab the minimal header.
1109 RE_UNFINISHED = re.compile(r'^([^\(]+)(.*) \<unfinished \.\.\.\>$')
1110 # A resumed function call.
1111 RE_RESUMED = re.compile(r'^<\.\.\. ([^ ]+) resumed> (.+)$')
1112 # A process received a signal.
1113 RE_SIGNAL = re.compile(r'^--- SIG[A-Z]+ .+ ---')
1114 # A process didn't handle a signal. Ignore any junk appearing before,
1115 # because the process was forcibly killed so it won't open any new file.
1116 RE_KILLED = re.compile(
1117 r'^.*\+\+\+ killed by ([A-Z]+)( \(core dumped\))? \+\+\+$')
1118 # The process has exited.
1119 RE_PROCESS_EXITED = re.compile(r'^\+\+\+ exited with (\d+) \+\+\+')
1120 # A call was canceled. Ignore any prefix.
1121 RE_UNAVAILABLE = re.compile(r'^.*\)\s*= \? <unavailable>$')
1122 # Happens when strace fails to even get the function name.
1123 UNNAMED_FUNCTION = '????'
1124
1125 # Corner-case in python, a class member function decorator must not be
1126 # @staticmethod.
1127 def parse_args(regexp, expect_zero): # pylint: disable=E0213
1128 """Automatically convert the str 'args' into a list of processed
1129 arguments.
1130
1131 Arguments:
1132 - regexp is used to parse args.
1133 - expect_zero: one of True, False or None.
1134 - True: will check for result.startswith('0') first and will ignore
1135 the trace line completely otherwise. This is important because for
1136 many functions, the regexp will not process if the call failed.
1137 - False: will check for not result.startswith(('?', '-1')) for the
1138 same reason than with True.
1139 - None: ignore result.
1140 """
1141 def meta_hook(function):
1142 assert function.__name__.startswith('handle_')
1143 def hook(self, args, result):
1144 if expect_zero is True and not result.startswith('0'):
1145 return
1146 if expect_zero is False and result.startswith(('?', '-1')):
1147 return
1148 match = re.match(regexp, args)
1149 if not match:
1150 raise TracingFailure(
1151 'Failed to parse %s(%s) = %s' %
1152 (function.__name__[len('handle_'):], args, result),
1153 None, None, None)
1154 return function(self, match.groups(), result)
1155 return hook
1156 return meta_hook
1157
1158 class RelativePath(object):
1159 """A late-bound relative path."""
1160 def __init__(self, parent, value):
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001161 assert_is_renderable(parent)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001162 self.parent = parent
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001163 assert isinstance(value, (None.__class__, str)), repr(value)
1164 self.value = Strace.load_filename(value) if value else value
1165 if self.value:
1166 assert '\\' not in self.value, value
1167 assert '\\' not in self.value, (repr(value), repr(self.value))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001168
1169 def render(self):
1170 """Returns the current directory this instance is representing.
1171
1172 This function is used to return the late-bound value.
1173 """
1174 if self.value and self.value.startswith(u'/'):
1175 # An absolute path.
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001176 # TODO(maruel): This is wrong, we can't assert it is utf-8.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001177 return self.value
1178 parent = self.parent.render() if self.parent else u'<None>'
1179 if self.value:
1180 return os.path.normpath(os.path.join(parent, self.value))
1181 return parent
1182
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001183 def __init__(self, root, pid):
1184 """Keeps enough information to be able to guess the original process
1185 root.
1186
1187 strace doesn't store which process was the initial process. So more
1188 information needs to be kept so the graph can be reconstructed from the
1189 flat map.
1190 """
1191 logging.info('%s(%d)' % (self.__class__.__name__, pid))
1192 super(Strace.Context.Process, self).__init__(root.blacklist, pid, None)
1193 assert isinstance(root, ApiBase.Context)
1194 self._root = weakref.ref(root)
1195 # The dict key is the function name of the pending call, like 'open'
1196 # or 'execve'.
1197 self._pending_calls = {}
1198 self._line_number = 0
1199 # Current directory when the process started.
1200 self.initial_cwd = self.RelativePath(self._root(), None)
1201 self.parentid = None
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001202 self._done = False
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001203
1204 def get_cwd(self):
1205 """Returns the best known value of cwd."""
1206 return self.cwd or self.initial_cwd
1207
1208 def render(self):
1209 """Returns the string value of the RelativePath() object.
1210
1211 Used by RelativePath. Returns the initial directory and not the
1212 current one since the current directory 'cwd' validity is time-limited.
1213
1214 The validity is only guaranteed once all the logs are processed.
1215 """
1216 return self.initial_cwd.render()
1217
1218 def on_line(self, line):
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001219 assert isinstance(line, str)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001220 self._line_number += 1
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001221 if self._done:
1222 raise TracingFailure(
maruel@chromium.orgbcfc9e12013-03-19 18:47:17 +00001223 'Found a trace for a terminated process or corrupted log',
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001224 None, None, None)
1225
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001226 if self.RE_SIGNAL.match(line):
1227 # Ignore signals.
1228 return
1229
1230 try:
1231 match = self.RE_KILLED.match(line)
1232 if match:
1233 # Converts a '+++ killed by Foo +++' trace into an exit_group().
1234 self.handle_exit_group(match.group(1), None)
1235 return
1236
1237 match = self.RE_PROCESS_EXITED.match(line)
1238 if match:
1239 # Converts a '+++ exited with 1 +++' trace into an exit_group()
1240 self.handle_exit_group(match.group(1), None)
1241 return
1242
1243 match = self.RE_UNFINISHED.match(line)
1244 if match:
1245 if match.group(1) in self._pending_calls:
1246 raise TracingFailure(
1247 'Found two unfinished calls for the same function',
1248 None, None, None,
1249 self._pending_calls)
1250 self._pending_calls[match.group(1)] = (
1251 match.group(1) + match.group(2))
1252 return
1253
1254 match = self.RE_UNAVAILABLE.match(line)
1255 if match:
1256 # This usually means a process was killed and a pending call was
1257 # canceled.
1258 # TODO(maruel): Look up the last exit_group() trace just above and
1259 # make sure any self._pending_calls[anything] is properly flushed.
1260 return
1261
1262 match = self.RE_RESUMED.match(line)
1263 if match:
1264 if match.group(1) not in self._pending_calls:
1265 raise TracingFailure(
1266 'Found a resumed call that was not logged as unfinished',
1267 None, None, None,
1268 self._pending_calls)
1269 pending = self._pending_calls.pop(match.group(1))
1270 # Reconstruct the line.
1271 line = pending + match.group(2)
1272
1273 match = self.RE_HEADER.match(line)
1274 if not match:
maruel@chromium.orgbcfc9e12013-03-19 18:47:17 +00001275 # The line is corrupted. It happens occasionally when a process is
1276 # killed forcibly with activity going on. Assume the process died.
1277 # No other line can be processed afterward.
1278 self._done = True
1279 return
1280
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001281 if match.group(1) == self.UNNAMED_FUNCTION:
1282 return
1283
1284 # It's a valid line, handle it.
1285 handler = getattr(self, 'handle_%s' % match.group(1), None)
1286 if not handler:
1287 self._handle_unknown(match.group(1), match.group(2), match.group(3))
1288 return handler(match.group(2), match.group(3))
1289 except TracingFailure, e:
1290 # Hack in the values since the handler could be a static function.
1291 e.pid = self.pid
1292 e.line = line
1293 e.line_number = self._line_number
1294 # Re-raise the modified exception.
1295 raise
1296 except (KeyError, NotImplementedError, ValueError), e:
1297 raise TracingFailure(
1298 'Trace generated a %s exception: %s' % (
1299 e.__class__.__name__, str(e)),
1300 self.pid,
1301 self._line_number,
1302 line,
1303 e)
1304
1305 @parse_args(r'^\"(.+?)\", [FKORWX_|]+$', True)
1306 def handle_access(self, args, _result):
1307 self._handle_file(args[0], True)
1308
1309 @parse_args(r'^\"(.+?)\"$', True)
1310 def handle_chdir(self, args, _result):
1311 """Updates cwd."""
1312 self.cwd = self.RelativePath(self, args[0])
1313 logging.debug('handle_chdir(%d, %s)' % (self.pid, self.cwd))
1314
1315 def handle_clone(self, _args, result):
1316 """Transfers cwd."""
1317 if result.startswith(('?', '-1')):
1318 # The call failed.
1319 return
1320 # Update the other process right away.
1321 childpid = int(result)
1322 child = self._root().get_or_set_proc(childpid)
1323 if child.parentid is not None or childpid in self.children:
1324 raise TracingFailure(
1325 'Found internal inconsitency in process lifetime detection '
1326 'during a clone() call',
1327 None, None, None)
1328
1329 # Copy the cwd object.
1330 child.initial_cwd = self.get_cwd()
1331 child.parentid = self.pid
1332 # It is necessary because the logs are processed out of order.
1333 self.children.append(child)
1334
1335 def handle_close(self, _args, _result):
1336 pass
1337
1338 def handle_chmod(self, _args, _result):
1339 pass
1340
1341 def handle_creat(self, _args, _result):
1342 # Ignore files created, since they didn't need to exist.
1343 pass
1344
1345 @parse_args(r'^\"(.+?)\", \[(.+)\], \[\/\* \d+ vars? \*\/\]$', True)
1346 def handle_execve(self, args, _result):
1347 # Even if in practice execve() doesn't returns when it succeeds, strace
1348 # still prints '0' as the result.
1349 filepath = args[0]
1350 self._handle_file(filepath, False)
1351 self.executable = self.RelativePath(self.get_cwd(), filepath)
1352 self.command = strace_process_quoted_arguments(args[1])
1353
1354 def handle_exit_group(self, _args, _result):
1355 """Removes cwd."""
1356 self.cwd = None
1357
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001358 @parse_args(r'^(\d+|AT_FDCWD), \"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', True)
1359 def handle_faccessat(self, args, _results):
1360 if args[0] == 'AT_FDCWD':
1361 self._handle_file(args[1], True)
1362 else:
1363 raise Exception('Relative faccess not implemented.')
1364
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001365 def handle_fork(self, args, result):
1366 self._handle_unknown('fork', args, result)
1367
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001368 def handle_futex(self, _args, _result):
1369 pass
1370
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001371 def handle_getcwd(self, _args, _result):
1372 pass
1373
1374 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1375 def handle_link(self, args, _result):
1376 self._handle_file(args[0], False)
1377 self._handle_file(args[1], False)
1378
1379 @parse_args(r'\"(.+?)\", \{.+?, \.\.\.\}', True)
1380 def handle_lstat(self, args, _result):
1381 self._handle_file(args[0], True)
1382
1383 def handle_mkdir(self, _args, _result):
1384 pass
1385
1386 @parse_args(r'^\"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', False)
1387 def handle_open(self, args, _result):
1388 if 'O_DIRECTORY' in args[1]:
1389 return
1390 self._handle_file(args[0], False)
1391
1392 @parse_args(r'^(\d+|AT_FDCWD), \"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', False)
1393 def handle_openat(self, args, _result):
1394 if 'O_DIRECTORY' in args[2]:
1395 return
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001396 if args[0] == 'AT_FDCWD':
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001397 self._handle_file(args[1], False)
1398 else:
1399 # TODO(maruel): Implement relative open if necessary instead of the
1400 # AT_FDCWD flag, let's hope not since this means tracking all active
1401 # directory handles.
1402 raise Exception('Relative open via openat not implemented.')
1403
1404 @parse_args(r'^\"(.+?)\", \".+?\"(\.\.\.)?, \d+$', False)
1405 def handle_readlink(self, args, _result):
1406 self._handle_file(args[0], False)
1407
1408 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1409 def handle_rename(self, args, _result):
1410 self._handle_file(args[0], False)
1411 self._handle_file(args[1], False)
1412
1413 def handle_rmdir(self, _args, _result):
1414 pass
1415
1416 def handle_setxattr(self, _args, _result):
1417 pass
1418
1419 @parse_args(r'\"(.+?)\", \{.+?, \.\.\.\}', True)
1420 def handle_stat(self, args, _result):
1421 self._handle_file(args[0], True)
1422
1423 def handle_symlink(self, _args, _result):
1424 pass
1425
csharp@chromium.orgfa672922012-11-02 17:43:31 +00001426 @parse_args(r'^\"(.+?)\", \d+', True)
1427 def handle_truncate(self, args, _result):
1428 self._handle_file(args[0], False)
1429
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001430 def handle_unlink(self, _args, _result):
1431 # In theory, the file had to be created anyway.
1432 pass
1433
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001434 def handle_unlinkat(self, _args, _result):
1435 # In theory, the file had to be created anyway.
1436 pass
1437
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001438 def handle_statfs(self, _args, _result):
1439 pass
1440
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001441 def handle_utimensat(self, _args, _result):
1442 pass
1443
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001444 def handle_vfork(self, args, result):
1445 self._handle_unknown('vfork', args, result)
1446
1447 @staticmethod
1448 def _handle_unknown(function, args, result):
1449 raise TracingFailure(
1450 'Unexpected/unimplemented trace %s(%s)= %s' %
1451 (function, args, result),
1452 None, None, None)
1453
1454 def _handle_file(self, filepath, touch_only):
1455 filepath = self.RelativePath(self.get_cwd(), filepath)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001456 #assert not touch_only, render(filepath)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001457 self.add_file(filepath, touch_only)
1458
1459 def __init__(self, blacklist, initial_cwd):
1460 super(Strace.Context, self).__init__(blacklist)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001461 assert_is_renderable(initial_cwd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001462 self.initial_cwd = initial_cwd
1463
1464 def render(self):
1465 """Returns the string value of the initial cwd of the root process.
1466
1467 Used by RelativePath.
1468 """
1469 return self.initial_cwd
1470
1471 def on_line(self, pid, line):
1472 """Transfers control into the Process.on_line() function."""
1473 self.get_or_set_proc(pid).on_line(line.strip())
1474
1475 def to_results(self):
1476 """Finds back the root process and verify consistency."""
1477 # TODO(maruel): Absolutely unecessary, fix me.
1478 root = [p for p in self._process_lookup.itervalues() if not p.parentid]
1479 if len(root) != 1:
1480 raise TracingFailure(
1481 'Found internal inconsitency in process lifetime detection '
1482 'while finding the root process',
1483 None,
1484 None,
1485 None,
1486 sorted(p.pid for p in root))
1487 self.root_process = root[0]
1488 process = self.root_process.to_results_process()
1489 if sorted(self._process_lookup) != sorted(p.pid for p in process.all):
1490 raise TracingFailure(
1491 'Found internal inconsitency in process lifetime detection '
1492 'while looking for len(tree) == len(list)',
1493 None,
1494 None,
1495 None,
1496 sorted(self._process_lookup),
1497 sorted(p.pid for p in process.all))
1498 return Results(process)
1499
1500 def get_or_set_proc(self, pid):
1501 """Returns the Context.Process instance for this pid or creates a new one.
1502 """
1503 if not pid or not isinstance(pid, int):
1504 raise TracingFailure(
1505 'Unpexpected value for pid: %r' % pid,
1506 pid,
1507 None,
1508 None,
1509 pid)
1510 if pid not in self._process_lookup:
1511 self._process_lookup[pid] = self.Process(self, pid)
1512 return self._process_lookup[pid]
1513
1514 @classmethod
1515 def traces(cls):
1516 """Returns the list of all handled traces to pass this as an argument to
1517 strace.
1518 """
1519 prefix = 'handle_'
1520 return [i[len(prefix):] for i in dir(cls.Process) if i.startswith(prefix)]
1521
1522 class Tracer(ApiBase.Tracer):
1523 MAX_LEN = 256
1524
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001525 def __init__(self, logname, use_sudo):
1526 super(Strace.Tracer, self).__init__(logname)
1527 self.use_sudo = use_sudo
1528
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001529 def trace(self, cmd, cwd, tracename, output):
1530 """Runs strace on an executable."""
1531 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
1532 assert os.path.isabs(cmd[0]), cmd[0]
1533 assert os.path.isabs(cwd), cwd
1534 assert os.path.normpath(cwd) == cwd, cwd
1535 with self._lock:
1536 if not self._initialized:
1537 raise TracingFailure(
1538 'Called Tracer.trace() on an unitialized object',
1539 None, None, None, tracename)
1540 assert tracename not in (i['trace'] for i in self._traces)
1541 stdout = stderr = None
1542 if output:
1543 stdout = subprocess.PIPE
1544 stderr = subprocess.STDOUT
1545 # Ensure all file related APIs are hooked.
1546 traces = ','.join(Strace.Context.traces() + ['file'])
1547 trace_cmd = [
1548 'strace',
1549 '-ff',
1550 '-s', '%d' % self.MAX_LEN,
1551 '-e', 'trace=%s' % traces,
1552 '-o', self._logname + '.' + tracename,
1553 ]
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001554 if self.use_sudo is True:
1555 trace_cmd.insert(0, 'sudo')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001556 child = subprocess.Popen(
1557 trace_cmd + cmd,
1558 cwd=cwd,
1559 stdin=subprocess.PIPE,
1560 stdout=stdout,
1561 stderr=stderr)
1562 out = child.communicate()[0]
1563 # TODO(maruel): Walk the logs and figure out the root process would
1564 # simplify parsing the logs a *lot*.
1565 with self._lock:
1566 assert tracename not in (i['trace'] for i in self._traces)
1567 self._traces.append(
1568 {
1569 'cmd': cmd,
1570 'cwd': cwd,
maruel@chromium.org964ddde2012-10-29 18:13:46 +00001571 'output': out,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001572 # The pid of strace process, not very useful.
1573 'pid': child.pid,
1574 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001575 })
1576 return child.returncode, out
1577
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001578 def __init__(self, use_sudo=None):
1579 super(Strace, self).__init__()
1580 self.use_sudo = use_sudo
1581
1582 def get_tracer(self, logname):
1583 return self.Tracer(logname, self.use_sudo)
1584
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001585 @staticmethod
1586 def clean_trace(logname):
1587 if os.path.isfile(logname):
1588 os.remove(logname)
1589 # Also delete any pid specific file from previous traces.
1590 for i in glob.iglob(logname + '.*'):
1591 if i.rsplit('.', 1)[1].isdigit():
1592 os.remove(i)
1593
1594 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001595 def parse_log(cls, logname, blacklist, trace_name):
1596 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00001597 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001598 data = read_json(logname)
1599 out = []
1600 for item in data['traces']:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001601 if trace_name and item['trace'] != trace_name:
1602 continue
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001603 result = {
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001604 'output': item['output'],
maruel@chromium.org964ddde2012-10-29 18:13:46 +00001605 'trace': item['trace'],
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001606 }
1607 try:
1608 context = cls.Context(blacklist, item['cwd'])
1609 for pidfile in glob.iglob('%s.%s.*' % (logname, item['trace'])):
1610 pid = pidfile.rsplit('.', 1)[1]
1611 if pid.isdigit():
1612 pid = int(pid)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001613 for line in open(pidfile, 'rb'):
1614 context.on_line(pid, line)
1615 result['results'] = context.to_results()
maruel@chromium.org424f5692012-10-15 15:42:38 +00001616 except TracingFailure:
1617 result['exception'] = sys.exc_info()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001618 out.append(result)
1619 return out
1620
1621
1622class Dtrace(ApiBase):
1623 """Uses DTrace framework through dtrace. Requires root access.
1624
1625 Implies Mac OSX.
1626
1627 dtruss can't be used because it has compatibility issues with python.
1628
1629 Also, the pid->cwd handling needs to be done manually since OSX has no way to
1630 get the absolute path of the 'cwd' dtrace variable from the probe.
1631
1632 Also, OSX doesn't populate curpsinfo->pr_psargs properly, see
1633 https://discussions.apple.com/thread/1980539. So resort to handling execve()
1634 manually.
1635
1636 errno is not printed in the log since this implementation currently only cares
1637 about files that were successfully opened.
1638 """
1639 class Context(ApiBase.Context):
1640 # Format: index pid function(args)
1641 RE_HEADER = re.compile(r'^\d+ (\d+) ([a-zA-Z_\-]+)\((.*?)\)$')
1642
1643 # Arguments parsing.
1644 RE_DTRACE_BEGIN = re.compile(r'^\"(.+?)\"$')
1645 RE_CHDIR = re.compile(r'^\"(.+?)\"$')
1646 RE_EXECVE = re.compile(r'^\"(.+?)\", \[(\d+), (.+)\]$')
1647 RE_OPEN = re.compile(r'^\"(.+?)\", (0x[0-9a-z]+), (0x[0-9a-z]+)$')
1648 RE_PROC_START = re.compile(r'^(\d+), \"(.+?)\", (\d+)$')
1649 RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$')
1650
1651 O_DIRECTORY = 0x100000
1652
1653 class Process(ApiBase.Context.Process):
1654 def __init__(self, *args):
1655 super(Dtrace.Context.Process, self).__init__(*args)
1656 self.cwd = self.initial_cwd
1657
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001658 def __init__(self, blacklist, thunk_pid, initial_cwd):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001659 logging.info(
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001660 '%s(%d, %s)' % (self.__class__.__name__, thunk_pid, initial_cwd))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001661 super(Dtrace.Context, self).__init__(blacklist)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001662 assert isinstance(initial_cwd, unicode), initial_cwd
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001663 # Process ID of the temporary script created by create_thunk().
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001664 self._thunk_pid = thunk_pid
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001665 self._initial_cwd = initial_cwd
1666 self._line_number = 0
1667
1668 def on_line(self, line):
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001669 assert isinstance(line, unicode), line
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001670 self._line_number += 1
1671 match = self.RE_HEADER.match(line)
1672 if not match:
1673 raise TracingFailure(
1674 'Found malformed line: %s' % line,
1675 None,
1676 self._line_number,
1677 line)
1678 fn = getattr(
1679 self,
1680 'handle_%s' % match.group(2).replace('-', '_'),
1681 self._handle_ignored)
1682 # It is guaranteed to succeed because of the regexp. Or at least I thought
1683 # it would.
1684 pid = int(match.group(1))
1685 try:
1686 return fn(pid, match.group(3))
1687 except TracingFailure, e:
1688 # Hack in the values since the handler could be a static function.
1689 e.pid = pid
1690 e.line = line
1691 e.line_number = self._line_number
1692 # Re-raise the modified exception.
1693 raise
1694 except (KeyError, NotImplementedError, ValueError), e:
1695 raise TracingFailure(
1696 'Trace generated a %s exception: %s' % (
1697 e.__class__.__name__, str(e)),
1698 pid,
1699 self._line_number,
1700 line,
1701 e)
1702
1703 def to_results(self):
1704 process = self.root_process.to_results_process()
1705 # Internal concistency check.
1706 if sorted(self._process_lookup) != sorted(p.pid for p in process.all):
1707 raise TracingFailure(
1708 'Found internal inconsitency in process lifetime detection '
1709 'while looking for len(tree) == len(list)',
1710 None,
1711 None,
1712 None,
1713 sorted(self._process_lookup),
1714 sorted(p.pid for p in process.all))
1715 return Results(process)
1716
1717 def handle_dtrace_BEGIN(self, _pid, args):
1718 if not self.RE_DTRACE_BEGIN.match(args):
1719 raise TracingFailure(
1720 'Found internal inconsitency in dtrace_BEGIN log line',
1721 None, None, None)
1722
1723 def handle_proc_start(self, pid, args):
1724 """Transfers cwd.
1725
1726 The dtrace script already takes care of only tracing the processes that
1727 are child of the traced processes so there is no need to verify the
1728 process hierarchy.
1729 """
1730 if pid in self._process_lookup:
1731 raise TracingFailure(
1732 'Found internal inconsitency in proc_start: %d started two times' %
1733 pid,
1734 None, None, None)
1735 match = self.RE_PROC_START.match(args)
1736 if not match:
1737 raise TracingFailure(
1738 'Failed to parse arguments: %s' % args,
1739 None, None, None)
1740 ppid = int(match.group(1))
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001741 if ppid == self._thunk_pid and not self.root_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001742 proc = self.root_process = self.Process(
1743 self.blacklist, pid, self._initial_cwd)
1744 elif ppid in self._process_lookup:
1745 proc = self.Process(self.blacklist, pid, self._process_lookup[ppid].cwd)
1746 self._process_lookup[ppid].children.append(proc)
1747 else:
1748 # Another process tree, ignore.
1749 return
1750 self._process_lookup[pid] = proc
1751 logging.debug(
1752 'New child: %s -> %d cwd:%s' %
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001753 (ppid, pid, render(proc.initial_cwd)))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001754
1755 def handle_proc_exit(self, pid, _args):
1756 """Removes cwd."""
1757 if pid in self._process_lookup:
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001758 # self._thunk_pid is not traced itself and other traces run neither.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001759 self._process_lookup[pid].cwd = None
1760
1761 def handle_execve(self, pid, args):
1762 """Sets the process' executable.
1763
1764 TODO(maruel): Read command line arguments. See
1765 https://discussions.apple.com/thread/1980539 for an example.
1766 https://gist.github.com/1242279
1767
1768 Will have to put the answer at http://stackoverflow.com/questions/7556249.
1769 :)
1770 """
1771 if not pid in self._process_lookup:
1772 # Another process tree, ignore.
1773 return
1774 match = self.RE_EXECVE.match(args)
1775 if not match:
1776 raise TracingFailure(
1777 'Failed to parse arguments: %r' % args,
1778 None, None, None)
1779 proc = self._process_lookup[pid]
1780 proc.executable = match.group(1)
1781 proc.command = self.process_escaped_arguments(match.group(3))
1782 if int(match.group(2)) != len(proc.command):
1783 raise TracingFailure(
1784 'Failed to parse execve() arguments: %s' % args,
1785 None, None, None)
1786
1787 def handle_chdir(self, pid, args):
1788 """Updates cwd."""
1789 if pid not in self._process_lookup:
1790 # Another process tree, ignore.
1791 return
1792 cwd = self.RE_CHDIR.match(args).group(1)
1793 if not cwd.startswith('/'):
1794 cwd2 = os.path.join(self._process_lookup[pid].cwd, cwd)
1795 logging.debug('handle_chdir(%d, %s) -> %s' % (pid, cwd, cwd2))
1796 else:
1797 logging.debug('handle_chdir(%d, %s)' % (pid, cwd))
1798 cwd2 = cwd
1799 self._process_lookup[pid].cwd = cwd2
1800
1801 def handle_open_nocancel(self, pid, args):
1802 """Redirects to handle_open()."""
1803 return self.handle_open(pid, args)
1804
1805 def handle_open(self, pid, args):
1806 if pid not in self._process_lookup:
1807 # Another process tree, ignore.
1808 return
1809 match = self.RE_OPEN.match(args)
1810 if not match:
1811 raise TracingFailure(
1812 'Failed to parse arguments: %s' % args,
1813 None, None, None)
1814 flag = int(match.group(2), 16)
1815 if self.O_DIRECTORY & flag == self.O_DIRECTORY:
1816 # Ignore directories.
1817 return
1818 self._handle_file(pid, match.group(1))
1819
1820 def handle_rename(self, pid, args):
1821 if pid not in self._process_lookup:
1822 # Another process tree, ignore.
1823 return
1824 match = self.RE_RENAME.match(args)
1825 if not match:
1826 raise TracingFailure(
1827 'Failed to parse arguments: %s' % args,
1828 None, None, None)
1829 self._handle_file(pid, match.group(1))
1830 self._handle_file(pid, match.group(2))
1831
1832 def _handle_file(self, pid, filepath):
1833 if not filepath.startswith('/'):
1834 filepath = os.path.join(self._process_lookup[pid].cwd, filepath)
1835 # We can get '..' in the path.
1836 filepath = os.path.normpath(filepath)
1837 # Sadly, still need to filter out directories here;
1838 # saw open_nocancel(".", 0, 0) = 0 lines.
1839 if os.path.isdir(filepath):
1840 return
1841 self._process_lookup[pid].add_file(filepath, False)
1842
1843 def handle_ftruncate(self, pid, args):
1844 """Just used as a signal to kill dtrace, ignoring."""
1845 pass
1846
1847 @staticmethod
1848 def _handle_ignored(pid, args):
1849 """Is called for all the event traces that are not handled."""
1850 raise NotImplementedError('Please implement me')
1851
1852 @staticmethod
1853 def process_escaped_arguments(text):
1854 """Extracts escaped arguments on a string and return the arguments as a
1855 list.
1856
1857 Implemented as an automaton.
1858
1859 Example:
1860 With text = '\\001python2.7\\001-c\\001print(\\"hi\\")\\0', the
1861 function will return ['python2.7', '-c', 'print("hi")]
1862 """
1863 if not text.endswith('\\0'):
1864 raise ValueError('String is not null terminated: %r' % text, text)
1865 text = text[:-2]
1866
1867 def unescape(x):
1868 """Replaces '\\' with '\' and '\?' (where ? is anything) with ?."""
1869 out = []
1870 escaped = False
1871 for i in x:
1872 if i == '\\' and not escaped:
1873 escaped = True
1874 continue
1875 escaped = False
1876 out.append(i)
1877 return ''.join(out)
1878
1879 return [unescape(i) for i in text.split('\\001')]
1880
1881 class Tracer(ApiBase.Tracer):
1882 # pylint: disable=C0301
1883 #
1884 # To understand the following code, you'll want to take a look at:
1885 # http://developers.sun.com/solaris/articles/dtrace_quickref/dtrace_quickref.html
1886 # https://wikis.oracle.com/display/DTrace/Variables
1887 # http://docs.oracle.com/cd/E19205-01/820-4221/
1888 #
1889 # 0. Dump all the valid probes into a text file. It is important, you
1890 # want to redirect into a file and you don't want to constantly 'sudo'.
1891 # $ sudo dtrace -l > probes.txt
1892 #
1893 # 1. Count the number of probes:
1894 # $ wc -l probes.txt
1895 # 81823 # On OSX 10.7, including 1 header line.
1896 #
1897 # 2. List providers, intentionally skipping all the 'syspolicy10925' and the
1898 # likes and skipping the header with NR>1:
1899 # $ awk 'NR>1 { print $2 }' probes.txt | sort | uniq | grep -v '[[:digit:]]'
1900 # dtrace
1901 # fbt
1902 # io
1903 # ip
1904 # lockstat
1905 # mach_trap
1906 # proc
1907 # profile
1908 # sched
1909 # syscall
1910 # tcp
1911 # vminfo
1912 #
1913 # 3. List of valid probes:
1914 # $ grep syscall probes.txt | less
1915 # or use dtrace directly:
1916 # $ sudo dtrace -l -P syscall | less
1917 #
1918 # trackedpid is an associative array where its value can be 0, 1 or 2.
1919 # 0 is for untracked processes and is the default value for items not
1920 # in the associative array.
1921 # 1 is for tracked processes.
1922 # 2 is for the script created by create_thunk() only. It is not tracked
1923 # itself but all its decendants are.
1924 #
1925 # The script will kill itself only once waiting_to_die == 1 and
1926 # current_processes == 0, so that both getlogin() was called and that
1927 # all traced processes exited.
1928 #
1929 # TODO(maruel): Use cacheable predicates. See
1930 # https://wikis.oracle.com/display/DTrace/Performance+Considerations
1931 D_CODE = """
1932 dtrace:::BEGIN {
1933 waiting_to_die = 0;
1934 current_processes = 0;
1935 logindex = 0;
1936 printf("%d %d %s_%s(\\"%s\\")\\n",
1937 logindex, PID, probeprov, probename, SCRIPT);
1938 logindex++;
1939 }
1940
1941 proc:::start /trackedpid[ppid]/ {
1942 trackedpid[pid] = 1;
1943 current_processes += 1;
1944 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
1945 logindex, pid, probeprov, probename,
1946 ppid,
1947 execname,
1948 current_processes);
1949 logindex++;
1950 }
1951 /* Should use SCRIPT but there is no access to this variable at that
1952 * point. */
1953 proc:::start /ppid == PID && execname == "Python"/ {
1954 trackedpid[pid] = 2;
1955 current_processes += 1;
1956 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
1957 logindex, pid, probeprov, probename,
1958 ppid,
1959 execname,
1960 current_processes);
1961 logindex++;
1962 }
1963 proc:::exit /trackedpid[pid] &&
1964 current_processes == 1 &&
1965 waiting_to_die == 1/ {
1966 trackedpid[pid] = 0;
1967 current_processes -= 1;
1968 printf("%d %d %s_%s(%d)\\n",
1969 logindex, pid, probeprov, probename,
1970 current_processes);
1971 logindex++;
1972 exit(0);
1973 }
1974 proc:::exit /trackedpid[pid]/ {
1975 trackedpid[pid] = 0;
1976 current_processes -= 1;
1977 printf("%d %d %s_%s(%d)\\n",
1978 logindex, pid, probeprov, probename,
1979 current_processes);
1980 logindex++;
1981 }
1982
1983 /* Use an arcane function to detect when we need to die */
1984 syscall::ftruncate:entry /pid == PID && arg0 == FILE_ID/ {
1985 waiting_to_die = 1;
1986 printf("%d %d %s()\\n", logindex, pid, probefunc);
1987 logindex++;
1988 }
1989 syscall::ftruncate:entry /
1990 pid == PID && arg0 == FILE_ID && current_processes == 0/ {
1991 exit(0);
1992 }
1993
1994 syscall::open*:entry /trackedpid[pid] == 1/ {
1995 self->open_arg0 = arg0;
1996 self->open_arg1 = arg1;
1997 self->open_arg2 = arg2;
1998 }
1999 syscall::open*:return /trackedpid[pid] == 1 && errno == 0/ {
2000 this->open_arg0 = copyinstr(self->open_arg0);
2001 printf("%d %d %s(\\"%s\\", 0x%x, 0x%x)\\n",
2002 logindex, pid, probefunc,
2003 this->open_arg0,
2004 self->open_arg1,
2005 self->open_arg2);
2006 logindex++;
2007 this->open_arg0 = 0;
2008 }
2009 syscall::open*:return /trackedpid[pid] == 1/ {
2010 self->open_arg0 = 0;
2011 self->open_arg1 = 0;
2012 self->open_arg2 = 0;
2013 }
2014
2015 syscall::rename:entry /trackedpid[pid] == 1/ {
2016 self->rename_arg0 = arg0;
2017 self->rename_arg1 = arg1;
2018 }
2019 syscall::rename:return /trackedpid[pid] == 1 && errno == 0/ {
2020 this->rename_arg0 = copyinstr(self->rename_arg0);
2021 this->rename_arg1 = copyinstr(self->rename_arg1);
2022 printf("%d %d %s(\\"%s\\", \\"%s\\")\\n",
2023 logindex, pid, probefunc,
2024 this->rename_arg0,
2025 this->rename_arg1);
2026 logindex++;
2027 this->rename_arg0 = 0;
2028 this->rename_arg1 = 0;
2029 }
2030 syscall::rename:return /trackedpid[pid] == 1/ {
2031 self->rename_arg0 = 0;
2032 self->rename_arg1 = 0;
2033 }
2034
2035 /* Track chdir, it's painful because it is only receiving relative path.
2036 */
2037 syscall::chdir:entry /trackedpid[pid] == 1/ {
2038 self->chdir_arg0 = arg0;
2039 }
2040 syscall::chdir:return /trackedpid[pid] == 1 && errno == 0/ {
2041 this->chdir_arg0 = copyinstr(self->chdir_arg0);
2042 printf("%d %d %s(\\"%s\\")\\n",
2043 logindex, pid, probefunc,
2044 this->chdir_arg0);
2045 logindex++;
2046 this->chdir_arg0 = 0;
2047 }
2048 syscall::chdir:return /trackedpid[pid] == 1/ {
2049 self->chdir_arg0 = 0;
2050 }
2051 """
2052
2053 # execve-specific code, tends to throw a lot of exceptions.
2054 D_CODE_EXECVE = """
2055 /* Finally what we care about! */
2056 syscall::exec*:entry /trackedpid[pid]/ {
2057 self->exec_arg0 = copyinstr(arg0);
2058 /* Incrementally probe for a NULL in the argv parameter of execve() to
2059 * figure out argc. */
2060 /* TODO(maruel): Skip the remaining copyin() when a NULL pointer was
2061 * found. */
2062 self->exec_argc = 0;
2063 /* Probe for argc==1 */
2064 this->exec_argv = (user_addr_t*)copyin(
2065 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2066 self->exec_argc = this->exec_argv[self->exec_argc] ?
2067 (self->exec_argc + 1) : self->exec_argc;
2068
2069 /* Probe for argc==2 */
2070 this->exec_argv = (user_addr_t*)copyin(
2071 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2072 self->exec_argc = this->exec_argv[self->exec_argc] ?
2073 (self->exec_argc + 1) : self->exec_argc;
2074
2075 /* Probe for argc==3 */
2076 this->exec_argv = (user_addr_t*)copyin(
2077 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2078 self->exec_argc = this->exec_argv[self->exec_argc] ?
2079 (self->exec_argc + 1) : self->exec_argc;
2080
2081 /* Probe for argc==4 */
2082 this->exec_argv = (user_addr_t*)copyin(
2083 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2084 self->exec_argc = this->exec_argv[self->exec_argc] ?
2085 (self->exec_argc + 1) : self->exec_argc;
2086
2087 /* Copy the inputs strings since there is no guarantee they'll be
2088 * present after the call completed. */
2089 self->exec_argv0 = (self->exec_argc > 0) ?
2090 copyinstr(this->exec_argv[0]) : "";
2091 self->exec_argv1 = (self->exec_argc > 1) ?
2092 copyinstr(this->exec_argv[1]) : "";
2093 self->exec_argv2 = (self->exec_argc > 2) ?
2094 copyinstr(this->exec_argv[2]) : "";
2095 self->exec_argv3 = (self->exec_argc > 3) ?
2096 copyinstr(this->exec_argv[3]) : "";
2097 this->exec_argv = 0;
2098 }
2099 syscall::exec*:return /trackedpid[pid] && errno == 0/ {
2100 /* We need to join strings here, as using multiple printf() would
2101 * cause tearing when multiple threads/processes are traced.
2102 * Since it is impossible to escape a string and join it to another one,
2103 * like sprintf("%s%S", previous, more), use hackery.
2104 * Each of the elements are split with a \\1. \\0 cannot be used because
2105 * it is simply ignored. This will conflict with any program putting a
2106 * \\1 in their execve() string but this should be "rare enough" */
2107 this->args = "";
2108 /* Process exec_argv[0] */
2109 this->args = strjoin(
2110 this->args, (self->exec_argc > 0) ? self->exec_argv0 : "");
2111
2112 /* Process exec_argv[1] */
2113 this->args = strjoin(
2114 this->args, (self->exec_argc > 1) ? "\\1" : "");
2115 this->args = strjoin(
2116 this->args, (self->exec_argc > 1) ? self->exec_argv1 : "");
2117
2118 /* Process exec_argv[2] */
2119 this->args = strjoin(
2120 this->args, (self->exec_argc > 2) ? "\\1" : "");
2121 this->args = strjoin(
2122 this->args, (self->exec_argc > 2) ? self->exec_argv2 : "");
2123
2124 /* Process exec_argv[3] */
2125 this->args = strjoin(
2126 this->args, (self->exec_argc > 3) ? "\\1" : "");
2127 this->args = strjoin(
2128 this->args, (self->exec_argc > 3) ? self->exec_argv3 : "");
2129
2130 /* Prints self->exec_argc to permits verifying the internal
2131 * consistency since this code is quite fishy. */
2132 printf("%d %d %s(\\"%s\\", [%d, %S])\\n",
2133 logindex, pid, probefunc,
2134 self->exec_arg0,
2135 self->exec_argc,
2136 this->args);
2137 logindex++;
2138 this->args = 0;
2139 }
2140 syscall::exec*:return /trackedpid[pid]/ {
2141 self->exec_arg0 = 0;
2142 self->exec_argc = 0;
2143 self->exec_argv0 = 0;
2144 self->exec_argv1 = 0;
2145 self->exec_argv2 = 0;
2146 self->exec_argv3 = 0;
2147 }
2148 """
2149
2150 # Code currently not used.
2151 D_EXTRANEOUS = """
2152 /* This is a good learning experience, since it traces a lot of things
2153 * related to the process and child processes.
2154 * Warning: it generates a gigantic log. For example, tracing
2155 * "data/trace_inputs/child1.py --child" generates a 2mb log and takes
2156 * several minutes to execute.
2157 */
2158 /*
2159 mach_trap::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2160 printf("%d %d %s_%s() = %d\\n",
2161 logindex, pid, probeprov, probefunc, errno);
2162 logindex++;
2163 }
2164 proc::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2165 printf("%d %d %s_%s() = %d\\n",
2166 logindex, pid, probeprov, probefunc, errno);
2167 logindex++;
2168 }
2169 sched::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2170 printf("%d %d %s_%s() = %d\\n",
2171 logindex, pid, probeprov, probefunc, errno);
2172 logindex++;
2173 }
2174 syscall::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2175 printf("%d %d %s_%s() = %d\\n",
2176 logindex, pid, probeprov, probefunc, errno);
2177 logindex++;
2178 }
2179 vminfo::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2180 printf("%d %d %s_%s() = %d\\n",
2181 logindex, pid, probeprov, probefunc, errno);
2182 logindex++;
2183 }
2184 */
2185 /* TODO(maruel): *stat* functions and friends
2186 syscall::access:return,
2187 syscall::chdir:return,
2188 syscall::chflags:return,
2189 syscall::chown:return,
2190 syscall::chroot:return,
2191 syscall::getattrlist:return,
2192 syscall::getxattr:return,
2193 syscall::lchown:return,
2194 syscall::lstat64:return,
2195 syscall::lstat:return,
2196 syscall::mkdir:return,
2197 syscall::pathconf:return,
2198 syscall::readlink:return,
2199 syscall::removexattr:return,
2200 syscall::setxattr:return,
2201 syscall::stat64:return,
2202 syscall::stat:return,
2203 syscall::truncate:return,
2204 syscall::unlink:return,
2205 syscall::utimes:return,
2206 */
2207 """
2208
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002209 def __init__(self, logname, use_sudo):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002210 """Starts the log collection with dtrace.
2211
2212 Requires root access or chmod 4555 on dtrace. dtrace is asynchronous so
2213 this needs to wait for dtrace to be "warmed up".
2214 """
2215 super(Dtrace.Tracer, self).__init__(logname)
2216 self._script = create_thunk()
2217 # This unique dummy temp file is used to signal the dtrace script that it
2218 # should stop as soon as all the child processes are done. A bit hackish
2219 # but works fine enough.
2220 self._dummy_file_id, self._dummy_file_name = tempfile.mkstemp(
2221 prefix='trace_signal_file')
2222
2223 # Note: do not use the -p flag. It's useless if the initial process quits
2224 # too fast, resulting in missing traces from the grand-children. The D
2225 # code manages the dtrace lifetime itself.
2226 trace_cmd = [
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002227 'dtrace',
2228 # Use a larger buffer if getting 'out of scratch space' errors.
2229 # Ref: https://wikis.oracle.com/display/DTrace/Options+and+Tunables
2230 '-b', '10m',
2231 '-x', 'dynvarsize=10m',
2232 #'-x', 'dtrace_global_maxsize=1m',
2233 '-x', 'evaltime=exec',
2234 '-o', '/dev/stderr',
2235 '-q',
2236 '-n', self._get_dtrace_code(),
2237 ]
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002238 if use_sudo is not False:
2239 trace_cmd.insert(0, 'sudo')
2240
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002241 with open(self._logname + '.log', 'wb') as logfile:
2242 self._dtrace = subprocess.Popen(
2243 trace_cmd, stdout=logfile, stderr=subprocess.STDOUT)
2244 logging.debug('Started dtrace pid: %d' % self._dtrace.pid)
2245
2246 # Reads until one line is printed, which signifies dtrace is up and ready.
2247 with open(self._logname + '.log', 'rb') as logfile:
2248 while 'dtrace_BEGIN' not in logfile.readline():
2249 if self._dtrace.poll() is not None:
2250 # Do a busy wait. :/
2251 break
2252 logging.debug('dtrace started')
2253
2254 def _get_dtrace_code(self):
2255 """Setups the D code to implement child process tracking.
2256
2257 Injects the cookie in the script so it knows when to stop.
2258
2259 The script will detect any instance of the script created with
2260 create_thunk() and will start tracing it.
2261 """
maruel@chromium.orgbe75f282012-11-02 19:34:37 +00002262 out = (
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002263 'inline int PID = %d;\n'
2264 'inline string SCRIPT = "%s";\n'
2265 'inline int FILE_ID = %d;\n'
2266 '\n'
2267 '%s') % (
2268 os.getpid(),
2269 self._script,
2270 self._dummy_file_id,
maruel@chromium.orgbe75f282012-11-02 19:34:37 +00002271 self.D_CODE)
2272 if os.environ.get('TRACE_INPUTS_DTRACE_ENABLE_EXECVE') == '1':
2273 # Do not enable by default since it tends to spew dtrace: error lines
2274 # because the execve() parameters are not in valid memory at the time of
2275 # logging.
2276 # TODO(maruel): Find a way to make this reliable since it's useful but
2277 # only works in limited/trivial uses cases for now.
2278 out += self.D_CODE_EXECVE
2279 return out
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002280
2281 def trace(self, cmd, cwd, tracename, output):
2282 """Runs dtrace on an executable.
2283
2284 This dtruss is broken when it starts the process itself or when tracing
2285 child processes, this code starts a wrapper process
2286 generated with create_thunk() which starts the executable to trace.
2287 """
2288 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
2289 assert os.path.isabs(cmd[0]), cmd[0]
2290 assert os.path.isabs(cwd), cwd
2291 assert os.path.normpath(cwd) == cwd, cwd
2292 with self._lock:
2293 if not self._initialized:
2294 raise TracingFailure(
2295 'Called Tracer.trace() on an unitialized object',
2296 None, None, None, tracename)
2297 assert tracename not in (i['trace'] for i in self._traces)
2298
2299 # Starts the script wrapper to start the child process. This signals the
2300 # dtrace script that this process is to be traced.
2301 stdout = stderr = None
2302 if output:
2303 stdout = subprocess.PIPE
2304 stderr = subprocess.STDOUT
2305 child_cmd = [
2306 sys.executable,
2307 self._script,
2308 tracename,
2309 ]
2310 # Call a dummy function so that dtrace knows I'm about to launch a process
2311 # that needs to be traced.
2312 # Yummy.
2313 child = subprocess.Popen(
2314 child_cmd + fix_python_path(cmd),
2315 stdin=subprocess.PIPE,
2316 stdout=stdout,
2317 stderr=stderr,
2318 cwd=cwd)
2319 logging.debug('Started child pid: %d' % child.pid)
2320
2321 out = child.communicate()[0]
2322 # This doesn't mean tracing is done, one of the grand-child process may
2323 # still be alive. It will be tracked with the dtrace script.
2324
2325 with self._lock:
2326 assert tracename not in (i['trace'] for i in self._traces)
2327 self._traces.append(
2328 {
2329 'cmd': cmd,
2330 'cwd': cwd,
2331 # The pid of strace process, not very useful.
2332 'pid': child.pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002333 'output': out,
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002334 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002335 })
2336 return child.returncode, out
2337
2338 def close(self, timeout=None):
2339 """Terminates dtrace."""
2340 logging.debug('close(%s)' % timeout)
2341 try:
2342 try:
2343 super(Dtrace.Tracer, self).close(timeout)
2344 # Signal dtrace that it should stop now.
maruel@chromium.orgf43e68b2012-10-15 20:23:10 +00002345 # ftruncate doesn't exist on Windows.
2346 os.ftruncate(self._dummy_file_id, 0) # pylint: disable=E1101
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002347 if timeout:
2348 start = time.time()
2349 # Use polling. :/
2350 while (self._dtrace.poll() is None and
2351 (time.time() - start) < timeout):
2352 time.sleep(0.1)
2353 self._dtrace.kill()
2354 self._dtrace.wait()
2355 finally:
2356 # Make sure to kill it in any case.
2357 if self._dtrace.poll() is None:
2358 try:
2359 self._dtrace.kill()
2360 self._dtrace.wait()
2361 except OSError:
2362 pass
2363
2364 if self._dtrace.returncode != 0:
2365 # Warn about any dtrace failure but basically ignore it.
2366 print 'dtrace failure: %s' % self._dtrace.returncode
2367 finally:
2368 os.close(self._dummy_file_id)
2369 os.remove(self._dummy_file_name)
2370 os.remove(self._script)
2371
2372 def post_process_log(self):
2373 """Sorts the log back in order when each call occured.
2374
2375 dtrace doesn't save the buffer in strict order since it keeps one buffer
2376 per CPU.
2377 """
2378 super(Dtrace.Tracer, self).post_process_log()
2379 logname = self._logname + '.log'
2380 with open(logname, 'rb') as logfile:
2381 lines = [l for l in logfile if l.strip()]
2382 errors = [l for l in lines if l.startswith('dtrace:')]
2383 if errors:
2384 raise TracingFailure(
2385 'Found errors in the trace: %s' % '\n'.join(errors),
2386 None, None, None, logname)
2387 try:
2388 lines = sorted(lines, key=lambda l: int(l.split(' ', 1)[0]))
2389 except ValueError:
2390 raise TracingFailure(
2391 'Found errors in the trace: %s' % '\n'.join(
2392 l for l in lines if l.split(' ', 1)[0].isdigit()),
2393 None, None, None, logname)
2394 with open(logname, 'wb') as logfile:
2395 logfile.write(''.join(lines))
2396
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002397 def __init__(self, use_sudo=None):
2398 super(Dtrace, self).__init__()
2399 self.use_sudo = use_sudo
2400
2401 def get_tracer(self, logname):
2402 return self.Tracer(logname, self.use_sudo)
2403
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002404 @staticmethod
2405 def clean_trace(logname):
2406 for ext in ('', '.log'):
2407 if os.path.isfile(logname + ext):
2408 os.remove(logname + ext)
2409
2410 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00002411 def parse_log(cls, logname, blacklist, trace_name):
2412 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00002413 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002414
2415 def blacklist_more(filepath):
2416 # All the HFS metadata is in the form /.vol/...
2417 return blacklist(filepath) or re.match(r'^\/\.vol\/.+$', filepath)
2418
2419 data = read_json(logname)
2420 out = []
2421 for item in data['traces']:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00002422 if trace_name and item['trace'] != trace_name:
2423 continue
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002424 result = {
2425 'output': item['output'],
2426 'trace': item['trace'],
2427 }
2428 try:
2429 context = cls.Context(blacklist_more, item['pid'], item['cwd'])
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002430 # It's fine to assume the file as UTF-8: OSX enforces the file names to
2431 # be valid UTF-8 and we control the log output.
2432 for line in codecs.open(logname + '.log', 'rb', encoding='utf-8'):
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002433 context.on_line(line)
2434 result['results'] = context.to_results()
2435 except TracingFailure:
2436 result['exception'] = sys.exc_info()
2437 out.append(result)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002438 return out
2439
2440
2441class LogmanTrace(ApiBase):
2442 """Uses the native Windows ETW based tracing functionality to trace a child
2443 process.
2444
2445 Caveat: this implementations doesn't track cwd or initial_cwd. It is because
2446 the Windows Kernel doesn't have a concept of 'current working directory' at
2447 all. A Win32 process has a map of current directories, one per drive letter
2448 and it is managed by the user mode kernel32.dll. In kernel, a file is always
2449 opened relative to another file_object or as an absolute path. All the current
2450 working directory logic is done in user mode.
2451 """
2452 class Context(ApiBase.Context):
2453 """Processes a ETW log line and keeps the list of existent and non
2454 existent files accessed.
2455
2456 Ignores directories.
2457 """
2458 # These indexes are for the stripped version in json.
2459 EVENT_NAME = 0
2460 TYPE = 1
2461 PID = 2
2462 TID = 3
2463 PROCESSOR_ID = 4
2464 TIMESTAMP = 5
2465 USER_DATA = 6
2466
2467 class Process(ApiBase.Context.Process):
2468 def __init__(self, *args):
2469 super(LogmanTrace.Context.Process, self).__init__(*args)
2470 # Handle file objects that succeeded.
2471 self.file_objects = {}
2472
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002473 def __init__(self, blacklist, thunk_pid, trace_name, thunk_cmd):
2474 logging.info(
2475 '%s(%d, %s, %s)', self.__class__.__name__, thunk_pid, trace_name,
2476 thunk_cmd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002477 super(LogmanTrace.Context, self).__init__(blacklist)
2478 self._drive_map = DosDriveMap()
2479 # Threads mapping to the corresponding process id.
2480 self._threads_active = {}
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002481 # Process ID of the tracer, e.g. the temporary script created by
2482 # create_thunk(). This is tricky because the process id may have been
2483 # reused.
2484 self._thunk_pid = thunk_pid
2485 self._thunk_cmd = thunk_cmd
2486 self._trace_name = trace_name
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002487 self._line_number = 0
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002488 self._thunk_process = None
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002489
2490 def on_line(self, line):
2491 """Processes a json Event line."""
2492 self._line_number += 1
2493 try:
2494 # By Opcode
2495 handler = getattr(
2496 self,
2497 'handle_%s_%s' % (line[self.EVENT_NAME], line[self.TYPE]),
2498 None)
2499 if not handler:
2500 raise TracingFailure(
2501 'Unexpected event %s_%s' % (
2502 line[self.EVENT_NAME], line[self.TYPE]),
2503 None, None, None)
2504 handler(line)
2505 except TracingFailure, e:
2506 # Hack in the values since the handler could be a static function.
2507 e.pid = line[self.PID]
2508 e.line = line
2509 e.line_number = self._line_number
2510 # Re-raise the modified exception.
2511 raise
2512 except (KeyError, NotImplementedError, ValueError), e:
2513 raise TracingFailure(
2514 'Trace generated a %s exception: %s' % (
2515 e.__class__.__name__, str(e)),
2516 line[self.PID],
2517 self._line_number,
2518 line,
2519 e)
2520
2521 def to_results(self):
2522 if not self.root_process:
2523 raise TracingFailure(
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002524 'Failed to detect the initial process %d' % self._thunk_pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002525 None, None, None)
2526 process = self.root_process.to_results_process()
2527 return Results(process)
2528
2529 def _thread_to_process(self, tid):
2530 """Finds the process from the thread id."""
2531 tid = int(tid, 16)
2532 pid = self._threads_active.get(tid)
2533 if not pid or not self._process_lookup.get(pid):
2534 return
2535 return self._process_lookup[pid]
2536
2537 @classmethod
2538 def handle_EventTrace_Header(cls, line):
2539 """Verifies no event was dropped, e.g. no buffer overrun occured."""
2540 BUFFER_SIZE = cls.USER_DATA
2541 #VERSION = cls.USER_DATA + 1
2542 #PROVIDER_VERSION = cls.USER_DATA + 2
2543 #NUMBER_OF_PROCESSORS = cls.USER_DATA + 3
2544 #END_TIME = cls.USER_DATA + 4
2545 #TIMER_RESOLUTION = cls.USER_DATA + 5
2546 #MAX_FILE_SIZE = cls.USER_DATA + 6
2547 #LOG_FILE_MODE = cls.USER_DATA + 7
2548 #BUFFERS_WRITTEN = cls.USER_DATA + 8
2549 #START_BUFFERS = cls.USER_DATA + 9
2550 #POINTER_SIZE = cls.USER_DATA + 10
2551 EVENTS_LOST = cls.USER_DATA + 11
2552 #CPU_SPEED = cls.USER_DATA + 12
2553 #LOGGER_NAME = cls.USER_DATA + 13
2554 #LOG_FILE_NAME = cls.USER_DATA + 14
2555 #BOOT_TIME = cls.USER_DATA + 15
2556 #PERF_FREQ = cls.USER_DATA + 16
2557 #START_TIME = cls.USER_DATA + 17
2558 #RESERVED_FLAGS = cls.USER_DATA + 18
2559 #BUFFERS_LOST = cls.USER_DATA + 19
2560 #SESSION_NAME_STRING = cls.USER_DATA + 20
2561 #LOG_FILE_NAME_STRING = cls.USER_DATA + 21
2562 if line[EVENTS_LOST] != '0':
2563 raise TracingFailure(
2564 ( '%s events were lost during trace, please increase the buffer '
2565 'size from %s') % (line[EVENTS_LOST], line[BUFFER_SIZE]),
2566 None, None, None)
2567
2568 def handle_FileIo_Cleanup(self, line):
2569 """General wisdom: if a file is closed, it's because it was opened.
2570
2571 Note that FileIo_Close is not used since if a file was opened properly but
2572 not closed before the process exits, only Cleanup will be logged.
2573 """
2574 #IRP = self.USER_DATA
2575 TTID = self.USER_DATA + 1 # Thread ID, that's what we want.
2576 FILE_OBJECT = self.USER_DATA + 2
2577 #FILE_KEY = self.USER_DATA + 3
2578 proc = self._thread_to_process(line[TTID])
2579 if not proc:
2580 # Not a process we care about.
2581 return
2582 file_object = line[FILE_OBJECT]
2583 if file_object in proc.file_objects:
2584 proc.add_file(proc.file_objects.pop(file_object), False)
2585
2586 def handle_FileIo_Create(self, line):
2587 """Handles a file open.
2588
2589 All FileIo events are described at
2590 http://msdn.microsoft.com/library/windows/desktop/aa363884.aspx
2591 for some value of 'description'.
2592
2593 " (..) process and thread id values of the IO events (..) are not valid "
2594 http://msdn.microsoft.com/magazine/ee358703.aspx
2595
2596 The FileIo.Create event doesn't return if the CreateFile() call
2597 succeeded, so keep track of the file_object and check that it is
2598 eventually closed with FileIo_Cleanup.
2599 """
2600 #IRP = self.USER_DATA
2601 TTID = self.USER_DATA + 1 # Thread ID, that's what we want.
2602 FILE_OBJECT = self.USER_DATA + 2
2603 #CREATE_OPTIONS = self.USER_DATA + 3
2604 #FILE_ATTRIBUTES = self.USER_DATA + 4
2605 #self.USER_DATA + SHARE_ACCESS = 5
2606 OPEN_PATH = self.USER_DATA + 6
2607
2608 proc = self._thread_to_process(line[TTID])
2609 if not proc:
2610 # Not a process we care about.
2611 return
2612
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002613 raw_path = line[OPEN_PATH]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002614 # Ignore directories and bare drive right away.
2615 if raw_path.endswith(os.path.sep):
2616 return
2617 filepath = self._drive_map.to_win32(raw_path)
2618 # Ignore bare drive right away. Some may still fall through with format
2619 # like '\\?\X:'
2620 if len(filepath) == 2:
2621 return
2622 file_object = line[FILE_OBJECT]
2623 if os.path.isdir(filepath):
2624 # There is no O_DIRECTORY equivalent on Windows. The closed is
2625 # FILE_FLAG_BACKUP_SEMANTICS but it's not exactly right either. So
2626 # simply discard directories are they are found.
2627 return
2628 # Override any stale file object
2629 proc.file_objects[file_object] = filepath
2630
2631 def handle_FileIo_Rename(self, line):
2632 # TODO(maruel): Handle?
2633 pass
2634
2635 def handle_Process_End(self, line):
2636 pid = line[self.PID]
2637 if self._process_lookup.get(pid):
2638 logging.info('Terminated: %d' % pid)
2639 self._process_lookup[pid] = None
2640 else:
2641 logging.debug('Terminated: %d' % pid)
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002642 if self._thunk_process and self._thunk_process.pid == pid:
2643 self._thunk_process = None
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002644
2645 def handle_Process_Start(self, line):
2646 """Handles a new child process started by PID."""
2647 #UNIQUE_PROCESS_KEY = self.USER_DATA
2648 PROCESS_ID = self.USER_DATA + 1
2649 #PARENT_PID = self.USER_DATA + 2
2650 #SESSION_ID = self.USER_DATA + 3
2651 #EXIT_STATUS = self.USER_DATA + 4
2652 #DIRECTORY_TABLE_BASE = self.USER_DATA + 5
2653 #USER_SID = self.USER_DATA + 6
2654 IMAGE_FILE_NAME = self.USER_DATA + 7
2655 COMMAND_LINE = self.USER_DATA + 8
2656
2657 ppid = line[self.PID]
2658 pid = int(line[PROCESS_ID], 16)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002659 command_line = CommandLineToArgvW(line[COMMAND_LINE])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002660 logging.debug(
2661 'New process %d->%d (%s) %s' %
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002662 (ppid, pid, line[IMAGE_FILE_NAME], command_line))
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002663
2664 if pid == self._thunk_pid:
2665 # Need to ignore processes we don't know about because the log is
2666 # system-wide. self._thunk_pid shall start only one process.
2667 # This is tricky though because Windows *loves* to reuse process id and
2668 # it happens often that the process ID of the thunk script created by
2669 # create_thunk() is reused. So just detecting the pid here is not
2670 # sufficient, we must confirm the command line.
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002671 if command_line[:len(self._thunk_cmd)] != self._thunk_cmd:
2672 logging.info(
2673 'Ignoring duplicate pid %d for %s: %s while searching for %s',
2674 pid, self._trace_name, command_line, self._thunk_cmd)
2675 return
2676
2677 # TODO(maruel): The check is quite weak. Add the thunk path.
2678 if self._thunk_process:
2679 raise TracingFailure(
2680 ( 'Parent process is _thunk_pid(%d) but thunk_process(%d) is '
2681 'already set') % (self._thunk_pid, self._thunk_process.pid),
2682 None, None, None)
2683 proc = self.Process(self.blacklist, pid, None)
2684 self._thunk_process = proc
2685 return
2686 elif ppid == self._thunk_pid and self._thunk_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002687 proc = self.Process(self.blacklist, pid, None)
2688 self.root_process = proc
2689 ppid = None
2690 elif self._process_lookup.get(ppid):
2691 proc = self.Process(self.blacklist, pid, None)
2692 self._process_lookup[ppid].children.append(proc)
2693 else:
2694 # Ignore
2695 return
2696 self._process_lookup[pid] = proc
2697
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002698 proc.command = command_line
2699 proc.executable = line[IMAGE_FILE_NAME]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002700 # proc.command[0] may be the absolute path of 'executable' but it may be
2701 # anything else too. If it happens that command[0] ends with executable,
2702 # use it, otherwise defaults to the base name.
2703 cmd0 = proc.command[0].lower()
2704 if not cmd0.endswith('.exe'):
2705 # TODO(maruel): That's not strictly true either.
2706 cmd0 += '.exe'
2707 if cmd0.endswith(proc.executable) and os.path.isfile(cmd0):
2708 # Fix the path.
2709 cmd0 = cmd0.replace('/', os.path.sep)
2710 cmd0 = os.path.normpath(cmd0)
2711 proc.executable = get_native_path_case(cmd0)
2712 logging.info(
2713 'New child: %s -> %d %s' % (ppid, pid, proc.executable))
2714
2715 def handle_Thread_End(self, line):
2716 """Has the same parameters as Thread_Start."""
2717 tid = int(line[self.TID], 16)
2718 self._threads_active.pop(tid, None)
2719
2720 def handle_Thread_Start(self, line):
2721 """Handles a new thread created.
2722
2723 Do not use self.PID here since a process' initial thread is created by
2724 the parent process.
2725 """
2726 PROCESS_ID = self.USER_DATA
2727 TTHREAD_ID = self.USER_DATA + 1
2728 #STACK_BASE = self.USER_DATA + 2
2729 #STACK_LIMIT = self.USER_DATA + 3
2730 #USER_STACK_BASE = self.USER_DATA + 4
2731 #USER_STACK_LIMIT = self.USER_DATA + 5
2732 #AFFINITY = self.USER_DATA + 6
2733 #WIN32_START_ADDR = self.USER_DATA + 7
2734 #TEB_BASE = self.USER_DATA + 8
2735 #SUB_PROCESS_TAG = self.USER_DATA + 9
2736 #BASE_PRIORITY = self.USER_DATA + 10
2737 #PAGE_PRIORITY = self.USER_DATA + 11
2738 #IO_PRIORITY = self.USER_DATA + 12
2739 #THREAD_FLAGS = self.USER_DATA + 13
2740 # Do not use self.PID here since a process' initial thread is created by
2741 # the parent process.
2742 pid = int(line[PROCESS_ID], 16)
2743 tid = int(line[TTHREAD_ID], 16)
2744 logging.debug('New thread pid:%d, tid:%d' % (pid, tid))
2745 self._threads_active[tid] = pid
2746
2747 @classmethod
2748 def supported_events(cls):
2749 """Returns all the procesed events."""
2750 out = []
2751 for member in dir(cls):
2752 match = re.match(r'^handle_([A-Za-z]+)_([A-Za-z]+)$', member)
2753 if match:
2754 out.append(match.groups())
2755 return out
2756
2757 class Tracer(ApiBase.Tracer):
2758 # The basic headers.
2759 EXPECTED_HEADER = [
2760 u'Event Name',
2761 u'Type',
2762 u'Event ID',
2763 u'Version',
2764 u'Channel',
2765 u'Level', # 5
2766 u'Opcode',
2767 u'Task',
2768 u'Keyword',
2769 u'PID',
2770 u'TID', # 10
2771 u'Processor Number',
2772 u'Instance ID',
2773 u'Parent Instance ID',
2774 u'Activity ID',
2775 u'Related Activity ID', # 15
2776 u'Clock-Time',
2777 u'Kernel(ms)', # Both have a resolution of ~15ms which makes them
2778 u'User(ms)', # pretty much useless.
2779 u'User Data', # Extra arguments that are event-specific.
2780 ]
2781 # Only the useful headers common to all entries are listed there. Any column
2782 # at 19 or higher is dependent on the specific event.
2783 EVENT_NAME = 0
2784 TYPE = 1
2785 PID = 9
2786 TID = 10
2787 PROCESSOR_ID = 11
2788 TIMESTAMP = 16
2789 NULL_GUID = '{00000000-0000-0000-0000-000000000000}'
2790 USER_DATA = 19
2791
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002792 class CsvReader(object):
2793 """CSV reader that reads files generated by tracerpt.exe.
2794
2795 csv.reader() fails to read them properly, it mangles file names quoted
2796 with "" with a comma in it.
2797 """
2798 # 0. Had a ',' or one of the following ' ' after a comma, next should
2799 # be ' ', '"' or string or ',' for an empty field.
2800 ( HAD_DELIMITER,
2801 # 1. Processing an unquoted field up to ','.
2802 IN_STR,
2803 # 2. Processing a new field starting with '"'.
2804 STARTING_STR_QUOTED,
2805 # 3. Second quote in a row at the start of a field. It could be either
2806 # '""foo""' or '""'. Who the hell thought it was a great idea to use
2807 # the same character for delimiting and escaping?
2808 STARTING_SECOND_QUOTE,
2809 # 4. A quote inside a quoted string where the previous character was
2810 # not a quote, so the string is not empty. Can be either: end of a
2811 # quoted string (a delimiter) or a quote escape. The next char must be
2812 # either '"' or ','.
2813 HAD_QUOTE_IN_QUOTED,
2814 # 5. Second quote inside a quoted string.
2815 HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED,
2816 # 6. Processing a field that started with '"'.
2817 IN_STR_QUOTED) = range(7)
2818
2819 def __init__(self, f):
2820 self.f = f
2821
2822 def __iter__(self):
2823 return self
2824
2825 def next(self):
2826 """Splits the line in fields."""
2827 line = self.f.readline()
2828 if not line:
2829 raise StopIteration()
2830 line = line.strip()
2831 fields = []
2832 state = self.HAD_DELIMITER
2833 for i, c in enumerate(line):
2834 if state == self.HAD_DELIMITER:
2835 if c == ',':
2836 # Empty field.
2837 fields.append('')
2838 elif c == ' ':
2839 # Ignore initial whitespaces
2840 pass
2841 elif c == '"':
2842 state = self.STARTING_STR_QUOTED
2843 fields.append('')
2844 else:
2845 # Start of a new field.
2846 state = self.IN_STR
2847 fields.append(c)
2848
2849 elif state == self.IN_STR:
2850 # Do not accept quote inside unquoted field.
2851 assert c != '"', (i, c, line, fields)
2852 if c == ',':
2853 fields[-1] = fields[-1].strip()
2854 state = self.HAD_DELIMITER
2855 else:
2856 fields[-1] = fields[-1] + c
2857
2858 elif state == self.STARTING_STR_QUOTED:
2859 if c == '"':
2860 # Do not store the character yet.
2861 state = self.STARTING_SECOND_QUOTE
2862 else:
2863 state = self.IN_STR_QUOTED
2864 fields[-1] = fields[-1] + c
2865
2866 elif state == self.STARTING_SECOND_QUOTE:
2867 if c == ',':
2868 # It was an empty field. '""' == ''.
2869 state = self.HAD_DELIMITER
2870 else:
2871 fields[-1] = fields[-1] + '"' + c
2872 state = self.IN_STR_QUOTED
2873
2874 elif state == self.HAD_QUOTE_IN_QUOTED:
2875 if c == ',':
2876 # End of the string.
2877 state = self.HAD_DELIMITER
2878 elif c == '"':
2879 state = self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED
2880 else:
2881 # The previous double-quote was just an unescaped quote.
2882 fields[-1] = fields[-1] + '"' + c
2883 state = self.IN_STR_QUOTED
2884
2885 elif state == self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED:
2886 if c == ',':
2887 # End of the string.
2888 state = self.HAD_DELIMITER
2889 fields[-1] = fields[-1] + '"'
2890 else:
maruel@chromium.org261d1352013-02-25 19:23:20 +00002891 # That's just how the logger rolls. Revert back to appending the
2892 # char and "guess" it was a quote in a double-quoted string.
2893 state = self.IN_STR_QUOTED
2894 fields[-1] = fields[-1] + '"' + c
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002895
2896 elif state == self.IN_STR_QUOTED:
2897 if c == '"':
2898 # Could be a delimiter or an escape.
2899 state = self.HAD_QUOTE_IN_QUOTED
2900 else:
2901 fields[-1] = fields[-1] + c
2902
2903 if state == self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED:
2904 fields[-1] = fields[-1] + '"'
2905 else:
2906 assert state in (
2907 # Terminated with a normal field.
2908 self.IN_STR,
2909 # Terminated with an empty field.
2910 self.STARTING_SECOND_QUOTE,
2911 # Terminated with a normal quoted field.
2912 self.HAD_QUOTE_IN_QUOTED), (
2913 line, state, fields)
2914 return fields
2915
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002916 def __init__(self, logname):
2917 """Starts the log collection.
2918
2919 Requires administrative access. logman.exe is synchronous so no need for a
2920 "warmup" call. 'Windows Kernel Trace' is *localized* so use its GUID
2921 instead. The GUID constant name is SystemTraceControlGuid. Lovely.
2922
2923 One can get the list of potentially interesting providers with:
2924 "logman query providers | findstr /i file"
2925 """
2926 super(LogmanTrace.Tracer, self).__init__(logname)
2927 self._script = create_thunk()
2928 cmd_start = [
2929 'logman.exe',
2930 'start',
2931 'NT Kernel Logger',
2932 '-p', '{9e814aad-3204-11d2-9a82-006008a86939}',
2933 # splitio,fileiocompletion,syscall,file,cswitch,img
2934 '(process,fileio,thread)',
2935 '-o', self._logname + '.etl',
2936 '-ets', # Send directly to kernel
2937 # Values extracted out of thin air.
2938 # Event Trace Session buffer size in kb.
2939 '-bs', '10240',
2940 # Number of Event Trace Session buffers.
2941 '-nb', '16', '256',
2942 ]
2943 logging.debug('Running: %s' % cmd_start)
2944 try:
2945 subprocess.check_call(
2946 cmd_start,
2947 stdin=subprocess.PIPE,
2948 stdout=subprocess.PIPE,
2949 stderr=subprocess.STDOUT)
2950 except subprocess.CalledProcessError, e:
2951 if e.returncode == -2147024891:
2952 print >> sys.stderr, 'Please restart with an elevated admin prompt'
2953 elif e.returncode == -2144337737:
2954 print >> sys.stderr, (
2955 'A kernel trace was already running, stop it and try again')
2956 raise
2957
2958 def trace(self, cmd, cwd, tracename, output):
2959 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
2960 assert os.path.isabs(cmd[0]), cmd[0]
2961 assert os.path.isabs(cwd), cwd
2962 assert os.path.normpath(cwd) == cwd, cwd
2963 with self._lock:
2964 if not self._initialized:
2965 raise TracingFailure(
2966 'Called Tracer.trace() on an unitialized object',
2967 None, None, None, tracename)
2968 assert tracename not in (i['trace'] for i in self._traces)
2969
2970 # Use "logman -?" for help.
2971
2972 stdout = stderr = None
2973 if output:
2974 stdout = subprocess.PIPE
2975 stderr = subprocess.STDOUT
2976
2977 # Run the child process.
2978 logging.debug('Running: %s' % cmd)
2979 # Use the temporary script generated with create_thunk() so we have a
2980 # clear pid owner. Since trace_inputs.py can be used as a library and
2981 # could trace multiple processes simultaneously, it makes it more complex
2982 # if the executable to be traced is executed directly here. It also solves
2983 # issues related to logman.exe that needs to be executed to control the
2984 # kernel trace.
2985 child_cmd = [
2986 sys.executable,
2987 self._script,
2988 tracename,
2989 ]
2990 child = subprocess.Popen(
2991 child_cmd + fix_python_path(cmd),
2992 cwd=cwd,
2993 stdin=subprocess.PIPE,
2994 stdout=stdout,
2995 stderr=stderr)
2996 logging.debug('Started child pid: %d' % child.pid)
2997 out = child.communicate()[0]
2998 # This doesn't mean all the grand-children are done. Sadly, we don't have
2999 # a good way to determine that.
3000
3001 with self._lock:
3002 assert tracename not in (i['trace'] for i in self._traces)
3003 self._traces.append({
maruel@chromium.orgf43e68b2012-10-15 20:23:10 +00003004 'cmd': cmd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003005 'cwd': cwd,
maruel@chromium.org964ddde2012-10-29 18:13:46 +00003006 'output': out,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003007 'pid': child.pid,
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00003008 # Used to figure out the real process when process ids are reused.
3009 'thunk_cmd': child_cmd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003010 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003011 })
3012
3013 return child.returncode, out
3014
3015 def close(self, _timeout=None):
3016 """Stops the kernel log collection and converts the traces to text
3017 representation.
3018 """
3019 with self._lock:
3020 if not self._initialized:
3021 raise TracingFailure(
3022 'Called Tracer.close() on an unitialized object',
3023 None, None, None)
3024 os.remove(self._script)
3025 # Save metadata, add 'format' key..
3026 data = {
3027 'format': 'csv',
3028 'traces': self._traces,
3029 }
3030 write_json(self._logname, data, False)
3031
3032 cmd_stop = [
3033 'logman.exe',
3034 'stop',
3035 'NT Kernel Logger',
3036 '-ets', # Sends the command directly to the kernel.
3037 ]
3038 logging.debug('Running: %s' % cmd_stop)
3039 subprocess.check_call(
3040 cmd_stop,
3041 stdin=subprocess.PIPE,
3042 stdout=subprocess.PIPE,
3043 stderr=subprocess.STDOUT)
3044 self._initialized = False
3045
3046 def post_process_log(self):
3047 """Converts the .etl file into .csv then into .json."""
3048 super(LogmanTrace.Tracer, self).post_process_log()
3049 logformat = 'csv'
3050 self._convert_log(logformat)
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003051 self._trim_log(logformat)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003052
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003053 def _trim_log(self, logformat):
3054 """Reduces the amount of data in original log by generating a 'reduced'
3055 log.
3056 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003057 if logformat == 'csv_utf16':
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003058 file_handle = codecs.open(
3059 self._logname + '.' + logformat, 'r', encoding='utf-16')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003060
3061 elif logformat == 'csv':
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003062 assert sys.getfilesystemencoding() == 'mbcs'
3063 file_handle = codecs.open(
3064 self._logname + '.' + logformat, 'r',
3065 encoding=get_current_encoding())
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003066
3067 supported_events = LogmanTrace.Context.supported_events()
3068
3069 def trim(generator):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003070 """Loads items from the generator and returns the interesting data.
3071
3072 It filters out any uninteresting line and reduce the amount of data in
3073 the trace.
3074 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003075 for index, line in enumerate(generator):
3076 if not index:
3077 if line != self.EXPECTED_HEADER:
3078 raise TracingFailure(
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003079 'Found malformed header: %s' % line,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003080 None, None, None)
3081 continue
3082 # As you can see, the CSV is full of useful non-redundant information:
3083 if (line[2] != '0' or # Event ID
3084 line[3] not in ('2', '3') or # Version
3085 line[4] != '0' or # Channel
3086 line[5] != '0' or # Level
3087 line[7] != '0' or # Task
3088 line[8] != '0x0000000000000000' or # Keyword
3089 line[12] != '' or # Instance ID
3090 line[13] != '' or # Parent Instance ID
3091 line[14] != self.NULL_GUID or # Activity ID
3092 line[15] != ''): # Related Activity ID
3093 raise TracingFailure(
3094 'Found unexpected values in line: %s' % ' '.join(line),
3095 None, None, None)
3096
3097 if (line[self.EVENT_NAME], line[self.TYPE]) not in supported_events:
3098 continue
3099
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003100 yield [
3101 line[self.EVENT_NAME],
3102 line[self.TYPE],
3103 line[self.PID],
3104 line[self.TID],
3105 line[self.PROCESSOR_ID],
3106 line[self.TIMESTAMP],
3107 ] + line[self.USER_DATA:]
3108
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003109 # must not convert the trim() call into a list, since it will use too much
3110 # memory for large trace. use a csv file as a workaround since the json
3111 # parser requires a complete in-memory file.
3112 with open('%s.preprocessed' % self._logname, 'wb') as f:
3113 # $ and * can't be used in file name on windows, reducing the likelihood
3114 # of having to escape a string.
3115 out = csv.writer(
3116 f, delimiter='$', quotechar='*', quoting=csv.QUOTE_MINIMAL)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003117 for line in trim(self.CsvReader(file_handle)):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003118 out.writerow([s.encode('utf-8') for s in line])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003119
3120 def _convert_log(self, logformat):
3121 """Converts the ETL trace to text representation.
3122
3123 Normally, 'csv' is sufficient. If complex scripts are used (like eastern
3124 languages), use 'csv_utf16'. If localization gets in the way, use 'xml'.
3125
3126 Arguments:
3127 - logformat: Text format to be generated, csv, csv_utf16 or xml.
3128
3129 Use "tracerpt -?" for help.
3130 """
3131 LOCALE_INVARIANT = 0x7F
3132 windll.kernel32.SetThreadLocale(LOCALE_INVARIANT)
3133 cmd_convert = [
3134 'tracerpt.exe',
3135 '-l', self._logname + '.etl',
3136 '-o', self._logname + '.' + logformat,
3137 '-gmt', # Use UTC
3138 '-y', # No prompt
3139 # Use -of XML to get the header of each items after column 19, e.g. all
3140 # the actual headers of 'User Data'.
3141 ]
3142
3143 if logformat == 'csv':
3144 # tracerpt localizes the 'Type' column, for major brainfuck
3145 # entertainment. I can't imagine any sane reason to do that.
3146 cmd_convert.extend(['-of', 'CSV'])
3147 elif logformat == 'csv_utf16':
3148 # This causes it to use UTF-16, which doubles the log size but ensures
3149 # the log is readable for non-ASCII characters.
3150 cmd_convert.extend(['-of', 'CSV', '-en', 'Unicode'])
3151 elif logformat == 'xml':
3152 cmd_convert.extend(['-of', 'XML'])
3153 else:
3154 raise ValueError('Unexpected log format \'%s\'' % logformat)
3155 logging.debug('Running: %s' % cmd_convert)
3156 # This can takes tens of minutes for large logs.
3157 # Redirects all output to stderr.
3158 subprocess.check_call(
3159 cmd_convert,
3160 stdin=subprocess.PIPE,
3161 stdout=sys.stderr,
3162 stderr=sys.stderr)
3163
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003164 def __init__(self, use_sudo=False): # pylint: disable=W0613
3165 super(LogmanTrace, self).__init__()
3166 # Ignore use_sudo. It's irrelevant on Windows but kept to simplify the API.
3167
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003168 @staticmethod
3169 def clean_trace(logname):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003170 for ext in ('', '.csv', '.etl', '.json', '.xml', '.preprocessed'):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003171 if os.path.isfile(logname + ext):
3172 os.remove(logname + ext)
3173
3174 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003175 def parse_log(cls, logname, blacklist, trace_name):
3176 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00003177 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003178
3179 def blacklist_more(filepath):
3180 # All the NTFS metadata is in the form x:\$EXTEND or stuff like that.
3181 return blacklist(filepath) or re.match(r'[A-Z]\:\\\$EXTEND', filepath)
3182
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003183 # Create a list of (Context, result_dict) tuples. This is necessary because
3184 # the csv file may be larger than the amount of available memory.
3185 contexes = [
3186 (
3187 cls.Context(
3188 blacklist_more, item['pid'], item['trace'], item['thunk_cmd']),
3189 {
3190 'output': item['output'],
3191 'trace': item['trace'],
3192 },
3193 )
3194 for item in read_json(logname)['traces']
3195 if not trace_name or item['trace'] == trace_name
3196 ]
3197
3198 # The log may be too large to fit in memory and it is not efficient to read
3199 # it multiple times, so multiplex the contexes instead, which is slightly
3200 # more awkward.
3201 with open('%s.preprocessed' % logname, 'rb') as f:
3202 lines = csv.reader(
3203 f, delimiter='$', quotechar='*', quoting=csv.QUOTE_MINIMAL)
3204 for encoded in lines:
3205 line = [s.decode('utf-8') for s in encoded]
3206 # Convert the PID in-place from hex.
3207 line[cls.Context.PID] = int(line[cls.Context.PID], 16)
3208 for context in contexes:
3209 if 'exception' in context[1]:
3210 continue
3211 try:
3212 context[0].on_line(line)
3213 except TracingFailure:
3214 context[1]['exception'] = sys.exc_info()
3215
3216 for context in contexes:
3217 if 'exception' in context[1]:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003218 continue
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003219 context[1]['results'] = context[0].to_results()
3220
3221 return [context[1] for context in contexes]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003222
3223
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003224def get_api(**kwargs):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003225 """Returns the correct implementation for the current OS."""
3226 if sys.platform == 'cygwin':
3227 raise NotImplementedError(
3228 'Not implemented for cygwin, start the script from Win32 python')
3229 flavors = {
3230 'win32': LogmanTrace,
3231 'darwin': Dtrace,
3232 'sunos5': Dtrace,
3233 'freebsd7': Dtrace,
3234 'freebsd8': Dtrace,
3235 }
3236 # Defaults to strace.
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003237 return flavors.get(sys.platform, Strace)(**kwargs)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003238
3239
3240def extract_directories(root_dir, files, blacklist):
3241 """Detects if all the files in a directory are in |files| and if so, replace
3242 the individual files by a Results.Directory instance.
3243
3244 Takes a list of Results.File instances and returns a shorter list of
3245 Results.File and Results.Directory instances.
3246
3247 Arguments:
3248 - root_dir: Optional base directory that shouldn't be search further.
3249 - files: list of Results.File instances.
3250 - blacklist: regexp of files to ignore, for example r'.+\.pyc'.
3251 """
3252 logging.info(
3253 'extract_directories(%s, %d files, ...)' % (root_dir, len(files)))
3254 assert not (root_dir or '').endswith(os.path.sep), root_dir
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003255 # It is important for root_dir to not be a symlinked path, make sure to call
3256 # os.path.realpath() as needed.
3257 assert not root_dir or (
3258 os.path.realpath(get_native_path_case(root_dir)) == root_dir)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003259 assert not any(isinstance(f, Results.Directory) for f in files)
3260 # Remove non existent files.
3261 files = [f for f in files if f.existent]
3262 if not files:
3263 return files
3264 # All files must share the same root, which can be None.
3265 assert len(set(f.root for f in files)) == 1, set(f.root for f in files)
3266
3267 # Creates a {directory: {filename: File}} mapping, up to root.
3268 buckets = {}
3269 if root_dir:
3270 buckets[root_dir] = {}
3271 for fileobj in files:
3272 path = fileobj.full_path
3273 directory = os.path.dirname(path)
3274 assert directory
3275 # Do not use os.path.basename() so trailing os.path.sep is kept.
3276 basename = path[len(directory)+1:]
3277 files_in_directory = buckets.setdefault(directory, {})
3278 files_in_directory[basename] = fileobj
3279 # Add all the directories recursively up to root.
3280 while True:
3281 old_d = directory
3282 directory = os.path.dirname(directory)
3283 if directory + os.path.sep == root_dir or directory == old_d:
3284 break
3285 buckets.setdefault(directory, {})
3286
3287 root_prefix = len(root_dir) + 1 if root_dir else 0
3288 for directory in sorted(buckets, reverse=True):
maruel@chromium.orgb1678e62013-03-19 15:55:17 +00003289 if not os.path.isdir(directory):
3290 logging.debug(
3291 '%s was a directory but doesn\'t exist anymore; ignoring', directory)
3292 continue
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003293 actual = set(f for f in os.listdir(directory) if not blacklist(f))
3294 expected = set(buckets[directory])
3295 if not (actual - expected):
3296 parent = os.path.dirname(directory)
3297 buckets[parent][os.path.basename(directory)] = Results.Directory(
3298 root_dir,
3299 directory[root_prefix:],
3300 False,
3301 sum(f.size for f in buckets[directory].itervalues()),
3302 sum(f.nb_files for f in buckets[directory].itervalues()))
3303 # Remove the whole bucket.
3304 del buckets[directory]
3305
3306 # Reverse the mapping with what remains. The original instances are returned,
3307 # so the cached meta data is kept.
3308 files = sum((x.values() for x in buckets.itervalues()), [])
3309 return sorted(files, key=lambda x: x.path)
3310
3311
3312def trace(logfile, cmd, cwd, api, output):
3313 """Traces an executable. Returns (returncode, output) from api.
3314
3315 Arguments:
3316 - logfile: file to write to.
3317 - cmd: command to run.
3318 - cwd: current directory to start the process in.
3319 - api: a tracing api instance.
3320 - output: if True, returns output, otherwise prints it at the console.
3321 """
3322 cmd = fix_python_path(cmd)
3323 api.clean_trace(logfile)
3324 with api.get_tracer(logfile) as tracer:
3325 return tracer.trace(cmd, cwd, 'default', output)
3326
3327
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003328def CMDclean(args):
3329 """Cleans up traces."""
3330 parser = OptionParserTraceInputs(command='clean')
3331 options, args = parser.parse_args(args)
3332 api = get_api()
3333 api.clean_trace(options.log)
3334 return 0
3335
3336
3337def CMDtrace(args):
3338 """Traces an executable."""
3339 parser = OptionParserTraceInputs(command='trace')
3340 parser.allow_interspersed_args = False
3341 parser.add_option(
3342 '-q', '--quiet', action='store_true',
3343 help='Redirects traced executable output to /dev/null')
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003344 parser.add_option(
3345 '-s', '--sudo', action='store_true',
3346 help='Use sudo when shelling out the tracer tool (ignored on Windows)')
3347 parser.add_option(
3348 '-n', '--no-sudo', action='store_false',
3349 help='Don\'t use sudo')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003350 options, args = parser.parse_args(args)
3351
3352 if not args:
3353 parser.error('Please provide a command to run')
3354
3355 if not os.path.isabs(args[0]) and os.access(args[0], os.X_OK):
3356 args[0] = os.path.abspath(args[0])
3357
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003358 # options.sudo default value is None, which is to do whatever tracer defaults
3359 # do.
3360 api = get_api(use_sudo=options.sudo)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003361 return trace(options.log, args, os.getcwd(), api, options.quiet)[0]
3362
3363
3364def CMDread(args):
3365 """Reads the logs and prints the result."""
3366 parser = OptionParserTraceInputs(command='read')
3367 parser.add_option(
3368 '-V', '--variable',
3369 nargs=2,
3370 action='append',
3371 dest='variables',
3372 metavar='VAR_NAME directory',
3373 default=[],
3374 help=('Variables to replace relative directories against. Example: '
3375 '"-v \'$HOME\' \'/home/%s\'" will replace all occurence of your '
3376 'home dir with $HOME') % getpass.getuser())
3377 parser.add_option(
3378 '--root-dir',
3379 help='Root directory to base everything off it. Anything outside of this '
3380 'this directory will not be reported')
3381 parser.add_option(
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003382 '--trace-name',
3383 help='Only reads one of the trace. Defaults to reading all traces')
3384 parser.add_option(
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003385 '-j', '--json', action='store_true',
3386 help='Outputs raw result data as json')
3387 parser.add_option(
3388 '-b', '--blacklist', action='append', default=[],
3389 help='List of regexp to use as blacklist filter')
3390 options, args = parser.parse_args(args)
3391
3392 if options.root_dir:
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003393 options.root_dir = unicode(os.path.abspath(options.root_dir))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003394
3395 variables = dict(options.variables)
3396 api = get_api()
3397 def blacklist(f):
3398 return any(re.match(b, f) for b in options.blacklist)
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003399 data = api.parse_log(options.log, blacklist, options.trace_name)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003400 # Process each trace.
3401 output_as_json = []
3402 for item in data:
3403 if 'exception' in item:
maruel@chromium.org424f5692012-10-15 15:42:38 +00003404 # Do not abort the other traces.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003405 print >> sys.stderr, (
maruel@chromium.org424f5692012-10-15 15:42:38 +00003406 'Trace %s: Got an exception: %s' % (
3407 item['trace'], item['exception'][1]))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003408 continue
3409 results = item['results']
3410 if options.root_dir:
3411 results = results.strip_root(options.root_dir)
3412
3413 if options.json:
3414 output_as_json.append(results.flatten())
3415 else:
3416 simplified = extract_directories(
3417 options.root_dir, results.files, blacklist)
3418 simplified = [f.replace_variables(variables) for f in simplified]
3419 if len(data) > 1:
3420 print('Trace: %s' % item['trace'])
3421 print('Total: %d' % len(results.files))
3422 print('Non existent: %d' % len(results.non_existent))
3423 for f in results.non_existent:
3424 print(' %s' % f.path)
3425 print(
3426 'Interesting: %d reduced to %d' % (
3427 len(results.existent), len(simplified)))
3428 for f in simplified:
3429 print(' %s' % f.path)
3430
3431 if options.json:
3432 write_json(sys.stdout, output_as_json, False)
3433 return 0
3434
3435
3436class OptionParserWithLogging(optparse.OptionParser):
3437 """Adds --verbose option."""
3438 def __init__(self, verbose=0, **kwargs):
3439 optparse.OptionParser.__init__(self, **kwargs)
3440 self.add_option(
3441 '-v', '--verbose',
3442 action='count',
3443 default=verbose,
3444 help='Use multiple times to increase verbosity')
3445
3446 def parse_args(self, *args, **kwargs):
3447 options, args = optparse.OptionParser.parse_args(self, *args, **kwargs)
3448 levels = [logging.ERROR, logging.INFO, logging.DEBUG]
3449 logging.basicConfig(
3450 level=levels[min(len(levels)-1, options.verbose)],
3451 format='%(levelname)5s %(module)15s(%(lineno)3d): %(message)s')
3452 return options, args
3453
3454
3455class OptionParserWithNiceDescription(OptionParserWithLogging):
3456 """Generates the description with the command's docstring."""
3457 def __init__(self, **kwargs):
3458 """Sets 'description' and 'usage' if not already specified."""
3459 command = kwargs.pop('command', 'help')
3460 kwargs.setdefault(
3461 'description',
3462 re.sub('[\r\n ]{2,}', ' ', get_command_handler(command).__doc__))
3463 kwargs.setdefault('usage', '%%prog %s [options]' % command)
3464 OptionParserWithLogging.__init__(self, **kwargs)
3465
3466
3467class OptionParserTraceInputs(OptionParserWithNiceDescription):
3468 """Adds automatic --log handling."""
3469 def __init__(self, **kwargs):
3470 OptionParserWithNiceDescription.__init__(self, **kwargs)
3471 self.add_option(
3472 '-l', '--log', help='Log file to generate or read, required')
3473
3474 def parse_args(self, *args, **kwargs):
3475 """Makes sure the paths make sense.
3476
3477 On Windows, / and \ are often mixed together in a path.
3478 """
3479 options, args = OptionParserWithNiceDescription.parse_args(
3480 self, *args, **kwargs)
3481 if not options.log:
3482 self.error('Must supply a log file with -l')
3483 options.log = os.path.abspath(options.log)
3484 return options, args
3485
3486
3487def extract_documentation():
3488 """Returns a dict {command: description} for each of documented command."""
3489 commands = (
3490 fn[3:]
3491 for fn in dir(sys.modules['__main__'])
3492 if fn.startswith('CMD') and get_command_handler(fn[3:]).__doc__)
3493 return dict((fn, get_command_handler(fn).__doc__) for fn in commands)
3494
3495
3496def CMDhelp(args):
3497 """Prints list of commands or help for a specific command."""
3498 doc = extract_documentation()
3499 # Calculates the optimal offset.
3500 offset = max(len(cmd) for cmd in doc)
3501 format_str = ' %-' + str(offset + 2) + 's %s'
3502 # Generate a one-liner documentation of each commands.
3503 commands_description = '\n'.join(
3504 format_str % (cmd, doc[cmd].split('\n')[0]) for cmd in sorted(doc))
3505
3506 parser = OptionParserWithNiceDescription(
3507 usage='%prog <command> [options]',
3508 description='Commands are:\n%s\n' % commands_description)
3509 parser.format_description = lambda _: parser.description
3510
3511 # Strip out any -h or --help argument.
3512 _, args = parser.parse_args([i for i in args if not i in ('-h', '--help')])
3513 if len(args) == 1:
3514 if not get_command_handler(args[0]):
3515 parser.error('Unknown command %s' % args[0])
3516 # The command was "%prog help command", replaces ourself with
3517 # "%prog command --help" so help is correctly printed out.
3518 return main(args + ['--help'])
3519 elif args:
3520 parser.error('Unknown argument "%s"' % ' '.join(args))
3521 parser.print_help()
3522 return 0
3523
3524
3525def get_command_handler(name):
3526 """Returns the command handler or CMDhelp if it doesn't exist."""
3527 return getattr(sys.modules['__main__'], 'CMD%s' % name, None)
3528
3529
3530def main_impl(argv):
3531 command = get_command_handler(argv[0] if argv else 'help')
3532 if not command:
3533 return CMDhelp(argv)
3534 return command(argv[1:])
3535
3536def main(argv):
3537 try:
3538 main_impl(argv)
3539 except TracingFailure, e:
3540 sys.stderr.write('\nError: ')
3541 sys.stderr.write(str(e))
3542 sys.stderr.write('\n')
3543 return 1
3544
3545
3546if __name__ == '__main__':
3547 sys.exit(main(sys.argv[1:]))