blob: 05f37916587741b5f02ef217a387c9d5afa935bb [file] [log] [blame]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001#!/usr/bin/env python
2# coding=utf-8
3# Copyright (c) 2012 The Chromium Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""Traces an executable and its child processes and extract the files accessed
8by them.
9
10The implementation uses OS-specific API. The native Kernel logger and the ETL
11interface is used on Windows. Dtrace is used on OSX. Strace is used otherwise.
12The OS-specific implementation is hidden in an 'API' interface.
13
14The results are embedded in a Results instance. The tracing is done in two
15phases, the first is to do the actual trace and generate an
16implementation-specific log file. Then the log file is parsed to extract the
17information, including the individual child processes and the files accessed
18from the log.
19"""
20
21import codecs
22import csv
23import getpass
24import glob
25import json
26import logging
maruel@chromium.org306e0e72012-11-02 18:22:03 +000027import unicodedata
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000028import optparse
29import os
30import re
31import subprocess
32import sys
33import tempfile
34import threading
35import time
36import weakref
37
38## OS-specific imports
39
40if sys.platform == 'win32':
41 from ctypes.wintypes import byref, create_unicode_buffer, c_int, c_wchar_p
42 from ctypes.wintypes import windll, FormatError # pylint: disable=E0611
43 from ctypes.wintypes import GetLastError # pylint: disable=E0611
44elif sys.platform == 'darwin':
45 import Carbon.File # pylint: disable=F0401
46 import MacOS # pylint: disable=F0401
47
48
49BASE_DIR = os.path.dirname(os.path.abspath(__file__))
50ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR))
51
52
53class TracingFailure(Exception):
54 """An exception occured during tracing."""
55 def __init__(self, description, pid, line_number, line, *args):
56 super(TracingFailure, self).__init__(
57 description, pid, line_number, line, *args)
58 self.description = description
59 self.pid = pid
60 self.line_number = line_number
61 self.line = line
62 self.extra = args
63
64 def __str__(self):
65 out = self.description
66 if self.pid:
67 out += '\npid: %d' % self.pid
68 if self.line_number:
69 out += '\nline: %d' % self.line_number
70 if self.line:
71 out += '\n%s' % self.line
72 if self.extra:
73 out += '\n' + ', '.join(map(str, filter(None, self.extra)))
74 return out
75
76
77## OS-specific functions
78
79if sys.platform == 'win32':
80 def QueryDosDevice(drive_letter):
81 """Returns the Windows 'native' path for a DOS drive letter."""
82 assert re.match(r'^[a-zA-Z]:$', drive_letter), drive_letter
maruel@chromium.org306e0e72012-11-02 18:22:03 +000083 assert isinstance(drive_letter, unicode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000084 # Guesswork. QueryDosDeviceW never returns the required number of bytes.
85 chars = 1024
maruel@chromium.org306e0e72012-11-02 18:22:03 +000086 drive_letter = drive_letter
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000087 p = create_unicode_buffer(chars)
88 if 0 == windll.kernel32.QueryDosDeviceW(drive_letter, p, chars):
89 err = GetLastError()
90 if err:
91 # pylint: disable=E0602
maruel@chromium.orgf37c2472012-11-02 20:06:13 +000092 msg = u'QueryDosDevice(%s): %s (%d)' % (
93 drive_letter, FormatError(err), err)
94 raise WindowsError(err, msg.encode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000095 return p.value
96
97
98 def GetShortPathName(long_path):
99 """Returns the Windows short path equivalent for a 'long' path."""
maruel@chromium.org68bb1482013-02-15 22:36:24 +0000100 assert isinstance(long_path, unicode), repr(long_path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000101 # Adds '\\\\?\\' when given an absolute path so the MAX_PATH (260) limit is
102 # not enforced.
103 if os.path.isabs(long_path) and not long_path.startswith('\\\\?\\'):
104 long_path = '\\\\?\\' + long_path
105 chars = windll.kernel32.GetShortPathNameW(long_path, None, 0)
106 if chars:
107 p = create_unicode_buffer(chars)
108 if windll.kernel32.GetShortPathNameW(long_path, p, chars):
109 return p.value
110
111 err = GetLastError()
112 if err:
113 # pylint: disable=E0602
maruel@chromium.orgf37c2472012-11-02 20:06:13 +0000114 msg = u'GetShortPathName(%s): %s (%d)' % (
115 long_path, FormatError(err), err)
116 raise WindowsError(err, msg.encode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000117
118
119 def GetLongPathName(short_path):
120 """Returns the Windows long path equivalent for a 'short' path."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000121 assert isinstance(short_path, unicode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000122 # Adds '\\\\?\\' when given an absolute path so the MAX_PATH (260) limit is
123 # not enforced.
124 if os.path.isabs(short_path) and not short_path.startswith('\\\\?\\'):
125 short_path = '\\\\?\\' + short_path
126 chars = windll.kernel32.GetLongPathNameW(short_path, None, 0)
127 if chars:
128 p = create_unicode_buffer(chars)
129 if windll.kernel32.GetLongPathNameW(short_path, p, chars):
130 return p.value
131
132 err = GetLastError()
133 if err:
134 # pylint: disable=E0602
maruel@chromium.orgf37c2472012-11-02 20:06:13 +0000135 msg = u'GetLongPathName(%s): %s (%d)' % (
136 short_path, FormatError(err), err)
137 raise WindowsError(err, msg.encode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000138
139
140 def get_current_encoding():
141 """Returns the 'ANSI' code page associated to the process."""
142 return 'cp%d' % int(windll.kernel32.GetACP())
143
144
145 class DosDriveMap(object):
146 """Maps \Device\HarddiskVolumeN to N: on Windows."""
147 # Keep one global cache.
148 _MAPPING = {}
149
150 def __init__(self):
151 """Lazy loads the cache."""
152 if not self._MAPPING:
153 # This is related to UNC resolver on windows. Ignore that.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000154 self._MAPPING[u'\\Device\\Mup'] = None
155 self._MAPPING[u'\\SystemRoot'] = os.environ[u'SystemRoot']
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000156
157 for letter in (chr(l) for l in xrange(ord('C'), ord('Z')+1)):
158 try:
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000159 letter = u'%s:' % letter
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000160 mapped = QueryDosDevice(letter)
161 if mapped in self._MAPPING:
162 logging.warn(
163 ('Two drives: \'%s\' and \'%s\', are mapped to the same disk'
164 '. Drive letters are a user-mode concept and the kernel '
165 'traces only have NT path, so all accesses will be '
166 'associated with the first drive letter, independent of the '
167 'actual letter used by the code') % (
168 self._MAPPING[mapped], letter))
169 else:
170 self._MAPPING[mapped] = letter
171 except WindowsError: # pylint: disable=E0602
172 pass
173
174 def to_win32(self, path):
175 """Converts a native NT path to Win32/DOS compatible path."""
176 match = re.match(r'(^\\Device\\[a-zA-Z0-9]+)(\\.*)?$', path)
177 if not match:
178 raise ValueError(
179 'Can\'t convert %s into a Win32 compatible path' % path,
180 path)
181 if not match.group(1) in self._MAPPING:
182 # Unmapped partitions may be accessed by windows for the
183 # fun of it while the test is running. Discard these.
184 return None
185 drive = self._MAPPING[match.group(1)]
186 if not drive or not match.group(2):
187 return drive
188 return drive + match.group(2)
189
190
191 def isabs(path):
192 """Accepts X: as an absolute path, unlike python's os.path.isabs()."""
193 return os.path.isabs(path) or len(path) == 2 and path[1] == ':'
194
195
196 def get_native_path_case(p):
197 """Returns the native path case for an existing file.
198
199 On Windows, removes any leading '\\?\'.
200 """
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000201 assert isinstance(p, unicode), repr(p)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000202 if not isabs(p):
203 raise ValueError(
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +0000204 'get_native_path_case(%r): Require an absolute path' % p, p)
205
maruel@chromium.org037758d2012-12-10 17:59:46 +0000206 # Make sure it is normalized to os.path.sep. Do not do it here to keep the
207 # function fast
208 assert '/' not in p, p
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +0000209 suffix = ''
210 count = p.count(':')
211 if count > 1:
212 # This means it has an alternate-data stream. There could be 3 ':', since
213 # it could be the $DATA datastream of an ADS. Split the whole ADS suffix
214 # off and add it back afterward. There is no way to know the native path
215 # case of an alternate data stream.
216 items = p.split(':')
217 p = ':'.join(items[0:2])
218 suffix = ''.join(':' + i for i in items[2:])
219
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000220 # Windows used to have an option to turn on case sensitivity on non Win32
221 # subsystem but that's out of scope here and isn't supported anymore.
222 # Go figure why GetShortPathName() is needed.
223 try:
224 out = GetLongPathName(GetShortPathName(p))
225 except OSError, e:
226 if e.args[0] in (2, 3, 5):
227 # The path does not exist. Try to recurse and reconstruct the path.
228 base = os.path.dirname(p)
229 rest = os.path.basename(p)
230 return os.path.join(get_native_path_case(base), rest)
231 raise
232 if out.startswith('\\\\?\\'):
233 out = out[4:]
234 # Always upper case the first letter since GetLongPathName() will return the
235 # drive letter in the case it was given.
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +0000236 return out[0].upper() + out[1:] + suffix
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000237
238
239 def CommandLineToArgvW(command_line):
240 """Splits a commandline into argv using CommandLineToArgvW()."""
241 # http://msdn.microsoft.com/library/windows/desktop/bb776391.aspx
242 size = c_int()
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000243 assert isinstance(command_line, unicode)
244 ptr = windll.shell32.CommandLineToArgvW(command_line, byref(size))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000245 try:
246 return [arg for arg in (c_wchar_p * size.value).from_address(ptr)]
247 finally:
248 windll.kernel32.LocalFree(ptr)
249
250
251elif sys.platform == 'darwin':
252
253
254 # On non-windows, keep the stdlib behavior.
255 isabs = os.path.isabs
256
257
258 def _find_item_native_case(root_path, item):
259 """Gets the native path case of a single item based at root_path.
260
261 There is no API to get the native path case of symlinks on OSX. So it
262 needs to be done the slow way.
263 """
264 item = item.lower()
265 for element in os.listdir(root_path):
266 if element.lower() == item:
267 return element
268
269
270 def _native_case(p):
271 """Gets the native path case. Warning: this function resolves symlinks."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000272 try:
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000273 rel_ref, _ = Carbon.File.FSPathMakeRef(p.encode('utf-8'))
274 # The OSX underlying code uses NFD but python strings are in NFC. This
275 # will cause issues with os.listdir() for example. Since the dtrace log
276 # *is* in NFC, normalize it here.
csharp@chromium.orgade593e2013-03-05 21:50:43 +0000277 raw_nfd_path = rel_ref.FSRefMakePath().decode('utf-8')
278 logging.debug('raw NFD path: %s' % raw_nfd_path)
csharp@chromium.orgc153ce22013-03-05 22:13:28 +0000279 out = unicodedata.normalize('NFC', raw_nfd_path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000280 if p.endswith(os.path.sep) and not out.endswith(os.path.sep):
281 return out + os.path.sep
csharp@chromium.orgade593e2013-03-05 21:50:43 +0000282 logging.debug('_native_case(%s) = %s' % (p, out))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000283 return out
284 except MacOS.Error, e:
285 if e.args[0] in (-43, -120):
286 # The path does not exist. Try to recurse and reconstruct the path.
287 # -43 means file not found.
288 # -120 means directory not found.
289 base = os.path.dirname(p)
290 rest = os.path.basename(p)
csharp@chromium.orgade593e2013-03-05 21:50:43 +0000291 out = os.path.join(_native_case(base), rest)
292 logging.debug('_native_case(%s) = %s' % (p, out))
293 return out
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000294 raise OSError(
295 e.args[0], 'Failed to get native path for %s' % p, p, e.args[1])
296
297
298 def _split_at_symlink_native(base_path, rest):
299 """Returns the native path for a symlink."""
300 base, symlink, rest = split_at_symlink(base_path, rest)
301 if symlink:
302 if not base_path:
303 base_path = base
304 else:
305 base_path = safe_join(base_path, base)
306 symlink = _find_item_native_case(base_path, symlink)
307 return base, symlink, rest
308
309
310 def get_native_path_case(path):
311 """Returns the native path case for an existing file.
312
313 Technically, it's only HFS+ on OSX that is case preserving and
314 insensitive. It's the default setting on HFS+ but can be changed.
315 """
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000316 assert isinstance(path, unicode), repr(path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000317 if not isabs(path):
318 raise ValueError(
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000319 'get_native_path_case(%r): Require an absolute path' % path, path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000320 if path.startswith('/dev'):
321 # /dev is not visible from Carbon, causing an exception.
322 return path
323
324 # Starts assuming there is no symlink along the path.
325 resolved = _native_case(path)
326 if resolved.lower() == path.lower():
327 # This code path is incredibly faster.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000328 logging.debug('get_native_path_case(%s) = %s' % (path, resolved))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000329 return resolved
330
331 # There was a symlink, process it.
332 base, symlink, rest = _split_at_symlink_native(None, path)
333 assert symlink, (path, base, symlink, rest, resolved)
334 prev = base
335 base = safe_join(_native_case(base), symlink)
336 assert len(base) > len(prev)
337 while rest:
338 prev = base
339 relbase, symlink, rest = _split_at_symlink_native(base, rest)
340 base = safe_join(base, relbase)
341 assert len(base) > len(prev), (prev, base, symlink)
342 if symlink:
343 base = safe_join(base, symlink)
344 assert len(base) > len(prev), (prev, base, symlink)
345 # Make sure no symlink was resolved.
346 assert base.lower() == path.lower(), (base, path)
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000347 logging.debug('get_native_path_case(%s) = %s' % (path, base))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000348 return base
349
350
351else: # OSes other than Windows and OSX.
352
353
354 # On non-windows, keep the stdlib behavior.
355 isabs = os.path.isabs
356
357
358 def get_native_path_case(path):
359 """Returns the native path case for an existing file.
360
361 On OSes other than OSX and Windows, assume the file system is
362 case-sensitive.
363
364 TODO(maruel): This is not strictly true. Implement if necessary.
365 """
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000366 assert isinstance(path, unicode), repr(path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000367 if not isabs(path):
368 raise ValueError(
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000369 'get_native_path_case(%r): Require an absolute path' % path, path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000370 # Give up on cygwin, as GetLongPathName() can't be called.
371 # Linux traces tends to not be normalized so use this occasion to normalize
372 # it. This function implementation already normalizes the path on the other
373 # OS so this needs to be done here to be coherent between OSes.
374 out = os.path.normpath(path)
375 if path.endswith(os.path.sep) and not out.endswith(os.path.sep):
376 return out + os.path.sep
377 return out
378
379
380if sys.platform != 'win32': # All non-Windows OSes.
381
382
383 def safe_join(*args):
384 """Joins path elements like os.path.join() but doesn't abort on absolute
385 path.
386
387 os.path.join('foo', '/bar') == '/bar'
388 but safe_join('foo', '/bar') == 'foo/bar'.
389 """
390 out = ''
391 for element in args:
392 if element.startswith(os.path.sep):
393 if out.endswith(os.path.sep):
394 out += element[1:]
395 else:
396 out += element
397 else:
398 if out.endswith(os.path.sep):
399 out += element
400 else:
401 out += os.path.sep + element
402 return out
403
404
405 def split_at_symlink(base_dir, relfile):
406 """Scans each component of relfile and cut the string at the symlink if
407 there is any.
408
409 Returns a tuple (base_path, symlink, rest), with symlink == rest == None if
410 not symlink was found.
411 """
412 if base_dir:
413 assert relfile
414 assert os.path.isabs(base_dir)
415 index = 0
416 else:
417 assert os.path.isabs(relfile)
418 index = 1
419
420 def at_root(rest):
421 if base_dir:
422 return safe_join(base_dir, rest)
423 return rest
424
425 while True:
426 try:
427 index = relfile.index(os.path.sep, index)
428 except ValueError:
429 index = len(relfile)
430 full = at_root(relfile[:index])
431 if os.path.islink(full):
432 # A symlink!
433 base = os.path.dirname(relfile[:index])
434 symlink = os.path.basename(relfile[:index])
435 rest = relfile[index:]
436 logging.debug(
437 'split_at_symlink(%s, %s) -> (%s, %s, %s)' %
438 (base_dir, relfile, base, symlink, rest))
439 return base, symlink, rest
440 if index == len(relfile):
441 break
442 index += 1
443 return relfile, None, None
444
445
446def fix_python_path(cmd):
447 """Returns the fixed command line to call the right python executable."""
448 out = cmd[:]
449 if out[0] == 'python':
450 out[0] = sys.executable
451 elif out[0].endswith('.py'):
452 out.insert(0, sys.executable)
453 return out
454
455
456def create_thunk():
457 handle, name = tempfile.mkstemp(prefix='trace_inputs_thunk', suffix='.py')
458 os.write(
459 handle,
460 (
461 'import subprocess\n'
462 'import sys\n'
463 'sys.exit(subprocess.call(sys.argv[2:]))\n'
464 ))
465 os.close(handle)
466 return name
467
468
469def strace_process_quoted_arguments(text):
470 """Extracts quoted arguments on a string and return the arguments as a list.
471
472 Implemented as an automaton. Supports incomplete strings in the form
473 '"foo"...'.
474
475 Example:
476 With text = '"foo", "bar"', the function will return ['foo', 'bar']
477
478 TODO(maruel): Implement escaping.
479 """
480 # All the possible states of the DFA.
481 ( NEED_QUOTE, # Begining of a new arguments.
482 INSIDE_STRING, # Inside an argument.
483 ESCAPED, # Found a '\' inside a quote. Treat the next char as-is.
484 NEED_COMMA_OR_DOT, # Right after the closing quote of an argument. Could be
485 # a serie of 3 dots or a comma.
486 NEED_SPACE, # Right after a comma
487 NEED_DOT_2, # Found a dot, need a second one.
488 NEED_DOT_3, # Found second dot, need a third one.
489 NEED_COMMA, # Found third dot, need a comma.
490 ) = range(8)
491
492 state = NEED_QUOTE
493 out = []
494 for index, char in enumerate(text):
495 if char == '"':
496 if state == NEED_QUOTE:
497 state = INSIDE_STRING
498 # A new argument was found.
499 out.append('')
500 elif state == INSIDE_STRING:
501 # The argument is now closed.
502 state = NEED_COMMA_OR_DOT
503 elif state == ESCAPED:
504 out[-1] += char
505 state = INSIDE_STRING
506 else:
507 raise ValueError(
508 'Can\'t process char at column %d for: %r' % (index, text),
509 index,
510 text)
511 elif char == ',':
512 if state in (NEED_COMMA_OR_DOT, NEED_COMMA):
513 state = NEED_SPACE
514 elif state == INSIDE_STRING:
515 out[-1] += char
516 elif state == ESCAPED:
517 out[-1] += char
518 state = INSIDE_STRING
519 else:
520 raise ValueError(
521 'Can\'t process char at column %d for: %r' % (index, text),
522 index,
523 text)
524 elif char == ' ':
525 if state == NEED_SPACE:
526 state = NEED_QUOTE
527 elif state == INSIDE_STRING:
528 out[-1] += char
529 elif state == ESCAPED:
530 out[-1] += char
531 state = INSIDE_STRING
532 else:
533 raise ValueError(
534 'Can\'t process char at column %d for: %r' % (index, text),
535 index,
536 text)
537 elif char == '.':
538 if state == NEED_COMMA_OR_DOT:
539 # The string is incomplete, this mean the strace -s flag should be
540 # increased.
541 state = NEED_DOT_2
542 elif state == NEED_DOT_2:
543 state = NEED_DOT_3
544 elif state == NEED_DOT_3:
545 state = NEED_COMMA
546 elif state == INSIDE_STRING:
547 out[-1] += char
548 elif state == ESCAPED:
549 out[-1] += char
550 state = INSIDE_STRING
551 else:
552 raise ValueError(
553 'Can\'t process char at column %d for: %r' % (index, text),
554 index,
555 text)
556 elif char == '\\':
557 if state == ESCAPED:
558 out[-1] += char
559 state = INSIDE_STRING
560 elif state == INSIDE_STRING:
561 state = ESCAPED
562 else:
563 raise ValueError(
564 'Can\'t process char at column %d for: %r' % (index, text),
565 index,
566 text)
567 else:
568 if state == INSIDE_STRING:
569 out[-1] += char
570 else:
571 raise ValueError(
572 'Can\'t process char at column %d for: %r' % (index, text),
573 index,
574 text)
575 if state not in (NEED_COMMA, NEED_COMMA_OR_DOT):
576 raise ValueError(
577 'String is incorrectly terminated: %r' % text,
578 text)
579 return out
580
581
582def read_json(filepath):
583 with open(filepath, 'r') as f:
584 return json.load(f)
585
586
587def write_json(filepath_or_handle, data, dense):
588 """Writes data into filepath or file handle encoded as json.
589
590 If dense is True, the json is packed. Otherwise, it is human readable.
591 """
592 if hasattr(filepath_or_handle, 'write'):
593 if dense:
594 filepath_or_handle.write(
595 json.dumps(data, sort_keys=True, separators=(',',':')))
596 else:
597 filepath_or_handle.write(json.dumps(data, sort_keys=True, indent=2))
598 else:
599 with open(filepath_or_handle, 'wb') as f:
600 if dense:
601 json.dump(data, f, sort_keys=True, separators=(',',':'))
602 else:
603 json.dump(data, f, sort_keys=True, indent=2)
604
605
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000606def assert_is_renderable(pseudo_string):
607 """Asserts the input is a valid object to be processed by render()."""
608 assert (
609 isinstance(pseudo_string, (None.__class__, unicode)) or
610 hasattr(pseudo_string, 'render')), repr(pseudo_string)
611
612
613def render(pseudo_string):
614 """Converts the pseudo-string to an unicode string."""
615 assert_is_renderable(pseudo_string)
616 if isinstance(pseudo_string, (None.__class__, unicode)):
617 return pseudo_string
618 return pseudo_string.render()
619
620
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000621class Results(object):
622 """Results of a trace session."""
623
624 class _TouchedObject(object):
625 """Something, a file or a directory, that was accessed."""
626 def __init__(self, root, path, tainted, size, nb_files):
627 logging.debug(
628 '%s(%s, %s, %s, %s, %s)' %
629 (self.__class__.__name__, root, path, tainted, size, nb_files))
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000630 assert_is_renderable(root)
631 assert_is_renderable(path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000632 self.root = root
633 self.path = path
634 self.tainted = tainted
635 self.nb_files = nb_files
636 # Can be used as a cache or a default value, depending on context.
637 self._size = size
638 # These are cache only.
639 self._real_path = None
640
641 # Check internal consistency.
642 assert path, path
643 assert tainted or bool(root) != bool(isabs(path)), (root, path)
644 assert tainted or (
645 not os.path.exists(self.full_path) or
646 (self.full_path == get_native_path_case(self.full_path))), (
647 tainted, self.full_path, get_native_path_case(self.full_path))
648
649 @property
650 def existent(self):
651 return self.size != -1
652
653 @property
654 def full_path(self):
655 if self.root:
656 return os.path.join(self.root, self.path)
657 return self.path
658
659 @property
660 def real_path(self):
661 """Returns the path with symlinks resolved."""
662 if not self._real_path:
663 self._real_path = os.path.realpath(self.full_path)
664 return self._real_path
665
666 @property
667 def size(self):
668 """File's size. -1 is not existent."""
669 if self._size is None and not self.tainted:
670 try:
671 self._size = os.stat(self.full_path).st_size
672 except OSError:
673 self._size = -1
674 return self._size
675
676 def flatten(self):
677 """Returns a dict representing this object.
678
679 A 'size' of 0 means the file was only touched and not read.
680 """
681 return {
682 'path': self.path,
683 'size': self.size,
684 }
685
686 def replace_variables(self, variables):
687 """Replaces the root of this File with one of the variables if it matches.
688
689 If a variable replacement occurs, the cloned object becomes tainted.
690 """
691 for variable, root_path in variables.iteritems():
692 if self.path.startswith(root_path):
693 return self._clone(
694 self.root, variable + self.path[len(root_path):], True)
695 # No need to clone, returns ourself.
696 return self
697
698 def strip_root(self, root):
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000699 """Returns a clone of itself with 'root' stripped off.
700
701 Note that the file is kept if it is either accessible from a symlinked
702 path that was used to access the file or through the real path.
703 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000704 # Check internal consistency.
705 assert self.tainted or (isabs(root) and root.endswith(os.path.sep)), root
706 if not self.full_path.startswith(root):
707 # Now try to resolve the symlinks to see if it can be reached this way.
708 # Only try *after* trying without resolving symlink.
709 if not self.real_path.startswith(root):
710 return None
711 path = self.real_path
712 else:
713 path = self.full_path
714 return self._clone(root, path[len(root):], self.tainted)
715
716 def _clone(self, new_root, new_path, tainted):
717 raise NotImplementedError(self.__class__.__name__)
718
719 class File(_TouchedObject):
720 """A file that was accessed. May not be present anymore.
721
722 If tainted is true, it means it is not a real path anymore as a variable
723 replacement occured.
724
maruel@chromium.orge5c17132012-11-21 18:18:46 +0000725 If only_touched is True, this means the file was probed for existence, and
726 it is existent, but was never _opened_. If only_touched is True, the file
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000727 must have existed.
728 """
729 def __init__(self, root, path, tainted, size):
730 super(Results.File, self).__init__(root, path, tainted, size, 1)
731
732 def _clone(self, new_root, new_path, tainted):
733 """Clones itself keeping meta-data."""
734 # Keep the self.size and self._real_path caches for performance reason. It
735 # is also important when the file becomes tainted (with a variable instead
736 # of the real path) since self.path is not an on-disk path anymore so
737 # out._size cannot be updated.
738 out = self.__class__(new_root, new_path, tainted, self.size)
739 out._real_path = self._real_path
740 return out
741
742 class Directory(_TouchedObject):
743 """A directory of files. Must exist."""
744 def __init__(self, root, path, tainted, size, nb_files):
745 """path='.' is a valid value and must be handled appropriately."""
746 assert not path.endswith(os.path.sep), path
747 super(Results.Directory, self).__init__(
748 root, path + os.path.sep, tainted, size, nb_files)
749 # For a Directory instance, self.size is not a cache, it's an actual value
750 # that is never modified and represents the total size of the files
751 # contained in this directory. It is possible that the directory is empty
752 # so that size == 0; this happens if there's only an invalid symlink in
753 # it.
754
755 def flatten(self):
756 out = super(Results.Directory, self).flatten()
757 out['nb_files'] = self.nb_files
758 return out
759
760 def _clone(self, new_root, new_path, tainted):
761 """Clones itself keeping meta-data."""
762 out = self.__class__(
763 new_root,
764 new_path.rstrip(os.path.sep),
765 tainted,
766 self.size,
767 self.nb_files)
768 out._real_path = self._real_path
769 return out
770
771 class Process(object):
772 """A process that was traced.
773
774 Contains references to the files accessed by this process and its children.
775 """
776 def __init__(self, pid, files, executable, command, initial_cwd, children):
777 logging.debug('Process(%s, %d, ...)' % (pid, len(files)))
778 self.pid = pid
779 self.files = sorted(files, key=lambda x: x.path)
780 self.children = children
781 self.executable = executable
782 self.command = command
783 self.initial_cwd = initial_cwd
784
785 # Check internal consistency.
786 assert len(set(f.path for f in self.files)) == len(self.files), sorted(
787 f.path for f in self.files)
788 assert isinstance(self.children, list)
789 assert isinstance(self.files, list)
790
791 @property
792 def all(self):
793 for child in self.children:
794 for i in child.all:
795 yield i
796 yield self
797
798 def flatten(self):
799 return {
800 'children': [c.flatten() for c in self.children],
801 'command': self.command,
802 'executable': self.executable,
803 'files': [f.flatten() for f in self.files],
804 'initial_cwd': self.initial_cwd,
805 'pid': self.pid,
806 }
807
808 def strip_root(self, root):
809 assert isabs(root) and root.endswith(os.path.sep), root
810 # Loads the files after since they are constructed as objects.
811 out = self.__class__(
812 self.pid,
813 filter(None, (f.strip_root(root) for f in self.files)),
814 self.executable,
815 self.command,
816 self.initial_cwd,
817 [c.strip_root(root) for c in self.children])
818 logging.debug(
819 'strip_root(%s) %d -> %d' % (root, len(self.files), len(out.files)))
820 return out
821
822 def __init__(self, process):
823 self.process = process
824 # Cache.
825 self._files = None
826
827 def flatten(self):
828 return {
829 'root': self.process.flatten(),
830 }
831
832 @property
833 def files(self):
834 if self._files is None:
835 self._files = sorted(
836 sum((p.files for p in self.process.all), []),
837 key=lambda x: x.path)
838 return self._files
839
840 @property
841 def existent(self):
842 return [f for f in self.files if f.existent]
843
844 @property
845 def non_existent(self):
846 return [f for f in self.files if not f.existent]
847
848 def strip_root(self, root):
849 """Returns a clone with all the files outside the directory |root| removed
850 and converts all the path to be relative paths.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000851
852 It keeps files accessible through the |root| directory or that have been
853 accessed through any symlink which points to the same directory.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000854 """
855 # Resolve any symlink
856 root = os.path.realpath(root)
857 root = get_native_path_case(root).rstrip(os.path.sep) + os.path.sep
858 logging.debug('strip_root(%s)' % root)
859 return Results(self.process.strip_root(root))
860
861
862class ApiBase(object):
863 """OS-agnostic API to trace a process and its children."""
864 class Context(object):
865 """Processes one log line at a time and keeps the list of traced processes.
866
867 The parsing is complicated by the fact that logs are traced out of order for
868 strace but in-order for dtrace and logman. In addition, on Windows it is
869 very frequent that processids are reused so a flat list cannot be used. But
870 at the same time, it is impossible to faithfully construct a graph when the
871 logs are processed out of order. So both a tree and a flat mapping are used,
872 the tree is the real process tree, while the flat mapping stores the last
873 valid process for the corresponding processid. For the strace case, the
874 tree's head is guessed at the last moment.
875 """
876 class Process(object):
877 """Keeps context for one traced child process.
878
879 Logs all the files this process touched. Ignores directories.
880 """
881 def __init__(self, blacklist, pid, initial_cwd):
882 # Check internal consistency.
883 assert isinstance(pid, int), repr(pid)
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000884 assert_is_renderable(initial_cwd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000885 self.pid = pid
886 # children are Process instances.
887 self.children = []
888 self.initial_cwd = initial_cwd
889 self.cwd = None
890 self.files = set()
891 self.only_touched = set()
892 self.executable = None
893 self.command = None
894 self._blacklist = blacklist
895
896 def to_results_process(self):
897 """Resolves file case sensitivity and or late-bound strings."""
898 # When resolving files, it's normal to get dupe because a file could be
899 # opened multiple times with different case. Resolve the deduplication
900 # here.
901 def fix_path(x):
902 """Returns the native file path case.
903
904 Converts late-bound strings.
905 """
906 if not x:
907 # Do not convert None instance to 'None'.
908 return x
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000909 x = render(x)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000910 if os.path.isabs(x):
911 # If the path is not absolute, which tends to happen occasionally on
912 # Windows, it is not possible to get the native path case so ignore
913 # that trace. It mostly happens for 'executable' value.
914 x = get_native_path_case(x)
915 return x
916
917 def fix_and_blacklist_path(x):
918 x = fix_path(x)
919 if not x:
920 return
921 # The blacklist needs to be reapplied, since path casing could
922 # influence blacklisting.
923 if self._blacklist(x):
924 return
925 return x
926
927 # Filters out directories. Some may have passed through.
928 files = set(f for f in map(fix_and_blacklist_path, self.files) if f)
929 only_touched = set(
930 f for f in map(fix_and_blacklist_path, self.only_touched) if f)
931 only_touched -= files
932
933 files = [
934 Results.File(None, f, False, None) for f in files
935 if not os.path.isdir(f)
936 ]
937 # Using 0 as size means the file's content is ignored since the file was
938 # never opened for I/O.
939 files.extend(
940 Results.File(None, f, False, 0) for f in only_touched
941 if not os.path.isdir(f)
942 )
943 return Results.Process(
944 self.pid,
945 files,
946 fix_path(self.executable),
947 self.command,
948 fix_path(self.initial_cwd),
949 [c.to_results_process() for c in self.children])
950
951 def add_file(self, filepath, touch_only):
952 """Adds a file if it passes the blacklist."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000953 if self._blacklist(render(filepath)):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000954 return
955 logging.debug('add_file(%d, %s, %s)' % (self.pid, filepath, touch_only))
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000956 # Note that filepath and not render(filepath) is added. It is
957 # because filepath could be something else than a string, like a
958 # RelativePath instance for dtrace logs.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000959 if touch_only:
960 self.only_touched.add(filepath)
961 else:
962 self.files.add(filepath)
963
964 def __init__(self, blacklist):
965 self.blacklist = blacklist
966 # Initial process.
967 self.root_process = None
968 # dict to accelerate process lookup, to not have to lookup the whole graph
969 # each time.
970 self._process_lookup = {}
971
972 class Tracer(object):
973 """During it's lifetime, the tracing subsystem is enabled."""
974 def __init__(self, logname):
975 self._logname = logname
976 self._lock = threading.Lock()
977 self._traces = []
978 self._initialized = True
979
980 def trace(self, cmd, cwd, tracename, output):
981 """Runs the OS-specific trace program on an executable.
982
983 Arguments:
984 - cmd: The command (a list) to run.
985 - cwd: Current directory to start the child process in.
986 - tracename: Name of the trace in the logname file.
987 - output: If False, redirects output to PIPEs.
988
989 Returns a tuple (resultcode, output) and updates the internal trace
990 entries.
991 """
992 # The implementation adds an item to self._traces.
993 raise NotImplementedError(self.__class__.__name__)
994
995 def close(self, _timeout=None):
996 """Saves the meta-data in the logname file.
997
998 For kernel-based tracing, stops the tracing subsystem.
999
1000 Must not be used manually when using 'with' construct.
1001 """
1002 with self._lock:
1003 assert self._initialized
1004 try:
1005 data = {
1006 'traces': self._traces,
1007 }
1008 write_json(self._logname, data, False)
1009 finally:
1010 self._initialized = False
1011
1012 def post_process_log(self):
1013 """Post-processes the log so it becomes faster to load afterward.
1014
1015 Must not be used manually when using 'with' construct.
1016 """
1017 assert not self._initialized, 'Must stop tracing first.'
1018
1019 def __enter__(self):
1020 """Enables 'with' statement."""
1021 return self
1022
1023 def __exit__(self, exc_type, exc_value, traceback):
1024 """Enables 'with' statement."""
1025 self.close()
1026 # If an exception was thrown, do not process logs.
1027 if not exc_type:
1028 self.post_process_log()
1029
1030 def get_tracer(self, logname):
1031 """Returns an ApiBase.Tracer instance.
1032
1033 Initializes the tracing subsystem, which is a requirement for kernel-based
1034 tracers. Only one tracer instance should be live at a time!
1035
1036 logname is the filepath to the json file that will contain the meta-data
1037 about the logs.
1038 """
1039 return self.Tracer(logname)
1040
1041 @staticmethod
1042 def clean_trace(logname):
1043 """Deletes an old log."""
1044 raise NotImplementedError()
1045
1046 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001047 def parse_log(cls, logname, blacklist, trace_name):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001048 """Processes trace logs and returns the files opened and the files that do
1049 not exist.
1050
1051 It does not track directories.
1052
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001053 Arguments:
1054 - logname: must be an absolute path.
1055 - blacklist: must be a lambda.
1056 - trace_name: optional trace to read, defaults to reading all traces.
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00001057
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001058 Most of the time, files that do not exist are temporary test files that
1059 should be put in /tmp instead. See http://crbug.com/116251.
1060
1061 Returns a list of dict with keys:
1062 - results: A Results instance.
1063 - trace: The corresponding tracename parameter provided to
1064 get_tracer().trace().
1065 - output: Output gathered during execution, if get_tracer().trace(...,
1066 output=False) was used.
1067 """
1068 raise NotImplementedError(cls.__class__.__name__)
1069
1070
1071class Strace(ApiBase):
1072 """strace implies linux."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001073 @staticmethod
1074 def load_filename(filename):
1075 """Parses a filename in a log."""
1076 assert isinstance(filename, str)
1077 out = ''
1078 i = 0
1079 while i < len(filename):
1080 c = filename[i]
1081 if c == '\\':
1082 out += chr(int(filename[i+1:i+4], 8))
1083 i += 4
1084 else:
1085 out += c
1086 i += 1
1087 # TODO(maruel): That's not necessarily true that the current code page is
1088 # utf-8.
1089 return out.decode('utf-8')
1090
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001091 class Context(ApiBase.Context):
1092 """Processes a strace log line and keeps the list of existent and non
1093 existent files accessed.
1094
1095 Ignores directories.
1096
1097 Uses late-binding to processes the cwd of each process. The problem is that
1098 strace generates one log file per process it traced but doesn't give any
1099 information about which process was started when and by who. So we don't
1100 even know which process is the initial one. So process the logs out of
1101 order and use late binding with RelativePath to be able to deduce the
1102 initial directory of each process once all the logs are parsed.
1103 """
1104 class Process(ApiBase.Context.Process):
1105 """Represents the state of a process.
1106
1107 Contains all the information retrieved from the pid-specific log.
1108 """
1109 # Function names are using ([a-z_0-9]+)
1110 # This is the most common format. function(args) = result
1111 RE_HEADER = re.compile(r'^([a-z_0-9]+)\((.+?)\)\s+= (.+)$')
1112 # An interrupted function call, only grab the minimal header.
1113 RE_UNFINISHED = re.compile(r'^([^\(]+)(.*) \<unfinished \.\.\.\>$')
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001114 # An interrupted function call, with the process exiting. It must be the
1115 # last line in the log.
1116 RE_UNFINISHED_EXIT = re.compile(
1117 r'^([^\(]+)(.*) \<unfinished \.\.\.\ exit status \d+>$')
maruel@chromium.org4e506502012-10-18 15:02:58 +00001118 # An interrupted function call the hard way. Usually observed with futex()
1119 # on ubuntu 12.04.
csharp@chromium.org8cefa4c2013-01-16 19:33:47 +00001120 RE_INTERRUPTED_HARD = re.compile(r'^([^\(]+)\('
1121 '[A-Z0-9a-z:\,\_\|\{\}\(\)\>\< ]*$')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001122 # A resumed function call.
1123 RE_RESUMED = re.compile(r'^<\.\.\. ([^ ]+) resumed> (.+)$')
1124 # A process received a signal.
1125 RE_SIGNAL = re.compile(r'^--- SIG[A-Z]+ .+ ---')
1126 # A process didn't handle a signal. Ignore any junk appearing before,
1127 # because the process was forcibly killed so it won't open any new file.
1128 RE_KILLED = re.compile(
1129 r'^.*\+\+\+ killed by ([A-Z]+)( \(core dumped\))? \+\+\+$')
1130 # The process has exited.
1131 RE_PROCESS_EXITED = re.compile(r'^\+\+\+ exited with (\d+) \+\+\+')
1132 # A call was canceled. Ignore any prefix.
1133 RE_UNAVAILABLE = re.compile(r'^.*\)\s*= \? <unavailable>$')
1134 # Happens when strace fails to even get the function name.
1135 UNNAMED_FUNCTION = '????'
1136
1137 # Corner-case in python, a class member function decorator must not be
1138 # @staticmethod.
1139 def parse_args(regexp, expect_zero): # pylint: disable=E0213
1140 """Automatically convert the str 'args' into a list of processed
1141 arguments.
1142
1143 Arguments:
1144 - regexp is used to parse args.
1145 - expect_zero: one of True, False or None.
1146 - True: will check for result.startswith('0') first and will ignore
1147 the trace line completely otherwise. This is important because for
1148 many functions, the regexp will not process if the call failed.
1149 - False: will check for not result.startswith(('?', '-1')) for the
1150 same reason than with True.
1151 - None: ignore result.
1152 """
1153 def meta_hook(function):
1154 assert function.__name__.startswith('handle_')
1155 def hook(self, args, result):
1156 if expect_zero is True and not result.startswith('0'):
1157 return
1158 if expect_zero is False and result.startswith(('?', '-1')):
1159 return
1160 match = re.match(regexp, args)
1161 if not match:
1162 raise TracingFailure(
1163 'Failed to parse %s(%s) = %s' %
1164 (function.__name__[len('handle_'):], args, result),
1165 None, None, None)
1166 return function(self, match.groups(), result)
1167 return hook
1168 return meta_hook
1169
1170 class RelativePath(object):
1171 """A late-bound relative path."""
1172 def __init__(self, parent, value):
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001173 assert_is_renderable(parent)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001174 self.parent = parent
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001175 assert isinstance(value, (None.__class__, str)), repr(value)
1176 self.value = Strace.load_filename(value) if value else value
1177 if self.value:
1178 assert '\\' not in self.value, value
1179 assert '\\' not in self.value, (repr(value), repr(self.value))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001180
1181 def render(self):
1182 """Returns the current directory this instance is representing.
1183
1184 This function is used to return the late-bound value.
1185 """
1186 if self.value and self.value.startswith(u'/'):
1187 # An absolute path.
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001188 # TODO(maruel): This is wrong, we can't assert it is utf-8.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001189 return self.value
1190 parent = self.parent.render() if self.parent else u'<None>'
1191 if self.value:
1192 return os.path.normpath(os.path.join(parent, self.value))
1193 return parent
1194
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001195 def __init__(self, root, pid):
1196 """Keeps enough information to be able to guess the original process
1197 root.
1198
1199 strace doesn't store which process was the initial process. So more
1200 information needs to be kept so the graph can be reconstructed from the
1201 flat map.
1202 """
1203 logging.info('%s(%d)' % (self.__class__.__name__, pid))
1204 super(Strace.Context.Process, self).__init__(root.blacklist, pid, None)
1205 assert isinstance(root, ApiBase.Context)
1206 self._root = weakref.ref(root)
1207 # The dict key is the function name of the pending call, like 'open'
1208 # or 'execve'.
1209 self._pending_calls = {}
1210 self._line_number = 0
1211 # Current directory when the process started.
1212 self.initial_cwd = self.RelativePath(self._root(), None)
1213 self.parentid = None
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001214 self._done = False
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001215
1216 def get_cwd(self):
1217 """Returns the best known value of cwd."""
1218 return self.cwd or self.initial_cwd
1219
1220 def render(self):
1221 """Returns the string value of the RelativePath() object.
1222
1223 Used by RelativePath. Returns the initial directory and not the
1224 current one since the current directory 'cwd' validity is time-limited.
1225
1226 The validity is only guaranteed once all the logs are processed.
1227 """
1228 return self.initial_cwd.render()
1229
1230 def on_line(self, line):
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001231 assert isinstance(line, str)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001232 self._line_number += 1
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001233 if self._done:
1234 raise TracingFailure(
1235 'Found a trace for a terminated process',
1236 None, None, None)
1237
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001238 if self.RE_SIGNAL.match(line):
1239 # Ignore signals.
1240 return
1241
1242 try:
1243 match = self.RE_KILLED.match(line)
1244 if match:
1245 # Converts a '+++ killed by Foo +++' trace into an exit_group().
1246 self.handle_exit_group(match.group(1), None)
1247 return
1248
1249 match = self.RE_PROCESS_EXITED.match(line)
1250 if match:
1251 # Converts a '+++ exited with 1 +++' trace into an exit_group()
1252 self.handle_exit_group(match.group(1), None)
1253 return
1254
1255 match = self.RE_UNFINISHED.match(line)
1256 if match:
1257 if match.group(1) in self._pending_calls:
1258 raise TracingFailure(
1259 'Found two unfinished calls for the same function',
1260 None, None, None,
1261 self._pending_calls)
1262 self._pending_calls[match.group(1)] = (
1263 match.group(1) + match.group(2))
1264 return
1265
maruel@chromium.org4e506502012-10-18 15:02:58 +00001266 match = (
1267 self.RE_UNFINISHED_EXIT.match(line) or
1268 self.RE_INTERRUPTED_HARD.match(line))
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001269 if match:
1270 # The process died. No other line can be processed afterward.
1271 self._done = True
1272 return
1273
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001274 match = self.RE_UNAVAILABLE.match(line)
1275 if match:
1276 # This usually means a process was killed and a pending call was
1277 # canceled.
1278 # TODO(maruel): Look up the last exit_group() trace just above and
1279 # make sure any self._pending_calls[anything] is properly flushed.
1280 return
1281
1282 match = self.RE_RESUMED.match(line)
1283 if match:
1284 if match.group(1) not in self._pending_calls:
1285 raise TracingFailure(
1286 'Found a resumed call that was not logged as unfinished',
1287 None, None, None,
1288 self._pending_calls)
1289 pending = self._pending_calls.pop(match.group(1))
1290 # Reconstruct the line.
1291 line = pending + match.group(2)
1292
1293 match = self.RE_HEADER.match(line)
1294 if not match:
1295 raise TracingFailure(
1296 'Found an invalid line: %s' % line,
1297 None, None, None)
1298 if match.group(1) == self.UNNAMED_FUNCTION:
1299 return
1300
1301 # It's a valid line, handle it.
1302 handler = getattr(self, 'handle_%s' % match.group(1), None)
1303 if not handler:
1304 self._handle_unknown(match.group(1), match.group(2), match.group(3))
1305 return handler(match.group(2), match.group(3))
1306 except TracingFailure, e:
1307 # Hack in the values since the handler could be a static function.
1308 e.pid = self.pid
1309 e.line = line
1310 e.line_number = self._line_number
1311 # Re-raise the modified exception.
1312 raise
1313 except (KeyError, NotImplementedError, ValueError), e:
1314 raise TracingFailure(
1315 'Trace generated a %s exception: %s' % (
1316 e.__class__.__name__, str(e)),
1317 self.pid,
1318 self._line_number,
1319 line,
1320 e)
1321
1322 @parse_args(r'^\"(.+?)\", [FKORWX_|]+$', True)
1323 def handle_access(self, args, _result):
1324 self._handle_file(args[0], True)
1325
1326 @parse_args(r'^\"(.+?)\"$', True)
1327 def handle_chdir(self, args, _result):
1328 """Updates cwd."""
1329 self.cwd = self.RelativePath(self, args[0])
1330 logging.debug('handle_chdir(%d, %s)' % (self.pid, self.cwd))
1331
1332 def handle_clone(self, _args, result):
1333 """Transfers cwd."""
1334 if result.startswith(('?', '-1')):
1335 # The call failed.
1336 return
1337 # Update the other process right away.
1338 childpid = int(result)
1339 child = self._root().get_or_set_proc(childpid)
1340 if child.parentid is not None or childpid in self.children:
1341 raise TracingFailure(
1342 'Found internal inconsitency in process lifetime detection '
1343 'during a clone() call',
1344 None, None, None)
1345
1346 # Copy the cwd object.
1347 child.initial_cwd = self.get_cwd()
1348 child.parentid = self.pid
1349 # It is necessary because the logs are processed out of order.
1350 self.children.append(child)
1351
1352 def handle_close(self, _args, _result):
1353 pass
1354
1355 def handle_chmod(self, _args, _result):
1356 pass
1357
1358 def handle_creat(self, _args, _result):
1359 # Ignore files created, since they didn't need to exist.
1360 pass
1361
1362 @parse_args(r'^\"(.+?)\", \[(.+)\], \[\/\* \d+ vars? \*\/\]$', True)
1363 def handle_execve(self, args, _result):
1364 # Even if in practice execve() doesn't returns when it succeeds, strace
1365 # still prints '0' as the result.
1366 filepath = args[0]
1367 self._handle_file(filepath, False)
1368 self.executable = self.RelativePath(self.get_cwd(), filepath)
1369 self.command = strace_process_quoted_arguments(args[1])
1370
1371 def handle_exit_group(self, _args, _result):
1372 """Removes cwd."""
1373 self.cwd = None
1374
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001375 @parse_args(r'^(\d+|AT_FDCWD), \"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', True)
1376 def handle_faccessat(self, args, _results):
1377 if args[0] == 'AT_FDCWD':
1378 self._handle_file(args[1], True)
1379 else:
1380 raise Exception('Relative faccess not implemented.')
1381
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001382 def handle_fork(self, args, result):
1383 self._handle_unknown('fork', args, result)
1384
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001385 def handle_futex(self, _args, _result):
1386 pass
1387
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001388 def handle_getcwd(self, _args, _result):
1389 pass
1390
1391 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1392 def handle_link(self, args, _result):
1393 self._handle_file(args[0], False)
1394 self._handle_file(args[1], False)
1395
1396 @parse_args(r'\"(.+?)\", \{.+?, \.\.\.\}', True)
1397 def handle_lstat(self, args, _result):
1398 self._handle_file(args[0], True)
1399
1400 def handle_mkdir(self, _args, _result):
1401 pass
1402
1403 @parse_args(r'^\"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', False)
1404 def handle_open(self, args, _result):
1405 if 'O_DIRECTORY' in args[1]:
1406 return
1407 self._handle_file(args[0], False)
1408
1409 @parse_args(r'^(\d+|AT_FDCWD), \"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', False)
1410 def handle_openat(self, args, _result):
1411 if 'O_DIRECTORY' in args[2]:
1412 return
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001413 if args[0] == 'AT_FDCWD':
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001414 self._handle_file(args[1], False)
1415 else:
1416 # TODO(maruel): Implement relative open if necessary instead of the
1417 # AT_FDCWD flag, let's hope not since this means tracking all active
1418 # directory handles.
1419 raise Exception('Relative open via openat not implemented.')
1420
1421 @parse_args(r'^\"(.+?)\", \".+?\"(\.\.\.)?, \d+$', False)
1422 def handle_readlink(self, args, _result):
1423 self._handle_file(args[0], False)
1424
1425 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1426 def handle_rename(self, args, _result):
1427 self._handle_file(args[0], False)
1428 self._handle_file(args[1], False)
1429
1430 def handle_rmdir(self, _args, _result):
1431 pass
1432
1433 def handle_setxattr(self, _args, _result):
1434 pass
1435
1436 @parse_args(r'\"(.+?)\", \{.+?, \.\.\.\}', True)
1437 def handle_stat(self, args, _result):
1438 self._handle_file(args[0], True)
1439
1440 def handle_symlink(self, _args, _result):
1441 pass
1442
csharp@chromium.orgfa672922012-11-02 17:43:31 +00001443 @parse_args(r'^\"(.+?)\", \d+', True)
1444 def handle_truncate(self, args, _result):
1445 self._handle_file(args[0], False)
1446
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001447 def handle_unlink(self, _args, _result):
1448 # In theory, the file had to be created anyway.
1449 pass
1450
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001451 def handle_unlinkat(self, _args, _result):
1452 # In theory, the file had to be created anyway.
1453 pass
1454
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001455 def handle_statfs(self, _args, _result):
1456 pass
1457
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001458 def handle_utimensat(self, _args, _result):
1459 pass
1460
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001461 def handle_vfork(self, args, result):
1462 self._handle_unknown('vfork', args, result)
1463
1464 @staticmethod
1465 def _handle_unknown(function, args, result):
1466 raise TracingFailure(
1467 'Unexpected/unimplemented trace %s(%s)= %s' %
1468 (function, args, result),
1469 None, None, None)
1470
1471 def _handle_file(self, filepath, touch_only):
1472 filepath = self.RelativePath(self.get_cwd(), filepath)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001473 #assert not touch_only, render(filepath)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001474 self.add_file(filepath, touch_only)
1475
1476 def __init__(self, blacklist, initial_cwd):
1477 super(Strace.Context, self).__init__(blacklist)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001478 assert_is_renderable(initial_cwd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001479 self.initial_cwd = initial_cwd
1480
1481 def render(self):
1482 """Returns the string value of the initial cwd of the root process.
1483
1484 Used by RelativePath.
1485 """
1486 return self.initial_cwd
1487
1488 def on_line(self, pid, line):
1489 """Transfers control into the Process.on_line() function."""
1490 self.get_or_set_proc(pid).on_line(line.strip())
1491
1492 def to_results(self):
1493 """Finds back the root process and verify consistency."""
1494 # TODO(maruel): Absolutely unecessary, fix me.
1495 root = [p for p in self._process_lookup.itervalues() if not p.parentid]
1496 if len(root) != 1:
1497 raise TracingFailure(
1498 'Found internal inconsitency in process lifetime detection '
1499 'while finding the root process',
1500 None,
1501 None,
1502 None,
1503 sorted(p.pid for p in root))
1504 self.root_process = root[0]
1505 process = self.root_process.to_results_process()
1506 if sorted(self._process_lookup) != sorted(p.pid for p in process.all):
1507 raise TracingFailure(
1508 'Found internal inconsitency in process lifetime detection '
1509 'while looking for len(tree) == len(list)',
1510 None,
1511 None,
1512 None,
1513 sorted(self._process_lookup),
1514 sorted(p.pid for p in process.all))
1515 return Results(process)
1516
1517 def get_or_set_proc(self, pid):
1518 """Returns the Context.Process instance for this pid or creates a new one.
1519 """
1520 if not pid or not isinstance(pid, int):
1521 raise TracingFailure(
1522 'Unpexpected value for pid: %r' % pid,
1523 pid,
1524 None,
1525 None,
1526 pid)
1527 if pid not in self._process_lookup:
1528 self._process_lookup[pid] = self.Process(self, pid)
1529 return self._process_lookup[pid]
1530
1531 @classmethod
1532 def traces(cls):
1533 """Returns the list of all handled traces to pass this as an argument to
1534 strace.
1535 """
1536 prefix = 'handle_'
1537 return [i[len(prefix):] for i in dir(cls.Process) if i.startswith(prefix)]
1538
1539 class Tracer(ApiBase.Tracer):
1540 MAX_LEN = 256
1541
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001542 def __init__(self, logname, use_sudo):
1543 super(Strace.Tracer, self).__init__(logname)
1544 self.use_sudo = use_sudo
1545
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001546 def trace(self, cmd, cwd, tracename, output):
1547 """Runs strace on an executable."""
1548 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
1549 assert os.path.isabs(cmd[0]), cmd[0]
1550 assert os.path.isabs(cwd), cwd
1551 assert os.path.normpath(cwd) == cwd, cwd
1552 with self._lock:
1553 if not self._initialized:
1554 raise TracingFailure(
1555 'Called Tracer.trace() on an unitialized object',
1556 None, None, None, tracename)
1557 assert tracename not in (i['trace'] for i in self._traces)
1558 stdout = stderr = None
1559 if output:
1560 stdout = subprocess.PIPE
1561 stderr = subprocess.STDOUT
1562 # Ensure all file related APIs are hooked.
1563 traces = ','.join(Strace.Context.traces() + ['file'])
1564 trace_cmd = [
1565 'strace',
1566 '-ff',
1567 '-s', '%d' % self.MAX_LEN,
1568 '-e', 'trace=%s' % traces,
1569 '-o', self._logname + '.' + tracename,
1570 ]
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001571 if self.use_sudo is True:
1572 trace_cmd.insert(0, 'sudo')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001573 child = subprocess.Popen(
1574 trace_cmd + cmd,
1575 cwd=cwd,
1576 stdin=subprocess.PIPE,
1577 stdout=stdout,
1578 stderr=stderr)
1579 out = child.communicate()[0]
1580 # TODO(maruel): Walk the logs and figure out the root process would
1581 # simplify parsing the logs a *lot*.
1582 with self._lock:
1583 assert tracename not in (i['trace'] for i in self._traces)
1584 self._traces.append(
1585 {
1586 'cmd': cmd,
1587 'cwd': cwd,
maruel@chromium.org964ddde2012-10-29 18:13:46 +00001588 'output': out,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001589 # The pid of strace process, not very useful.
1590 'pid': child.pid,
1591 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001592 })
1593 return child.returncode, out
1594
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001595 def __init__(self, use_sudo=None):
1596 super(Strace, self).__init__()
1597 self.use_sudo = use_sudo
1598
1599 def get_tracer(self, logname):
1600 return self.Tracer(logname, self.use_sudo)
1601
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001602 @staticmethod
1603 def clean_trace(logname):
1604 if os.path.isfile(logname):
1605 os.remove(logname)
1606 # Also delete any pid specific file from previous traces.
1607 for i in glob.iglob(logname + '.*'):
1608 if i.rsplit('.', 1)[1].isdigit():
1609 os.remove(i)
1610
1611 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001612 def parse_log(cls, logname, blacklist, trace_name):
1613 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00001614 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001615 data = read_json(logname)
1616 out = []
1617 for item in data['traces']:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001618 if trace_name and item['trace'] != trace_name:
1619 continue
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001620 result = {
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001621 'output': item['output'],
maruel@chromium.org964ddde2012-10-29 18:13:46 +00001622 'trace': item['trace'],
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001623 }
1624 try:
1625 context = cls.Context(blacklist, item['cwd'])
1626 for pidfile in glob.iglob('%s.%s.*' % (logname, item['trace'])):
1627 pid = pidfile.rsplit('.', 1)[1]
1628 if pid.isdigit():
1629 pid = int(pid)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001630 for line in open(pidfile, 'rb'):
1631 context.on_line(pid, line)
1632 result['results'] = context.to_results()
maruel@chromium.org424f5692012-10-15 15:42:38 +00001633 except TracingFailure:
1634 result['exception'] = sys.exc_info()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001635 out.append(result)
1636 return out
1637
1638
1639class Dtrace(ApiBase):
1640 """Uses DTrace framework through dtrace. Requires root access.
1641
1642 Implies Mac OSX.
1643
1644 dtruss can't be used because it has compatibility issues with python.
1645
1646 Also, the pid->cwd handling needs to be done manually since OSX has no way to
1647 get the absolute path of the 'cwd' dtrace variable from the probe.
1648
1649 Also, OSX doesn't populate curpsinfo->pr_psargs properly, see
1650 https://discussions.apple.com/thread/1980539. So resort to handling execve()
1651 manually.
1652
1653 errno is not printed in the log since this implementation currently only cares
1654 about files that were successfully opened.
1655 """
1656 class Context(ApiBase.Context):
1657 # Format: index pid function(args)
1658 RE_HEADER = re.compile(r'^\d+ (\d+) ([a-zA-Z_\-]+)\((.*?)\)$')
1659
1660 # Arguments parsing.
1661 RE_DTRACE_BEGIN = re.compile(r'^\"(.+?)\"$')
1662 RE_CHDIR = re.compile(r'^\"(.+?)\"$')
1663 RE_EXECVE = re.compile(r'^\"(.+?)\", \[(\d+), (.+)\]$')
1664 RE_OPEN = re.compile(r'^\"(.+?)\", (0x[0-9a-z]+), (0x[0-9a-z]+)$')
1665 RE_PROC_START = re.compile(r'^(\d+), \"(.+?)\", (\d+)$')
1666 RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$')
1667
1668 O_DIRECTORY = 0x100000
1669
1670 class Process(ApiBase.Context.Process):
1671 def __init__(self, *args):
1672 super(Dtrace.Context.Process, self).__init__(*args)
1673 self.cwd = self.initial_cwd
1674
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001675 def __init__(self, blacklist, thunk_pid, initial_cwd):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001676 logging.info(
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001677 '%s(%d, %s)' % (self.__class__.__name__, thunk_pid, initial_cwd))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001678 super(Dtrace.Context, self).__init__(blacklist)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001679 assert isinstance(initial_cwd, unicode), initial_cwd
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001680 # Process ID of the temporary script created by create_thunk().
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001681 self._thunk_pid = thunk_pid
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001682 self._initial_cwd = initial_cwd
1683 self._line_number = 0
1684
1685 def on_line(self, line):
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001686 assert isinstance(line, unicode), line
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001687 self._line_number += 1
1688 match = self.RE_HEADER.match(line)
1689 if not match:
1690 raise TracingFailure(
1691 'Found malformed line: %s' % line,
1692 None,
1693 self._line_number,
1694 line)
1695 fn = getattr(
1696 self,
1697 'handle_%s' % match.group(2).replace('-', '_'),
1698 self._handle_ignored)
1699 # It is guaranteed to succeed because of the regexp. Or at least I thought
1700 # it would.
1701 pid = int(match.group(1))
1702 try:
1703 return fn(pid, match.group(3))
1704 except TracingFailure, e:
1705 # Hack in the values since the handler could be a static function.
1706 e.pid = pid
1707 e.line = line
1708 e.line_number = self._line_number
1709 # Re-raise the modified exception.
1710 raise
1711 except (KeyError, NotImplementedError, ValueError), e:
1712 raise TracingFailure(
1713 'Trace generated a %s exception: %s' % (
1714 e.__class__.__name__, str(e)),
1715 pid,
1716 self._line_number,
1717 line,
1718 e)
1719
1720 def to_results(self):
1721 process = self.root_process.to_results_process()
1722 # Internal concistency check.
1723 if sorted(self._process_lookup) != sorted(p.pid for p in process.all):
1724 raise TracingFailure(
1725 'Found internal inconsitency in process lifetime detection '
1726 'while looking for len(tree) == len(list)',
1727 None,
1728 None,
1729 None,
1730 sorted(self._process_lookup),
1731 sorted(p.pid for p in process.all))
1732 return Results(process)
1733
1734 def handle_dtrace_BEGIN(self, _pid, args):
1735 if not self.RE_DTRACE_BEGIN.match(args):
1736 raise TracingFailure(
1737 'Found internal inconsitency in dtrace_BEGIN log line',
1738 None, None, None)
1739
1740 def handle_proc_start(self, pid, args):
1741 """Transfers cwd.
1742
1743 The dtrace script already takes care of only tracing the processes that
1744 are child of the traced processes so there is no need to verify the
1745 process hierarchy.
1746 """
1747 if pid in self._process_lookup:
1748 raise TracingFailure(
1749 'Found internal inconsitency in proc_start: %d started two times' %
1750 pid,
1751 None, None, None)
1752 match = self.RE_PROC_START.match(args)
1753 if not match:
1754 raise TracingFailure(
1755 'Failed to parse arguments: %s' % args,
1756 None, None, None)
1757 ppid = int(match.group(1))
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001758 if ppid == self._thunk_pid and not self.root_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001759 proc = self.root_process = self.Process(
1760 self.blacklist, pid, self._initial_cwd)
1761 elif ppid in self._process_lookup:
1762 proc = self.Process(self.blacklist, pid, self._process_lookup[ppid].cwd)
1763 self._process_lookup[ppid].children.append(proc)
1764 else:
1765 # Another process tree, ignore.
1766 return
1767 self._process_lookup[pid] = proc
1768 logging.debug(
1769 'New child: %s -> %d cwd:%s' %
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001770 (ppid, pid, render(proc.initial_cwd)))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001771
1772 def handle_proc_exit(self, pid, _args):
1773 """Removes cwd."""
1774 if pid in self._process_lookup:
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001775 # self._thunk_pid is not traced itself and other traces run neither.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001776 self._process_lookup[pid].cwd = None
1777
1778 def handle_execve(self, pid, args):
1779 """Sets the process' executable.
1780
1781 TODO(maruel): Read command line arguments. See
1782 https://discussions.apple.com/thread/1980539 for an example.
1783 https://gist.github.com/1242279
1784
1785 Will have to put the answer at http://stackoverflow.com/questions/7556249.
1786 :)
1787 """
1788 if not pid in self._process_lookup:
1789 # Another process tree, ignore.
1790 return
1791 match = self.RE_EXECVE.match(args)
1792 if not match:
1793 raise TracingFailure(
1794 'Failed to parse arguments: %r' % args,
1795 None, None, None)
1796 proc = self._process_lookup[pid]
1797 proc.executable = match.group(1)
1798 proc.command = self.process_escaped_arguments(match.group(3))
1799 if int(match.group(2)) != len(proc.command):
1800 raise TracingFailure(
1801 'Failed to parse execve() arguments: %s' % args,
1802 None, None, None)
1803
1804 def handle_chdir(self, pid, args):
1805 """Updates cwd."""
1806 if pid not in self._process_lookup:
1807 # Another process tree, ignore.
1808 return
1809 cwd = self.RE_CHDIR.match(args).group(1)
1810 if not cwd.startswith('/'):
1811 cwd2 = os.path.join(self._process_lookup[pid].cwd, cwd)
1812 logging.debug('handle_chdir(%d, %s) -> %s' % (pid, cwd, cwd2))
1813 else:
1814 logging.debug('handle_chdir(%d, %s)' % (pid, cwd))
1815 cwd2 = cwd
1816 self._process_lookup[pid].cwd = cwd2
1817
1818 def handle_open_nocancel(self, pid, args):
1819 """Redirects to handle_open()."""
1820 return self.handle_open(pid, args)
1821
1822 def handle_open(self, pid, args):
1823 if pid not in self._process_lookup:
1824 # Another process tree, ignore.
1825 return
1826 match = self.RE_OPEN.match(args)
1827 if not match:
1828 raise TracingFailure(
1829 'Failed to parse arguments: %s' % args,
1830 None, None, None)
1831 flag = int(match.group(2), 16)
1832 if self.O_DIRECTORY & flag == self.O_DIRECTORY:
1833 # Ignore directories.
1834 return
1835 self._handle_file(pid, match.group(1))
1836
1837 def handle_rename(self, pid, args):
1838 if pid not in self._process_lookup:
1839 # Another process tree, ignore.
1840 return
1841 match = self.RE_RENAME.match(args)
1842 if not match:
1843 raise TracingFailure(
1844 'Failed to parse arguments: %s' % args,
1845 None, None, None)
1846 self._handle_file(pid, match.group(1))
1847 self._handle_file(pid, match.group(2))
1848
1849 def _handle_file(self, pid, filepath):
1850 if not filepath.startswith('/'):
1851 filepath = os.path.join(self._process_lookup[pid].cwd, filepath)
1852 # We can get '..' in the path.
1853 filepath = os.path.normpath(filepath)
1854 # Sadly, still need to filter out directories here;
1855 # saw open_nocancel(".", 0, 0) = 0 lines.
1856 if os.path.isdir(filepath):
1857 return
1858 self._process_lookup[pid].add_file(filepath, False)
1859
1860 def handle_ftruncate(self, pid, args):
1861 """Just used as a signal to kill dtrace, ignoring."""
1862 pass
1863
1864 @staticmethod
1865 def _handle_ignored(pid, args):
1866 """Is called for all the event traces that are not handled."""
1867 raise NotImplementedError('Please implement me')
1868
1869 @staticmethod
1870 def process_escaped_arguments(text):
1871 """Extracts escaped arguments on a string and return the arguments as a
1872 list.
1873
1874 Implemented as an automaton.
1875
1876 Example:
1877 With text = '\\001python2.7\\001-c\\001print(\\"hi\\")\\0', the
1878 function will return ['python2.7', '-c', 'print("hi")]
1879 """
1880 if not text.endswith('\\0'):
1881 raise ValueError('String is not null terminated: %r' % text, text)
1882 text = text[:-2]
1883
1884 def unescape(x):
1885 """Replaces '\\' with '\' and '\?' (where ? is anything) with ?."""
1886 out = []
1887 escaped = False
1888 for i in x:
1889 if i == '\\' and not escaped:
1890 escaped = True
1891 continue
1892 escaped = False
1893 out.append(i)
1894 return ''.join(out)
1895
1896 return [unescape(i) for i in text.split('\\001')]
1897
1898 class Tracer(ApiBase.Tracer):
1899 # pylint: disable=C0301
1900 #
1901 # To understand the following code, you'll want to take a look at:
1902 # http://developers.sun.com/solaris/articles/dtrace_quickref/dtrace_quickref.html
1903 # https://wikis.oracle.com/display/DTrace/Variables
1904 # http://docs.oracle.com/cd/E19205-01/820-4221/
1905 #
1906 # 0. Dump all the valid probes into a text file. It is important, you
1907 # want to redirect into a file and you don't want to constantly 'sudo'.
1908 # $ sudo dtrace -l > probes.txt
1909 #
1910 # 1. Count the number of probes:
1911 # $ wc -l probes.txt
1912 # 81823 # On OSX 10.7, including 1 header line.
1913 #
1914 # 2. List providers, intentionally skipping all the 'syspolicy10925' and the
1915 # likes and skipping the header with NR>1:
1916 # $ awk 'NR>1 { print $2 }' probes.txt | sort | uniq | grep -v '[[:digit:]]'
1917 # dtrace
1918 # fbt
1919 # io
1920 # ip
1921 # lockstat
1922 # mach_trap
1923 # proc
1924 # profile
1925 # sched
1926 # syscall
1927 # tcp
1928 # vminfo
1929 #
1930 # 3. List of valid probes:
1931 # $ grep syscall probes.txt | less
1932 # or use dtrace directly:
1933 # $ sudo dtrace -l -P syscall | less
1934 #
1935 # trackedpid is an associative array where its value can be 0, 1 or 2.
1936 # 0 is for untracked processes and is the default value for items not
1937 # in the associative array.
1938 # 1 is for tracked processes.
1939 # 2 is for the script created by create_thunk() only. It is not tracked
1940 # itself but all its decendants are.
1941 #
1942 # The script will kill itself only once waiting_to_die == 1 and
1943 # current_processes == 0, so that both getlogin() was called and that
1944 # all traced processes exited.
1945 #
1946 # TODO(maruel): Use cacheable predicates. See
1947 # https://wikis.oracle.com/display/DTrace/Performance+Considerations
1948 D_CODE = """
1949 dtrace:::BEGIN {
1950 waiting_to_die = 0;
1951 current_processes = 0;
1952 logindex = 0;
1953 printf("%d %d %s_%s(\\"%s\\")\\n",
1954 logindex, PID, probeprov, probename, SCRIPT);
1955 logindex++;
1956 }
1957
1958 proc:::start /trackedpid[ppid]/ {
1959 trackedpid[pid] = 1;
1960 current_processes += 1;
1961 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
1962 logindex, pid, probeprov, probename,
1963 ppid,
1964 execname,
1965 current_processes);
1966 logindex++;
1967 }
1968 /* Should use SCRIPT but there is no access to this variable at that
1969 * point. */
1970 proc:::start /ppid == PID && execname == "Python"/ {
1971 trackedpid[pid] = 2;
1972 current_processes += 1;
1973 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
1974 logindex, pid, probeprov, probename,
1975 ppid,
1976 execname,
1977 current_processes);
1978 logindex++;
1979 }
1980 proc:::exit /trackedpid[pid] &&
1981 current_processes == 1 &&
1982 waiting_to_die == 1/ {
1983 trackedpid[pid] = 0;
1984 current_processes -= 1;
1985 printf("%d %d %s_%s(%d)\\n",
1986 logindex, pid, probeprov, probename,
1987 current_processes);
1988 logindex++;
1989 exit(0);
1990 }
1991 proc:::exit /trackedpid[pid]/ {
1992 trackedpid[pid] = 0;
1993 current_processes -= 1;
1994 printf("%d %d %s_%s(%d)\\n",
1995 logindex, pid, probeprov, probename,
1996 current_processes);
1997 logindex++;
1998 }
1999
2000 /* Use an arcane function to detect when we need to die */
2001 syscall::ftruncate:entry /pid == PID && arg0 == FILE_ID/ {
2002 waiting_to_die = 1;
2003 printf("%d %d %s()\\n", logindex, pid, probefunc);
2004 logindex++;
2005 }
2006 syscall::ftruncate:entry /
2007 pid == PID && arg0 == FILE_ID && current_processes == 0/ {
2008 exit(0);
2009 }
2010
2011 syscall::open*:entry /trackedpid[pid] == 1/ {
2012 self->open_arg0 = arg0;
2013 self->open_arg1 = arg1;
2014 self->open_arg2 = arg2;
2015 }
2016 syscall::open*:return /trackedpid[pid] == 1 && errno == 0/ {
2017 this->open_arg0 = copyinstr(self->open_arg0);
2018 printf("%d %d %s(\\"%s\\", 0x%x, 0x%x)\\n",
2019 logindex, pid, probefunc,
2020 this->open_arg0,
2021 self->open_arg1,
2022 self->open_arg2);
2023 logindex++;
2024 this->open_arg0 = 0;
2025 }
2026 syscall::open*:return /trackedpid[pid] == 1/ {
2027 self->open_arg0 = 0;
2028 self->open_arg1 = 0;
2029 self->open_arg2 = 0;
2030 }
2031
2032 syscall::rename:entry /trackedpid[pid] == 1/ {
2033 self->rename_arg0 = arg0;
2034 self->rename_arg1 = arg1;
2035 }
2036 syscall::rename:return /trackedpid[pid] == 1 && errno == 0/ {
2037 this->rename_arg0 = copyinstr(self->rename_arg0);
2038 this->rename_arg1 = copyinstr(self->rename_arg1);
2039 printf("%d %d %s(\\"%s\\", \\"%s\\")\\n",
2040 logindex, pid, probefunc,
2041 this->rename_arg0,
2042 this->rename_arg1);
2043 logindex++;
2044 this->rename_arg0 = 0;
2045 this->rename_arg1 = 0;
2046 }
2047 syscall::rename:return /trackedpid[pid] == 1/ {
2048 self->rename_arg0 = 0;
2049 self->rename_arg1 = 0;
2050 }
2051
2052 /* Track chdir, it's painful because it is only receiving relative path.
2053 */
2054 syscall::chdir:entry /trackedpid[pid] == 1/ {
2055 self->chdir_arg0 = arg0;
2056 }
2057 syscall::chdir:return /trackedpid[pid] == 1 && errno == 0/ {
2058 this->chdir_arg0 = copyinstr(self->chdir_arg0);
2059 printf("%d %d %s(\\"%s\\")\\n",
2060 logindex, pid, probefunc,
2061 this->chdir_arg0);
2062 logindex++;
2063 this->chdir_arg0 = 0;
2064 }
2065 syscall::chdir:return /trackedpid[pid] == 1/ {
2066 self->chdir_arg0 = 0;
2067 }
2068 """
2069
2070 # execve-specific code, tends to throw a lot of exceptions.
2071 D_CODE_EXECVE = """
2072 /* Finally what we care about! */
2073 syscall::exec*:entry /trackedpid[pid]/ {
2074 self->exec_arg0 = copyinstr(arg0);
2075 /* Incrementally probe for a NULL in the argv parameter of execve() to
2076 * figure out argc. */
2077 /* TODO(maruel): Skip the remaining copyin() when a NULL pointer was
2078 * found. */
2079 self->exec_argc = 0;
2080 /* Probe for argc==1 */
2081 this->exec_argv = (user_addr_t*)copyin(
2082 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2083 self->exec_argc = this->exec_argv[self->exec_argc] ?
2084 (self->exec_argc + 1) : self->exec_argc;
2085
2086 /* Probe for argc==2 */
2087 this->exec_argv = (user_addr_t*)copyin(
2088 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2089 self->exec_argc = this->exec_argv[self->exec_argc] ?
2090 (self->exec_argc + 1) : self->exec_argc;
2091
2092 /* Probe for argc==3 */
2093 this->exec_argv = (user_addr_t*)copyin(
2094 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2095 self->exec_argc = this->exec_argv[self->exec_argc] ?
2096 (self->exec_argc + 1) : self->exec_argc;
2097
2098 /* Probe for argc==4 */
2099 this->exec_argv = (user_addr_t*)copyin(
2100 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2101 self->exec_argc = this->exec_argv[self->exec_argc] ?
2102 (self->exec_argc + 1) : self->exec_argc;
2103
2104 /* Copy the inputs strings since there is no guarantee they'll be
2105 * present after the call completed. */
2106 self->exec_argv0 = (self->exec_argc > 0) ?
2107 copyinstr(this->exec_argv[0]) : "";
2108 self->exec_argv1 = (self->exec_argc > 1) ?
2109 copyinstr(this->exec_argv[1]) : "";
2110 self->exec_argv2 = (self->exec_argc > 2) ?
2111 copyinstr(this->exec_argv[2]) : "";
2112 self->exec_argv3 = (self->exec_argc > 3) ?
2113 copyinstr(this->exec_argv[3]) : "";
2114 this->exec_argv = 0;
2115 }
2116 syscall::exec*:return /trackedpid[pid] && errno == 0/ {
2117 /* We need to join strings here, as using multiple printf() would
2118 * cause tearing when multiple threads/processes are traced.
2119 * Since it is impossible to escape a string and join it to another one,
2120 * like sprintf("%s%S", previous, more), use hackery.
2121 * Each of the elements are split with a \\1. \\0 cannot be used because
2122 * it is simply ignored. This will conflict with any program putting a
2123 * \\1 in their execve() string but this should be "rare enough" */
2124 this->args = "";
2125 /* Process exec_argv[0] */
2126 this->args = strjoin(
2127 this->args, (self->exec_argc > 0) ? self->exec_argv0 : "");
2128
2129 /* Process exec_argv[1] */
2130 this->args = strjoin(
2131 this->args, (self->exec_argc > 1) ? "\\1" : "");
2132 this->args = strjoin(
2133 this->args, (self->exec_argc > 1) ? self->exec_argv1 : "");
2134
2135 /* Process exec_argv[2] */
2136 this->args = strjoin(
2137 this->args, (self->exec_argc > 2) ? "\\1" : "");
2138 this->args = strjoin(
2139 this->args, (self->exec_argc > 2) ? self->exec_argv2 : "");
2140
2141 /* Process exec_argv[3] */
2142 this->args = strjoin(
2143 this->args, (self->exec_argc > 3) ? "\\1" : "");
2144 this->args = strjoin(
2145 this->args, (self->exec_argc > 3) ? self->exec_argv3 : "");
2146
2147 /* Prints self->exec_argc to permits verifying the internal
2148 * consistency since this code is quite fishy. */
2149 printf("%d %d %s(\\"%s\\", [%d, %S])\\n",
2150 logindex, pid, probefunc,
2151 self->exec_arg0,
2152 self->exec_argc,
2153 this->args);
2154 logindex++;
2155 this->args = 0;
2156 }
2157 syscall::exec*:return /trackedpid[pid]/ {
2158 self->exec_arg0 = 0;
2159 self->exec_argc = 0;
2160 self->exec_argv0 = 0;
2161 self->exec_argv1 = 0;
2162 self->exec_argv2 = 0;
2163 self->exec_argv3 = 0;
2164 }
2165 """
2166
2167 # Code currently not used.
2168 D_EXTRANEOUS = """
2169 /* This is a good learning experience, since it traces a lot of things
2170 * related to the process and child processes.
2171 * Warning: it generates a gigantic log. For example, tracing
2172 * "data/trace_inputs/child1.py --child" generates a 2mb log and takes
2173 * several minutes to execute.
2174 */
2175 /*
2176 mach_trap::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2177 printf("%d %d %s_%s() = %d\\n",
2178 logindex, pid, probeprov, probefunc, errno);
2179 logindex++;
2180 }
2181 proc::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2182 printf("%d %d %s_%s() = %d\\n",
2183 logindex, pid, probeprov, probefunc, errno);
2184 logindex++;
2185 }
2186 sched::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2187 printf("%d %d %s_%s() = %d\\n",
2188 logindex, pid, probeprov, probefunc, errno);
2189 logindex++;
2190 }
2191 syscall::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2192 printf("%d %d %s_%s() = %d\\n",
2193 logindex, pid, probeprov, probefunc, errno);
2194 logindex++;
2195 }
2196 vminfo::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2197 printf("%d %d %s_%s() = %d\\n",
2198 logindex, pid, probeprov, probefunc, errno);
2199 logindex++;
2200 }
2201 */
2202 /* TODO(maruel): *stat* functions and friends
2203 syscall::access:return,
2204 syscall::chdir:return,
2205 syscall::chflags:return,
2206 syscall::chown:return,
2207 syscall::chroot:return,
2208 syscall::getattrlist:return,
2209 syscall::getxattr:return,
2210 syscall::lchown:return,
2211 syscall::lstat64:return,
2212 syscall::lstat:return,
2213 syscall::mkdir:return,
2214 syscall::pathconf:return,
2215 syscall::readlink:return,
2216 syscall::removexattr:return,
2217 syscall::setxattr:return,
2218 syscall::stat64:return,
2219 syscall::stat:return,
2220 syscall::truncate:return,
2221 syscall::unlink:return,
2222 syscall::utimes:return,
2223 */
2224 """
2225
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002226 def __init__(self, logname, use_sudo):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002227 """Starts the log collection with dtrace.
2228
2229 Requires root access or chmod 4555 on dtrace. dtrace is asynchronous so
2230 this needs to wait for dtrace to be "warmed up".
2231 """
2232 super(Dtrace.Tracer, self).__init__(logname)
2233 self._script = create_thunk()
2234 # This unique dummy temp file is used to signal the dtrace script that it
2235 # should stop as soon as all the child processes are done. A bit hackish
2236 # but works fine enough.
2237 self._dummy_file_id, self._dummy_file_name = tempfile.mkstemp(
2238 prefix='trace_signal_file')
2239
2240 # Note: do not use the -p flag. It's useless if the initial process quits
2241 # too fast, resulting in missing traces from the grand-children. The D
2242 # code manages the dtrace lifetime itself.
2243 trace_cmd = [
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002244 'dtrace',
2245 # Use a larger buffer if getting 'out of scratch space' errors.
2246 # Ref: https://wikis.oracle.com/display/DTrace/Options+and+Tunables
2247 '-b', '10m',
2248 '-x', 'dynvarsize=10m',
2249 #'-x', 'dtrace_global_maxsize=1m',
2250 '-x', 'evaltime=exec',
2251 '-o', '/dev/stderr',
2252 '-q',
2253 '-n', self._get_dtrace_code(),
2254 ]
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002255 if use_sudo is not False:
2256 trace_cmd.insert(0, 'sudo')
2257
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002258 with open(self._logname + '.log', 'wb') as logfile:
2259 self._dtrace = subprocess.Popen(
2260 trace_cmd, stdout=logfile, stderr=subprocess.STDOUT)
2261 logging.debug('Started dtrace pid: %d' % self._dtrace.pid)
2262
2263 # Reads until one line is printed, which signifies dtrace is up and ready.
2264 with open(self._logname + '.log', 'rb') as logfile:
2265 while 'dtrace_BEGIN' not in logfile.readline():
2266 if self._dtrace.poll() is not None:
2267 # Do a busy wait. :/
2268 break
2269 logging.debug('dtrace started')
2270
2271 def _get_dtrace_code(self):
2272 """Setups the D code to implement child process tracking.
2273
2274 Injects the cookie in the script so it knows when to stop.
2275
2276 The script will detect any instance of the script created with
2277 create_thunk() and will start tracing it.
2278 """
maruel@chromium.orgbe75f282012-11-02 19:34:37 +00002279 out = (
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002280 'inline int PID = %d;\n'
2281 'inline string SCRIPT = "%s";\n'
2282 'inline int FILE_ID = %d;\n'
2283 '\n'
2284 '%s') % (
2285 os.getpid(),
2286 self._script,
2287 self._dummy_file_id,
maruel@chromium.orgbe75f282012-11-02 19:34:37 +00002288 self.D_CODE)
2289 if os.environ.get('TRACE_INPUTS_DTRACE_ENABLE_EXECVE') == '1':
2290 # Do not enable by default since it tends to spew dtrace: error lines
2291 # because the execve() parameters are not in valid memory at the time of
2292 # logging.
2293 # TODO(maruel): Find a way to make this reliable since it's useful but
2294 # only works in limited/trivial uses cases for now.
2295 out += self.D_CODE_EXECVE
2296 return out
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002297
2298 def trace(self, cmd, cwd, tracename, output):
2299 """Runs dtrace on an executable.
2300
2301 This dtruss is broken when it starts the process itself or when tracing
2302 child processes, this code starts a wrapper process
2303 generated with create_thunk() which starts the executable to trace.
2304 """
2305 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
2306 assert os.path.isabs(cmd[0]), cmd[0]
2307 assert os.path.isabs(cwd), cwd
2308 assert os.path.normpath(cwd) == cwd, cwd
2309 with self._lock:
2310 if not self._initialized:
2311 raise TracingFailure(
2312 'Called Tracer.trace() on an unitialized object',
2313 None, None, None, tracename)
2314 assert tracename not in (i['trace'] for i in self._traces)
2315
2316 # Starts the script wrapper to start the child process. This signals the
2317 # dtrace script that this process is to be traced.
2318 stdout = stderr = None
2319 if output:
2320 stdout = subprocess.PIPE
2321 stderr = subprocess.STDOUT
2322 child_cmd = [
2323 sys.executable,
2324 self._script,
2325 tracename,
2326 ]
2327 # Call a dummy function so that dtrace knows I'm about to launch a process
2328 # that needs to be traced.
2329 # Yummy.
2330 child = subprocess.Popen(
2331 child_cmd + fix_python_path(cmd),
2332 stdin=subprocess.PIPE,
2333 stdout=stdout,
2334 stderr=stderr,
2335 cwd=cwd)
2336 logging.debug('Started child pid: %d' % child.pid)
2337
2338 out = child.communicate()[0]
2339 # This doesn't mean tracing is done, one of the grand-child process may
2340 # still be alive. It will be tracked with the dtrace script.
2341
2342 with self._lock:
2343 assert tracename not in (i['trace'] for i in self._traces)
2344 self._traces.append(
2345 {
2346 'cmd': cmd,
2347 'cwd': cwd,
2348 # The pid of strace process, not very useful.
2349 'pid': child.pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002350 'output': out,
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002351 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002352 })
2353 return child.returncode, out
2354
2355 def close(self, timeout=None):
2356 """Terminates dtrace."""
2357 logging.debug('close(%s)' % timeout)
2358 try:
2359 try:
2360 super(Dtrace.Tracer, self).close(timeout)
2361 # Signal dtrace that it should stop now.
maruel@chromium.orgf43e68b2012-10-15 20:23:10 +00002362 # ftruncate doesn't exist on Windows.
2363 os.ftruncate(self._dummy_file_id, 0) # pylint: disable=E1101
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002364 if timeout:
2365 start = time.time()
2366 # Use polling. :/
2367 while (self._dtrace.poll() is None and
2368 (time.time() - start) < timeout):
2369 time.sleep(0.1)
2370 self._dtrace.kill()
2371 self._dtrace.wait()
2372 finally:
2373 # Make sure to kill it in any case.
2374 if self._dtrace.poll() is None:
2375 try:
2376 self._dtrace.kill()
2377 self._dtrace.wait()
2378 except OSError:
2379 pass
2380
2381 if self._dtrace.returncode != 0:
2382 # Warn about any dtrace failure but basically ignore it.
2383 print 'dtrace failure: %s' % self._dtrace.returncode
2384 finally:
2385 os.close(self._dummy_file_id)
2386 os.remove(self._dummy_file_name)
2387 os.remove(self._script)
2388
2389 def post_process_log(self):
2390 """Sorts the log back in order when each call occured.
2391
2392 dtrace doesn't save the buffer in strict order since it keeps one buffer
2393 per CPU.
2394 """
2395 super(Dtrace.Tracer, self).post_process_log()
2396 logname = self._logname + '.log'
2397 with open(logname, 'rb') as logfile:
2398 lines = [l for l in logfile if l.strip()]
2399 errors = [l for l in lines if l.startswith('dtrace:')]
2400 if errors:
2401 raise TracingFailure(
2402 'Found errors in the trace: %s' % '\n'.join(errors),
2403 None, None, None, logname)
2404 try:
2405 lines = sorted(lines, key=lambda l: int(l.split(' ', 1)[0]))
2406 except ValueError:
2407 raise TracingFailure(
2408 'Found errors in the trace: %s' % '\n'.join(
2409 l for l in lines if l.split(' ', 1)[0].isdigit()),
2410 None, None, None, logname)
2411 with open(logname, 'wb') as logfile:
2412 logfile.write(''.join(lines))
2413
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002414 def __init__(self, use_sudo=None):
2415 super(Dtrace, self).__init__()
2416 self.use_sudo = use_sudo
2417
2418 def get_tracer(self, logname):
2419 return self.Tracer(logname, self.use_sudo)
2420
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002421 @staticmethod
2422 def clean_trace(logname):
2423 for ext in ('', '.log'):
2424 if os.path.isfile(logname + ext):
2425 os.remove(logname + ext)
2426
2427 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00002428 def parse_log(cls, logname, blacklist, trace_name):
2429 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00002430 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002431
2432 def blacklist_more(filepath):
2433 # All the HFS metadata is in the form /.vol/...
2434 return blacklist(filepath) or re.match(r'^\/\.vol\/.+$', filepath)
2435
2436 data = read_json(logname)
2437 out = []
2438 for item in data['traces']:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00002439 if trace_name and item['trace'] != trace_name:
2440 continue
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002441 result = {
2442 'output': item['output'],
2443 'trace': item['trace'],
2444 }
2445 try:
2446 context = cls.Context(blacklist_more, item['pid'], item['cwd'])
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002447 # It's fine to assume the file as UTF-8: OSX enforces the file names to
2448 # be valid UTF-8 and we control the log output.
2449 for line in codecs.open(logname + '.log', 'rb', encoding='utf-8'):
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002450 context.on_line(line)
2451 result['results'] = context.to_results()
2452 except TracingFailure:
2453 result['exception'] = sys.exc_info()
2454 out.append(result)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002455 return out
2456
2457
2458class LogmanTrace(ApiBase):
2459 """Uses the native Windows ETW based tracing functionality to trace a child
2460 process.
2461
2462 Caveat: this implementations doesn't track cwd or initial_cwd. It is because
2463 the Windows Kernel doesn't have a concept of 'current working directory' at
2464 all. A Win32 process has a map of current directories, one per drive letter
2465 and it is managed by the user mode kernel32.dll. In kernel, a file is always
2466 opened relative to another file_object or as an absolute path. All the current
2467 working directory logic is done in user mode.
2468 """
2469 class Context(ApiBase.Context):
2470 """Processes a ETW log line and keeps the list of existent and non
2471 existent files accessed.
2472
2473 Ignores directories.
2474 """
2475 # These indexes are for the stripped version in json.
2476 EVENT_NAME = 0
2477 TYPE = 1
2478 PID = 2
2479 TID = 3
2480 PROCESSOR_ID = 4
2481 TIMESTAMP = 5
2482 USER_DATA = 6
2483
2484 class Process(ApiBase.Context.Process):
2485 def __init__(self, *args):
2486 super(LogmanTrace.Context.Process, self).__init__(*args)
2487 # Handle file objects that succeeded.
2488 self.file_objects = {}
2489
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002490 def __init__(self, blacklist, thunk_pid, trace_name, thunk_cmd):
2491 logging.info(
2492 '%s(%d, %s, %s)', self.__class__.__name__, thunk_pid, trace_name,
2493 thunk_cmd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002494 super(LogmanTrace.Context, self).__init__(blacklist)
2495 self._drive_map = DosDriveMap()
2496 # Threads mapping to the corresponding process id.
2497 self._threads_active = {}
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002498 # Process ID of the tracer, e.g. the temporary script created by
2499 # create_thunk(). This is tricky because the process id may have been
2500 # reused.
2501 self._thunk_pid = thunk_pid
2502 self._thunk_cmd = thunk_cmd
2503 self._trace_name = trace_name
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002504 self._line_number = 0
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002505 self._thunk_process = None
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002506
2507 def on_line(self, line):
2508 """Processes a json Event line."""
2509 self._line_number += 1
2510 try:
2511 # By Opcode
2512 handler = getattr(
2513 self,
2514 'handle_%s_%s' % (line[self.EVENT_NAME], line[self.TYPE]),
2515 None)
2516 if not handler:
2517 raise TracingFailure(
2518 'Unexpected event %s_%s' % (
2519 line[self.EVENT_NAME], line[self.TYPE]),
2520 None, None, None)
2521 handler(line)
2522 except TracingFailure, e:
2523 # Hack in the values since the handler could be a static function.
2524 e.pid = line[self.PID]
2525 e.line = line
2526 e.line_number = self._line_number
2527 # Re-raise the modified exception.
2528 raise
2529 except (KeyError, NotImplementedError, ValueError), e:
2530 raise TracingFailure(
2531 'Trace generated a %s exception: %s' % (
2532 e.__class__.__name__, str(e)),
2533 line[self.PID],
2534 self._line_number,
2535 line,
2536 e)
2537
2538 def to_results(self):
2539 if not self.root_process:
2540 raise TracingFailure(
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002541 'Failed to detect the initial process %d' % self._thunk_pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002542 None, None, None)
2543 process = self.root_process.to_results_process()
2544 return Results(process)
2545
2546 def _thread_to_process(self, tid):
2547 """Finds the process from the thread id."""
2548 tid = int(tid, 16)
2549 pid = self._threads_active.get(tid)
2550 if not pid or not self._process_lookup.get(pid):
2551 return
2552 return self._process_lookup[pid]
2553
2554 @classmethod
2555 def handle_EventTrace_Header(cls, line):
2556 """Verifies no event was dropped, e.g. no buffer overrun occured."""
2557 BUFFER_SIZE = cls.USER_DATA
2558 #VERSION = cls.USER_DATA + 1
2559 #PROVIDER_VERSION = cls.USER_DATA + 2
2560 #NUMBER_OF_PROCESSORS = cls.USER_DATA + 3
2561 #END_TIME = cls.USER_DATA + 4
2562 #TIMER_RESOLUTION = cls.USER_DATA + 5
2563 #MAX_FILE_SIZE = cls.USER_DATA + 6
2564 #LOG_FILE_MODE = cls.USER_DATA + 7
2565 #BUFFERS_WRITTEN = cls.USER_DATA + 8
2566 #START_BUFFERS = cls.USER_DATA + 9
2567 #POINTER_SIZE = cls.USER_DATA + 10
2568 EVENTS_LOST = cls.USER_DATA + 11
2569 #CPU_SPEED = cls.USER_DATA + 12
2570 #LOGGER_NAME = cls.USER_DATA + 13
2571 #LOG_FILE_NAME = cls.USER_DATA + 14
2572 #BOOT_TIME = cls.USER_DATA + 15
2573 #PERF_FREQ = cls.USER_DATA + 16
2574 #START_TIME = cls.USER_DATA + 17
2575 #RESERVED_FLAGS = cls.USER_DATA + 18
2576 #BUFFERS_LOST = cls.USER_DATA + 19
2577 #SESSION_NAME_STRING = cls.USER_DATA + 20
2578 #LOG_FILE_NAME_STRING = cls.USER_DATA + 21
2579 if line[EVENTS_LOST] != '0':
2580 raise TracingFailure(
2581 ( '%s events were lost during trace, please increase the buffer '
2582 'size from %s') % (line[EVENTS_LOST], line[BUFFER_SIZE]),
2583 None, None, None)
2584
2585 def handle_FileIo_Cleanup(self, line):
2586 """General wisdom: if a file is closed, it's because it was opened.
2587
2588 Note that FileIo_Close is not used since if a file was opened properly but
2589 not closed before the process exits, only Cleanup will be logged.
2590 """
2591 #IRP = self.USER_DATA
2592 TTID = self.USER_DATA + 1 # Thread ID, that's what we want.
2593 FILE_OBJECT = self.USER_DATA + 2
2594 #FILE_KEY = self.USER_DATA + 3
2595 proc = self._thread_to_process(line[TTID])
2596 if not proc:
2597 # Not a process we care about.
2598 return
2599 file_object = line[FILE_OBJECT]
2600 if file_object in proc.file_objects:
2601 proc.add_file(proc.file_objects.pop(file_object), False)
2602
2603 def handle_FileIo_Create(self, line):
2604 """Handles a file open.
2605
2606 All FileIo events are described at
2607 http://msdn.microsoft.com/library/windows/desktop/aa363884.aspx
2608 for some value of 'description'.
2609
2610 " (..) process and thread id values of the IO events (..) are not valid "
2611 http://msdn.microsoft.com/magazine/ee358703.aspx
2612
2613 The FileIo.Create event doesn't return if the CreateFile() call
2614 succeeded, so keep track of the file_object and check that it is
2615 eventually closed with FileIo_Cleanup.
2616 """
2617 #IRP = self.USER_DATA
2618 TTID = self.USER_DATA + 1 # Thread ID, that's what we want.
2619 FILE_OBJECT = self.USER_DATA + 2
2620 #CREATE_OPTIONS = self.USER_DATA + 3
2621 #FILE_ATTRIBUTES = self.USER_DATA + 4
2622 #self.USER_DATA + SHARE_ACCESS = 5
2623 OPEN_PATH = self.USER_DATA + 6
2624
2625 proc = self._thread_to_process(line[TTID])
2626 if not proc:
2627 # Not a process we care about.
2628 return
2629
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002630 raw_path = line[OPEN_PATH]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002631 # Ignore directories and bare drive right away.
2632 if raw_path.endswith(os.path.sep):
2633 return
2634 filepath = self._drive_map.to_win32(raw_path)
2635 # Ignore bare drive right away. Some may still fall through with format
2636 # like '\\?\X:'
2637 if len(filepath) == 2:
2638 return
2639 file_object = line[FILE_OBJECT]
2640 if os.path.isdir(filepath):
2641 # There is no O_DIRECTORY equivalent on Windows. The closed is
2642 # FILE_FLAG_BACKUP_SEMANTICS but it's not exactly right either. So
2643 # simply discard directories are they are found.
2644 return
2645 # Override any stale file object
2646 proc.file_objects[file_object] = filepath
2647
2648 def handle_FileIo_Rename(self, line):
2649 # TODO(maruel): Handle?
2650 pass
2651
2652 def handle_Process_End(self, line):
2653 pid = line[self.PID]
2654 if self._process_lookup.get(pid):
2655 logging.info('Terminated: %d' % pid)
2656 self._process_lookup[pid] = None
2657 else:
2658 logging.debug('Terminated: %d' % pid)
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002659 if self._thunk_process and self._thunk_process.pid == pid:
2660 self._thunk_process = None
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002661
2662 def handle_Process_Start(self, line):
2663 """Handles a new child process started by PID."""
2664 #UNIQUE_PROCESS_KEY = self.USER_DATA
2665 PROCESS_ID = self.USER_DATA + 1
2666 #PARENT_PID = self.USER_DATA + 2
2667 #SESSION_ID = self.USER_DATA + 3
2668 #EXIT_STATUS = self.USER_DATA + 4
2669 #DIRECTORY_TABLE_BASE = self.USER_DATA + 5
2670 #USER_SID = self.USER_DATA + 6
2671 IMAGE_FILE_NAME = self.USER_DATA + 7
2672 COMMAND_LINE = self.USER_DATA + 8
2673
2674 ppid = line[self.PID]
2675 pid = int(line[PROCESS_ID], 16)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002676 command_line = CommandLineToArgvW(line[COMMAND_LINE])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002677 logging.debug(
2678 'New process %d->%d (%s) %s' %
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002679 (ppid, pid, line[IMAGE_FILE_NAME], command_line))
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002680
2681 if pid == self._thunk_pid:
2682 # Need to ignore processes we don't know about because the log is
2683 # system-wide. self._thunk_pid shall start only one process.
2684 # This is tricky though because Windows *loves* to reuse process id and
2685 # it happens often that the process ID of the thunk script created by
2686 # create_thunk() is reused. So just detecting the pid here is not
2687 # sufficient, we must confirm the command line.
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002688 if command_line[:len(self._thunk_cmd)] != self._thunk_cmd:
2689 logging.info(
2690 'Ignoring duplicate pid %d for %s: %s while searching for %s',
2691 pid, self._trace_name, command_line, self._thunk_cmd)
2692 return
2693
2694 # TODO(maruel): The check is quite weak. Add the thunk path.
2695 if self._thunk_process:
2696 raise TracingFailure(
2697 ( 'Parent process is _thunk_pid(%d) but thunk_process(%d) is '
2698 'already set') % (self._thunk_pid, self._thunk_process.pid),
2699 None, None, None)
2700 proc = self.Process(self.blacklist, pid, None)
2701 self._thunk_process = proc
2702 return
2703 elif ppid == self._thunk_pid and self._thunk_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002704 proc = self.Process(self.blacklist, pid, None)
2705 self.root_process = proc
2706 ppid = None
2707 elif self._process_lookup.get(ppid):
2708 proc = self.Process(self.blacklist, pid, None)
2709 self._process_lookup[ppid].children.append(proc)
2710 else:
2711 # Ignore
2712 return
2713 self._process_lookup[pid] = proc
2714
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002715 proc.command = command_line
2716 proc.executable = line[IMAGE_FILE_NAME]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002717 # proc.command[0] may be the absolute path of 'executable' but it may be
2718 # anything else too. If it happens that command[0] ends with executable,
2719 # use it, otherwise defaults to the base name.
2720 cmd0 = proc.command[0].lower()
2721 if not cmd0.endswith('.exe'):
2722 # TODO(maruel): That's not strictly true either.
2723 cmd0 += '.exe'
2724 if cmd0.endswith(proc.executable) and os.path.isfile(cmd0):
2725 # Fix the path.
2726 cmd0 = cmd0.replace('/', os.path.sep)
2727 cmd0 = os.path.normpath(cmd0)
2728 proc.executable = get_native_path_case(cmd0)
2729 logging.info(
2730 'New child: %s -> %d %s' % (ppid, pid, proc.executable))
2731
2732 def handle_Thread_End(self, line):
2733 """Has the same parameters as Thread_Start."""
2734 tid = int(line[self.TID], 16)
2735 self._threads_active.pop(tid, None)
2736
2737 def handle_Thread_Start(self, line):
2738 """Handles a new thread created.
2739
2740 Do not use self.PID here since a process' initial thread is created by
2741 the parent process.
2742 """
2743 PROCESS_ID = self.USER_DATA
2744 TTHREAD_ID = self.USER_DATA + 1
2745 #STACK_BASE = self.USER_DATA + 2
2746 #STACK_LIMIT = self.USER_DATA + 3
2747 #USER_STACK_BASE = self.USER_DATA + 4
2748 #USER_STACK_LIMIT = self.USER_DATA + 5
2749 #AFFINITY = self.USER_DATA + 6
2750 #WIN32_START_ADDR = self.USER_DATA + 7
2751 #TEB_BASE = self.USER_DATA + 8
2752 #SUB_PROCESS_TAG = self.USER_DATA + 9
2753 #BASE_PRIORITY = self.USER_DATA + 10
2754 #PAGE_PRIORITY = self.USER_DATA + 11
2755 #IO_PRIORITY = self.USER_DATA + 12
2756 #THREAD_FLAGS = self.USER_DATA + 13
2757 # Do not use self.PID here since a process' initial thread is created by
2758 # the parent process.
2759 pid = int(line[PROCESS_ID], 16)
2760 tid = int(line[TTHREAD_ID], 16)
2761 logging.debug('New thread pid:%d, tid:%d' % (pid, tid))
2762 self._threads_active[tid] = pid
2763
2764 @classmethod
2765 def supported_events(cls):
2766 """Returns all the procesed events."""
2767 out = []
2768 for member in dir(cls):
2769 match = re.match(r'^handle_([A-Za-z]+)_([A-Za-z]+)$', member)
2770 if match:
2771 out.append(match.groups())
2772 return out
2773
2774 class Tracer(ApiBase.Tracer):
2775 # The basic headers.
2776 EXPECTED_HEADER = [
2777 u'Event Name',
2778 u'Type',
2779 u'Event ID',
2780 u'Version',
2781 u'Channel',
2782 u'Level', # 5
2783 u'Opcode',
2784 u'Task',
2785 u'Keyword',
2786 u'PID',
2787 u'TID', # 10
2788 u'Processor Number',
2789 u'Instance ID',
2790 u'Parent Instance ID',
2791 u'Activity ID',
2792 u'Related Activity ID', # 15
2793 u'Clock-Time',
2794 u'Kernel(ms)', # Both have a resolution of ~15ms which makes them
2795 u'User(ms)', # pretty much useless.
2796 u'User Data', # Extra arguments that are event-specific.
2797 ]
2798 # Only the useful headers common to all entries are listed there. Any column
2799 # at 19 or higher is dependent on the specific event.
2800 EVENT_NAME = 0
2801 TYPE = 1
2802 PID = 9
2803 TID = 10
2804 PROCESSOR_ID = 11
2805 TIMESTAMP = 16
2806 NULL_GUID = '{00000000-0000-0000-0000-000000000000}'
2807 USER_DATA = 19
2808
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002809 class CsvReader(object):
2810 """CSV reader that reads files generated by tracerpt.exe.
2811
2812 csv.reader() fails to read them properly, it mangles file names quoted
2813 with "" with a comma in it.
2814 """
2815 # 0. Had a ',' or one of the following ' ' after a comma, next should
2816 # be ' ', '"' or string or ',' for an empty field.
2817 ( HAD_DELIMITER,
2818 # 1. Processing an unquoted field up to ','.
2819 IN_STR,
2820 # 2. Processing a new field starting with '"'.
2821 STARTING_STR_QUOTED,
2822 # 3. Second quote in a row at the start of a field. It could be either
2823 # '""foo""' or '""'. Who the hell thought it was a great idea to use
2824 # the same character for delimiting and escaping?
2825 STARTING_SECOND_QUOTE,
2826 # 4. A quote inside a quoted string where the previous character was
2827 # not a quote, so the string is not empty. Can be either: end of a
2828 # quoted string (a delimiter) or a quote escape. The next char must be
2829 # either '"' or ','.
2830 HAD_QUOTE_IN_QUOTED,
2831 # 5. Second quote inside a quoted string.
2832 HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED,
2833 # 6. Processing a field that started with '"'.
2834 IN_STR_QUOTED) = range(7)
2835
2836 def __init__(self, f):
2837 self.f = f
2838
2839 def __iter__(self):
2840 return self
2841
2842 def next(self):
2843 """Splits the line in fields."""
2844 line = self.f.readline()
2845 if not line:
2846 raise StopIteration()
2847 line = line.strip()
2848 fields = []
2849 state = self.HAD_DELIMITER
2850 for i, c in enumerate(line):
2851 if state == self.HAD_DELIMITER:
2852 if c == ',':
2853 # Empty field.
2854 fields.append('')
2855 elif c == ' ':
2856 # Ignore initial whitespaces
2857 pass
2858 elif c == '"':
2859 state = self.STARTING_STR_QUOTED
2860 fields.append('')
2861 else:
2862 # Start of a new field.
2863 state = self.IN_STR
2864 fields.append(c)
2865
2866 elif state == self.IN_STR:
2867 # Do not accept quote inside unquoted field.
2868 assert c != '"', (i, c, line, fields)
2869 if c == ',':
2870 fields[-1] = fields[-1].strip()
2871 state = self.HAD_DELIMITER
2872 else:
2873 fields[-1] = fields[-1] + c
2874
2875 elif state == self.STARTING_STR_QUOTED:
2876 if c == '"':
2877 # Do not store the character yet.
2878 state = self.STARTING_SECOND_QUOTE
2879 else:
2880 state = self.IN_STR_QUOTED
2881 fields[-1] = fields[-1] + c
2882
2883 elif state == self.STARTING_SECOND_QUOTE:
2884 if c == ',':
2885 # It was an empty field. '""' == ''.
2886 state = self.HAD_DELIMITER
2887 else:
2888 fields[-1] = fields[-1] + '"' + c
2889 state = self.IN_STR_QUOTED
2890
2891 elif state == self.HAD_QUOTE_IN_QUOTED:
2892 if c == ',':
2893 # End of the string.
2894 state = self.HAD_DELIMITER
2895 elif c == '"':
2896 state = self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED
2897 else:
2898 # The previous double-quote was just an unescaped quote.
2899 fields[-1] = fields[-1] + '"' + c
2900 state = self.IN_STR_QUOTED
2901
2902 elif state == self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED:
2903 if c == ',':
2904 # End of the string.
2905 state = self.HAD_DELIMITER
2906 fields[-1] = fields[-1] + '"'
2907 else:
maruel@chromium.org261d1352013-02-25 19:23:20 +00002908 # That's just how the logger rolls. Revert back to appending the
2909 # char and "guess" it was a quote in a double-quoted string.
2910 state = self.IN_STR_QUOTED
2911 fields[-1] = fields[-1] + '"' + c
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002912
2913 elif state == self.IN_STR_QUOTED:
2914 if c == '"':
2915 # Could be a delimiter or an escape.
2916 state = self.HAD_QUOTE_IN_QUOTED
2917 else:
2918 fields[-1] = fields[-1] + c
2919
2920 if state == self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED:
2921 fields[-1] = fields[-1] + '"'
2922 else:
2923 assert state in (
2924 # Terminated with a normal field.
2925 self.IN_STR,
2926 # Terminated with an empty field.
2927 self.STARTING_SECOND_QUOTE,
2928 # Terminated with a normal quoted field.
2929 self.HAD_QUOTE_IN_QUOTED), (
2930 line, state, fields)
2931 return fields
2932
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002933 def __init__(self, logname):
2934 """Starts the log collection.
2935
2936 Requires administrative access. logman.exe is synchronous so no need for a
2937 "warmup" call. 'Windows Kernel Trace' is *localized* so use its GUID
2938 instead. The GUID constant name is SystemTraceControlGuid. Lovely.
2939
2940 One can get the list of potentially interesting providers with:
2941 "logman query providers | findstr /i file"
2942 """
2943 super(LogmanTrace.Tracer, self).__init__(logname)
2944 self._script = create_thunk()
2945 cmd_start = [
2946 'logman.exe',
2947 'start',
2948 'NT Kernel Logger',
2949 '-p', '{9e814aad-3204-11d2-9a82-006008a86939}',
2950 # splitio,fileiocompletion,syscall,file,cswitch,img
2951 '(process,fileio,thread)',
2952 '-o', self._logname + '.etl',
2953 '-ets', # Send directly to kernel
2954 # Values extracted out of thin air.
2955 # Event Trace Session buffer size in kb.
2956 '-bs', '10240',
2957 # Number of Event Trace Session buffers.
2958 '-nb', '16', '256',
2959 ]
2960 logging.debug('Running: %s' % cmd_start)
2961 try:
2962 subprocess.check_call(
2963 cmd_start,
2964 stdin=subprocess.PIPE,
2965 stdout=subprocess.PIPE,
2966 stderr=subprocess.STDOUT)
2967 except subprocess.CalledProcessError, e:
2968 if e.returncode == -2147024891:
2969 print >> sys.stderr, 'Please restart with an elevated admin prompt'
2970 elif e.returncode == -2144337737:
2971 print >> sys.stderr, (
2972 'A kernel trace was already running, stop it and try again')
2973 raise
2974
2975 def trace(self, cmd, cwd, tracename, output):
2976 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
2977 assert os.path.isabs(cmd[0]), cmd[0]
2978 assert os.path.isabs(cwd), cwd
2979 assert os.path.normpath(cwd) == cwd, cwd
2980 with self._lock:
2981 if not self._initialized:
2982 raise TracingFailure(
2983 'Called Tracer.trace() on an unitialized object',
2984 None, None, None, tracename)
2985 assert tracename not in (i['trace'] for i in self._traces)
2986
2987 # Use "logman -?" for help.
2988
2989 stdout = stderr = None
2990 if output:
2991 stdout = subprocess.PIPE
2992 stderr = subprocess.STDOUT
2993
2994 # Run the child process.
2995 logging.debug('Running: %s' % cmd)
2996 # Use the temporary script generated with create_thunk() so we have a
2997 # clear pid owner. Since trace_inputs.py can be used as a library and
2998 # could trace multiple processes simultaneously, it makes it more complex
2999 # if the executable to be traced is executed directly here. It also solves
3000 # issues related to logman.exe that needs to be executed to control the
3001 # kernel trace.
3002 child_cmd = [
3003 sys.executable,
3004 self._script,
3005 tracename,
3006 ]
3007 child = subprocess.Popen(
3008 child_cmd + fix_python_path(cmd),
3009 cwd=cwd,
3010 stdin=subprocess.PIPE,
3011 stdout=stdout,
3012 stderr=stderr)
3013 logging.debug('Started child pid: %d' % child.pid)
3014 out = child.communicate()[0]
3015 # This doesn't mean all the grand-children are done. Sadly, we don't have
3016 # a good way to determine that.
3017
3018 with self._lock:
3019 assert tracename not in (i['trace'] for i in self._traces)
3020 self._traces.append({
maruel@chromium.orgf43e68b2012-10-15 20:23:10 +00003021 'cmd': cmd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003022 'cwd': cwd,
maruel@chromium.org964ddde2012-10-29 18:13:46 +00003023 'output': out,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003024 'pid': child.pid,
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00003025 # Used to figure out the real process when process ids are reused.
3026 'thunk_cmd': child_cmd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003027 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003028 })
3029
3030 return child.returncode, out
3031
3032 def close(self, _timeout=None):
3033 """Stops the kernel log collection and converts the traces to text
3034 representation.
3035 """
3036 with self._lock:
3037 if not self._initialized:
3038 raise TracingFailure(
3039 'Called Tracer.close() on an unitialized object',
3040 None, None, None)
3041 os.remove(self._script)
3042 # Save metadata, add 'format' key..
3043 data = {
3044 'format': 'csv',
3045 'traces': self._traces,
3046 }
3047 write_json(self._logname, data, False)
3048
3049 cmd_stop = [
3050 'logman.exe',
3051 'stop',
3052 'NT Kernel Logger',
3053 '-ets', # Sends the command directly to the kernel.
3054 ]
3055 logging.debug('Running: %s' % cmd_stop)
3056 subprocess.check_call(
3057 cmd_stop,
3058 stdin=subprocess.PIPE,
3059 stdout=subprocess.PIPE,
3060 stderr=subprocess.STDOUT)
3061 self._initialized = False
3062
3063 def post_process_log(self):
3064 """Converts the .etl file into .csv then into .json."""
3065 super(LogmanTrace.Tracer, self).post_process_log()
3066 logformat = 'csv'
3067 self._convert_log(logformat)
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003068 self._trim_log(logformat)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003069
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003070 def _trim_log(self, logformat):
3071 """Reduces the amount of data in original log by generating a 'reduced'
3072 log.
3073 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003074 if logformat == 'csv_utf16':
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003075 file_handle = codecs.open(
3076 self._logname + '.' + logformat, 'r', encoding='utf-16')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003077
3078 elif logformat == 'csv':
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003079 assert sys.getfilesystemencoding() == 'mbcs'
3080 file_handle = codecs.open(
3081 self._logname + '.' + logformat, 'r',
3082 encoding=get_current_encoding())
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003083
3084 supported_events = LogmanTrace.Context.supported_events()
3085
3086 def trim(generator):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003087 """Loads items from the generator and returns the interesting data.
3088
3089 It filters out any uninteresting line and reduce the amount of data in
3090 the trace.
3091 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003092 for index, line in enumerate(generator):
3093 if not index:
3094 if line != self.EXPECTED_HEADER:
3095 raise TracingFailure(
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003096 'Found malformed header: %s' % line,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003097 None, None, None)
3098 continue
3099 # As you can see, the CSV is full of useful non-redundant information:
3100 if (line[2] != '0' or # Event ID
3101 line[3] not in ('2', '3') or # Version
3102 line[4] != '0' or # Channel
3103 line[5] != '0' or # Level
3104 line[7] != '0' or # Task
3105 line[8] != '0x0000000000000000' or # Keyword
3106 line[12] != '' or # Instance ID
3107 line[13] != '' or # Parent Instance ID
3108 line[14] != self.NULL_GUID or # Activity ID
3109 line[15] != ''): # Related Activity ID
3110 raise TracingFailure(
3111 'Found unexpected values in line: %s' % ' '.join(line),
3112 None, None, None)
3113
3114 if (line[self.EVENT_NAME], line[self.TYPE]) not in supported_events:
3115 continue
3116
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003117 yield [
3118 line[self.EVENT_NAME],
3119 line[self.TYPE],
3120 line[self.PID],
3121 line[self.TID],
3122 line[self.PROCESSOR_ID],
3123 line[self.TIMESTAMP],
3124 ] + line[self.USER_DATA:]
3125
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003126 # must not convert the trim() call into a list, since it will use too much
3127 # memory for large trace. use a csv file as a workaround since the json
3128 # parser requires a complete in-memory file.
3129 with open('%s.preprocessed' % self._logname, 'wb') as f:
3130 # $ and * can't be used in file name on windows, reducing the likelihood
3131 # of having to escape a string.
3132 out = csv.writer(
3133 f, delimiter='$', quotechar='*', quoting=csv.QUOTE_MINIMAL)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003134 for line in trim(self.CsvReader(file_handle)):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003135 out.writerow([s.encode('utf-8') for s in line])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003136
3137 def _convert_log(self, logformat):
3138 """Converts the ETL trace to text representation.
3139
3140 Normally, 'csv' is sufficient. If complex scripts are used (like eastern
3141 languages), use 'csv_utf16'. If localization gets in the way, use 'xml'.
3142
3143 Arguments:
3144 - logformat: Text format to be generated, csv, csv_utf16 or xml.
3145
3146 Use "tracerpt -?" for help.
3147 """
3148 LOCALE_INVARIANT = 0x7F
3149 windll.kernel32.SetThreadLocale(LOCALE_INVARIANT)
3150 cmd_convert = [
3151 'tracerpt.exe',
3152 '-l', self._logname + '.etl',
3153 '-o', self._logname + '.' + logformat,
3154 '-gmt', # Use UTC
3155 '-y', # No prompt
3156 # Use -of XML to get the header of each items after column 19, e.g. all
3157 # the actual headers of 'User Data'.
3158 ]
3159
3160 if logformat == 'csv':
3161 # tracerpt localizes the 'Type' column, for major brainfuck
3162 # entertainment. I can't imagine any sane reason to do that.
3163 cmd_convert.extend(['-of', 'CSV'])
3164 elif logformat == 'csv_utf16':
3165 # This causes it to use UTF-16, which doubles the log size but ensures
3166 # the log is readable for non-ASCII characters.
3167 cmd_convert.extend(['-of', 'CSV', '-en', 'Unicode'])
3168 elif logformat == 'xml':
3169 cmd_convert.extend(['-of', 'XML'])
3170 else:
3171 raise ValueError('Unexpected log format \'%s\'' % logformat)
3172 logging.debug('Running: %s' % cmd_convert)
3173 # This can takes tens of minutes for large logs.
3174 # Redirects all output to stderr.
3175 subprocess.check_call(
3176 cmd_convert,
3177 stdin=subprocess.PIPE,
3178 stdout=sys.stderr,
3179 stderr=sys.stderr)
3180
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003181 def __init__(self, use_sudo=False): # pylint: disable=W0613
3182 super(LogmanTrace, self).__init__()
3183 # Ignore use_sudo. It's irrelevant on Windows but kept to simplify the API.
3184
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003185 @staticmethod
3186 def clean_trace(logname):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003187 for ext in ('', '.csv', '.etl', '.json', '.xml', '.preprocessed'):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003188 if os.path.isfile(logname + ext):
3189 os.remove(logname + ext)
3190
3191 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003192 def parse_log(cls, logname, blacklist, trace_name):
3193 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00003194 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003195
3196 def blacklist_more(filepath):
3197 # All the NTFS metadata is in the form x:\$EXTEND or stuff like that.
3198 return blacklist(filepath) or re.match(r'[A-Z]\:\\\$EXTEND', filepath)
3199
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003200 # Create a list of (Context, result_dict) tuples. This is necessary because
3201 # the csv file may be larger than the amount of available memory.
3202 contexes = [
3203 (
3204 cls.Context(
3205 blacklist_more, item['pid'], item['trace'], item['thunk_cmd']),
3206 {
3207 'output': item['output'],
3208 'trace': item['trace'],
3209 },
3210 )
3211 for item in read_json(logname)['traces']
3212 if not trace_name or item['trace'] == trace_name
3213 ]
3214
3215 # The log may be too large to fit in memory and it is not efficient to read
3216 # it multiple times, so multiplex the contexes instead, which is slightly
3217 # more awkward.
3218 with open('%s.preprocessed' % logname, 'rb') as f:
3219 lines = csv.reader(
3220 f, delimiter='$', quotechar='*', quoting=csv.QUOTE_MINIMAL)
3221 for encoded in lines:
3222 line = [s.decode('utf-8') for s in encoded]
3223 # Convert the PID in-place from hex.
3224 line[cls.Context.PID] = int(line[cls.Context.PID], 16)
3225 for context in contexes:
3226 if 'exception' in context[1]:
3227 continue
3228 try:
3229 context[0].on_line(line)
3230 except TracingFailure:
3231 context[1]['exception'] = sys.exc_info()
3232
3233 for context in contexes:
3234 if 'exception' in context[1]:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003235 continue
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003236 context[1]['results'] = context[0].to_results()
3237
3238 return [context[1] for context in contexes]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003239
3240
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003241def get_api(**kwargs):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003242 """Returns the correct implementation for the current OS."""
3243 if sys.platform == 'cygwin':
3244 raise NotImplementedError(
3245 'Not implemented for cygwin, start the script from Win32 python')
3246 flavors = {
3247 'win32': LogmanTrace,
3248 'darwin': Dtrace,
3249 'sunos5': Dtrace,
3250 'freebsd7': Dtrace,
3251 'freebsd8': Dtrace,
3252 }
3253 # Defaults to strace.
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003254 return flavors.get(sys.platform, Strace)(**kwargs)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003255
3256
3257def extract_directories(root_dir, files, blacklist):
3258 """Detects if all the files in a directory are in |files| and if so, replace
3259 the individual files by a Results.Directory instance.
3260
3261 Takes a list of Results.File instances and returns a shorter list of
3262 Results.File and Results.Directory instances.
3263
3264 Arguments:
3265 - root_dir: Optional base directory that shouldn't be search further.
3266 - files: list of Results.File instances.
3267 - blacklist: regexp of files to ignore, for example r'.+\.pyc'.
3268 """
3269 logging.info(
3270 'extract_directories(%s, %d files, ...)' % (root_dir, len(files)))
3271 assert not (root_dir or '').endswith(os.path.sep), root_dir
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003272 # It is important for root_dir to not be a symlinked path, make sure to call
3273 # os.path.realpath() as needed.
3274 assert not root_dir or (
3275 os.path.realpath(get_native_path_case(root_dir)) == root_dir)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003276 assert not any(isinstance(f, Results.Directory) for f in files)
3277 # Remove non existent files.
3278 files = [f for f in files if f.existent]
3279 if not files:
3280 return files
3281 # All files must share the same root, which can be None.
3282 assert len(set(f.root for f in files)) == 1, set(f.root for f in files)
3283
3284 # Creates a {directory: {filename: File}} mapping, up to root.
3285 buckets = {}
3286 if root_dir:
3287 buckets[root_dir] = {}
3288 for fileobj in files:
3289 path = fileobj.full_path
3290 directory = os.path.dirname(path)
3291 assert directory
3292 # Do not use os.path.basename() so trailing os.path.sep is kept.
3293 basename = path[len(directory)+1:]
3294 files_in_directory = buckets.setdefault(directory, {})
3295 files_in_directory[basename] = fileobj
3296 # Add all the directories recursively up to root.
3297 while True:
3298 old_d = directory
3299 directory = os.path.dirname(directory)
3300 if directory + os.path.sep == root_dir or directory == old_d:
3301 break
3302 buckets.setdefault(directory, {})
3303
3304 root_prefix = len(root_dir) + 1 if root_dir else 0
3305 for directory in sorted(buckets, reverse=True):
3306 actual = set(f for f in os.listdir(directory) if not blacklist(f))
3307 expected = set(buckets[directory])
3308 if not (actual - expected):
3309 parent = os.path.dirname(directory)
3310 buckets[parent][os.path.basename(directory)] = Results.Directory(
3311 root_dir,
3312 directory[root_prefix:],
3313 False,
3314 sum(f.size for f in buckets[directory].itervalues()),
3315 sum(f.nb_files for f in buckets[directory].itervalues()))
3316 # Remove the whole bucket.
3317 del buckets[directory]
3318
3319 # Reverse the mapping with what remains. The original instances are returned,
3320 # so the cached meta data is kept.
3321 files = sum((x.values() for x in buckets.itervalues()), [])
3322 return sorted(files, key=lambda x: x.path)
3323
3324
3325def trace(logfile, cmd, cwd, api, output):
3326 """Traces an executable. Returns (returncode, output) from api.
3327
3328 Arguments:
3329 - logfile: file to write to.
3330 - cmd: command to run.
3331 - cwd: current directory to start the process in.
3332 - api: a tracing api instance.
3333 - output: if True, returns output, otherwise prints it at the console.
3334 """
3335 cmd = fix_python_path(cmd)
3336 api.clean_trace(logfile)
3337 with api.get_tracer(logfile) as tracer:
3338 return tracer.trace(cmd, cwd, 'default', output)
3339
3340
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003341def CMDclean(args):
3342 """Cleans up traces."""
3343 parser = OptionParserTraceInputs(command='clean')
3344 options, args = parser.parse_args(args)
3345 api = get_api()
3346 api.clean_trace(options.log)
3347 return 0
3348
3349
3350def CMDtrace(args):
3351 """Traces an executable."""
3352 parser = OptionParserTraceInputs(command='trace')
3353 parser.allow_interspersed_args = False
3354 parser.add_option(
3355 '-q', '--quiet', action='store_true',
3356 help='Redirects traced executable output to /dev/null')
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003357 parser.add_option(
3358 '-s', '--sudo', action='store_true',
3359 help='Use sudo when shelling out the tracer tool (ignored on Windows)')
3360 parser.add_option(
3361 '-n', '--no-sudo', action='store_false',
3362 help='Don\'t use sudo')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003363 options, args = parser.parse_args(args)
3364
3365 if not args:
3366 parser.error('Please provide a command to run')
3367
3368 if not os.path.isabs(args[0]) and os.access(args[0], os.X_OK):
3369 args[0] = os.path.abspath(args[0])
3370
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003371 # options.sudo default value is None, which is to do whatever tracer defaults
3372 # do.
3373 api = get_api(use_sudo=options.sudo)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003374 return trace(options.log, args, os.getcwd(), api, options.quiet)[0]
3375
3376
3377def CMDread(args):
3378 """Reads the logs and prints the result."""
3379 parser = OptionParserTraceInputs(command='read')
3380 parser.add_option(
3381 '-V', '--variable',
3382 nargs=2,
3383 action='append',
3384 dest='variables',
3385 metavar='VAR_NAME directory',
3386 default=[],
3387 help=('Variables to replace relative directories against. Example: '
3388 '"-v \'$HOME\' \'/home/%s\'" will replace all occurence of your '
3389 'home dir with $HOME') % getpass.getuser())
3390 parser.add_option(
3391 '--root-dir',
3392 help='Root directory to base everything off it. Anything outside of this '
3393 'this directory will not be reported')
3394 parser.add_option(
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003395 '--trace-name',
3396 help='Only reads one of the trace. Defaults to reading all traces')
3397 parser.add_option(
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003398 '-j', '--json', action='store_true',
3399 help='Outputs raw result data as json')
3400 parser.add_option(
3401 '-b', '--blacklist', action='append', default=[],
3402 help='List of regexp to use as blacklist filter')
3403 options, args = parser.parse_args(args)
3404
3405 if options.root_dir:
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003406 options.root_dir = unicode(os.path.abspath(options.root_dir))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003407
3408 variables = dict(options.variables)
3409 api = get_api()
3410 def blacklist(f):
3411 return any(re.match(b, f) for b in options.blacklist)
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003412 data = api.parse_log(options.log, blacklist, options.trace_name)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003413 # Process each trace.
3414 output_as_json = []
3415 for item in data:
3416 if 'exception' in item:
maruel@chromium.org424f5692012-10-15 15:42:38 +00003417 # Do not abort the other traces.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003418 print >> sys.stderr, (
maruel@chromium.org424f5692012-10-15 15:42:38 +00003419 'Trace %s: Got an exception: %s' % (
3420 item['trace'], item['exception'][1]))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003421 continue
3422 results = item['results']
3423 if options.root_dir:
3424 results = results.strip_root(options.root_dir)
3425
3426 if options.json:
3427 output_as_json.append(results.flatten())
3428 else:
3429 simplified = extract_directories(
3430 options.root_dir, results.files, blacklist)
3431 simplified = [f.replace_variables(variables) for f in simplified]
3432 if len(data) > 1:
3433 print('Trace: %s' % item['trace'])
3434 print('Total: %d' % len(results.files))
3435 print('Non existent: %d' % len(results.non_existent))
3436 for f in results.non_existent:
3437 print(' %s' % f.path)
3438 print(
3439 'Interesting: %d reduced to %d' % (
3440 len(results.existent), len(simplified)))
3441 for f in simplified:
3442 print(' %s' % f.path)
3443
3444 if options.json:
3445 write_json(sys.stdout, output_as_json, False)
3446 return 0
3447
3448
3449class OptionParserWithLogging(optparse.OptionParser):
3450 """Adds --verbose option."""
3451 def __init__(self, verbose=0, **kwargs):
3452 optparse.OptionParser.__init__(self, **kwargs)
3453 self.add_option(
3454 '-v', '--verbose',
3455 action='count',
3456 default=verbose,
3457 help='Use multiple times to increase verbosity')
3458
3459 def parse_args(self, *args, **kwargs):
3460 options, args = optparse.OptionParser.parse_args(self, *args, **kwargs)
3461 levels = [logging.ERROR, logging.INFO, logging.DEBUG]
3462 logging.basicConfig(
3463 level=levels[min(len(levels)-1, options.verbose)],
3464 format='%(levelname)5s %(module)15s(%(lineno)3d): %(message)s')
3465 return options, args
3466
3467
3468class OptionParserWithNiceDescription(OptionParserWithLogging):
3469 """Generates the description with the command's docstring."""
3470 def __init__(self, **kwargs):
3471 """Sets 'description' and 'usage' if not already specified."""
3472 command = kwargs.pop('command', 'help')
3473 kwargs.setdefault(
3474 'description',
3475 re.sub('[\r\n ]{2,}', ' ', get_command_handler(command).__doc__))
3476 kwargs.setdefault('usage', '%%prog %s [options]' % command)
3477 OptionParserWithLogging.__init__(self, **kwargs)
3478
3479
3480class OptionParserTraceInputs(OptionParserWithNiceDescription):
3481 """Adds automatic --log handling."""
3482 def __init__(self, **kwargs):
3483 OptionParserWithNiceDescription.__init__(self, **kwargs)
3484 self.add_option(
3485 '-l', '--log', help='Log file to generate or read, required')
3486
3487 def parse_args(self, *args, **kwargs):
3488 """Makes sure the paths make sense.
3489
3490 On Windows, / and \ are often mixed together in a path.
3491 """
3492 options, args = OptionParserWithNiceDescription.parse_args(
3493 self, *args, **kwargs)
3494 if not options.log:
3495 self.error('Must supply a log file with -l')
3496 options.log = os.path.abspath(options.log)
3497 return options, args
3498
3499
3500def extract_documentation():
3501 """Returns a dict {command: description} for each of documented command."""
3502 commands = (
3503 fn[3:]
3504 for fn in dir(sys.modules['__main__'])
3505 if fn.startswith('CMD') and get_command_handler(fn[3:]).__doc__)
3506 return dict((fn, get_command_handler(fn).__doc__) for fn in commands)
3507
3508
3509def CMDhelp(args):
3510 """Prints list of commands or help for a specific command."""
3511 doc = extract_documentation()
3512 # Calculates the optimal offset.
3513 offset = max(len(cmd) for cmd in doc)
3514 format_str = ' %-' + str(offset + 2) + 's %s'
3515 # Generate a one-liner documentation of each commands.
3516 commands_description = '\n'.join(
3517 format_str % (cmd, doc[cmd].split('\n')[0]) for cmd in sorted(doc))
3518
3519 parser = OptionParserWithNiceDescription(
3520 usage='%prog <command> [options]',
3521 description='Commands are:\n%s\n' % commands_description)
3522 parser.format_description = lambda _: parser.description
3523
3524 # Strip out any -h or --help argument.
3525 _, args = parser.parse_args([i for i in args if not i in ('-h', '--help')])
3526 if len(args) == 1:
3527 if not get_command_handler(args[0]):
3528 parser.error('Unknown command %s' % args[0])
3529 # The command was "%prog help command", replaces ourself with
3530 # "%prog command --help" so help is correctly printed out.
3531 return main(args + ['--help'])
3532 elif args:
3533 parser.error('Unknown argument "%s"' % ' '.join(args))
3534 parser.print_help()
3535 return 0
3536
3537
3538def get_command_handler(name):
3539 """Returns the command handler or CMDhelp if it doesn't exist."""
3540 return getattr(sys.modules['__main__'], 'CMD%s' % name, None)
3541
3542
3543def main_impl(argv):
3544 command = get_command_handler(argv[0] if argv else 'help')
3545 if not command:
3546 return CMDhelp(argv)
3547 return command(argv[1:])
3548
3549def main(argv):
3550 try:
3551 main_impl(argv)
3552 except TracingFailure, e:
3553 sys.stderr.write('\nError: ')
3554 sys.stderr.write(str(e))
3555 sys.stderr.write('\n')
3556 return 1
3557
3558
3559if __name__ == '__main__':
3560 sys.exit(main(sys.argv[1:]))