blob: c19c9e23f52750b421c777b51a6e2919a1a66865 [file] [log] [blame]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001#!/usr/bin/env python
2# coding=utf-8
3# Copyright (c) 2012 The Chromium Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""Traces an executable and its child processes and extract the files accessed
8by them.
9
10The implementation uses OS-specific API. The native Kernel logger and the ETL
11interface is used on Windows. Dtrace is used on OSX. Strace is used otherwise.
12The OS-specific implementation is hidden in an 'API' interface.
13
14The results are embedded in a Results instance. The tracing is done in two
15phases, the first is to do the actual trace and generate an
16implementation-specific log file. Then the log file is parsed to extract the
17information, including the individual child processes and the files accessed
18from the log.
19"""
20
21import codecs
22import csv
maruel@chromium.org9e56b592013-05-21 16:56:05 +000023import errno
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000024import getpass
25import glob
26import json
27import logging
28import optparse
29import os
30import re
maruel@chromium.org9b2112a2013-04-05 18:01:02 +000031import stat
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000032import subprocess
33import sys
34import tempfile
35import threading
36import time
maruel@chromium.org9b2112a2013-04-05 18:01:02 +000037import unicodedata
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000038import weakref
39
40## OS-specific imports
41
42if sys.platform == 'win32':
43 from ctypes.wintypes import byref, create_unicode_buffer, c_int, c_wchar_p
44 from ctypes.wintypes import windll, FormatError # pylint: disable=E0611
45 from ctypes.wintypes import GetLastError # pylint: disable=E0611
46elif sys.platform == 'darwin':
47 import Carbon.File # pylint: disable=F0401
48 import MacOS # pylint: disable=F0401
49
50
51BASE_DIR = os.path.dirname(os.path.abspath(__file__))
52ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR))
53
54
55class TracingFailure(Exception):
56 """An exception occured during tracing."""
57 def __init__(self, description, pid, line_number, line, *args):
58 super(TracingFailure, self).__init__(
59 description, pid, line_number, line, *args)
60 self.description = description
61 self.pid = pid
62 self.line_number = line_number
63 self.line = line
64 self.extra = args
65
66 def __str__(self):
67 out = self.description
68 if self.pid:
69 out += '\npid: %d' % self.pid
70 if self.line_number:
71 out += '\nline: %d' % self.line_number
72 if self.line:
73 out += '\n%s' % self.line
74 if self.extra:
75 out += '\n' + ', '.join(map(str, filter(None, self.extra)))
76 return out
77
78
79## OS-specific functions
80
81if sys.platform == 'win32':
82 def QueryDosDevice(drive_letter):
83 """Returns the Windows 'native' path for a DOS drive letter."""
84 assert re.match(r'^[a-zA-Z]:$', drive_letter), drive_letter
maruel@chromium.org306e0e72012-11-02 18:22:03 +000085 assert isinstance(drive_letter, unicode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000086 # Guesswork. QueryDosDeviceW never returns the required number of bytes.
87 chars = 1024
maruel@chromium.org306e0e72012-11-02 18:22:03 +000088 drive_letter = drive_letter
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000089 p = create_unicode_buffer(chars)
90 if 0 == windll.kernel32.QueryDosDeviceW(drive_letter, p, chars):
91 err = GetLastError()
92 if err:
93 # pylint: disable=E0602
maruel@chromium.orgf37c2472012-11-02 20:06:13 +000094 msg = u'QueryDosDevice(%s): %s (%d)' % (
95 drive_letter, FormatError(err), err)
96 raise WindowsError(err, msg.encode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000097 return p.value
98
99
100 def GetShortPathName(long_path):
101 """Returns the Windows short path equivalent for a 'long' path."""
maruel@chromium.org68bb1482013-02-15 22:36:24 +0000102 assert isinstance(long_path, unicode), repr(long_path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000103 # Adds '\\\\?\\' when given an absolute path so the MAX_PATH (260) limit is
104 # not enforced.
105 if os.path.isabs(long_path) and not long_path.startswith('\\\\?\\'):
106 long_path = '\\\\?\\' + long_path
107 chars = windll.kernel32.GetShortPathNameW(long_path, None, 0)
108 if chars:
109 p = create_unicode_buffer(chars)
110 if windll.kernel32.GetShortPathNameW(long_path, p, chars):
111 return p.value
112
113 err = GetLastError()
114 if err:
115 # pylint: disable=E0602
maruel@chromium.orgf37c2472012-11-02 20:06:13 +0000116 msg = u'GetShortPathName(%s): %s (%d)' % (
117 long_path, FormatError(err), err)
118 raise WindowsError(err, msg.encode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000119
120
121 def GetLongPathName(short_path):
122 """Returns the Windows long path equivalent for a 'short' path."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000123 assert isinstance(short_path, unicode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000124 # Adds '\\\\?\\' when given an absolute path so the MAX_PATH (260) limit is
125 # not enforced.
126 if os.path.isabs(short_path) and not short_path.startswith('\\\\?\\'):
127 short_path = '\\\\?\\' + short_path
128 chars = windll.kernel32.GetLongPathNameW(short_path, None, 0)
129 if chars:
130 p = create_unicode_buffer(chars)
131 if windll.kernel32.GetLongPathNameW(short_path, p, chars):
132 return p.value
133
134 err = GetLastError()
135 if err:
136 # pylint: disable=E0602
maruel@chromium.orgf37c2472012-11-02 20:06:13 +0000137 msg = u'GetLongPathName(%s): %s (%d)' % (
138 short_path, FormatError(err), err)
139 raise WindowsError(err, msg.encode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000140
141
142 def get_current_encoding():
143 """Returns the 'ANSI' code page associated to the process."""
144 return 'cp%d' % int(windll.kernel32.GetACP())
145
146
147 class DosDriveMap(object):
148 """Maps \Device\HarddiskVolumeN to N: on Windows."""
149 # Keep one global cache.
150 _MAPPING = {}
151
152 def __init__(self):
153 """Lazy loads the cache."""
154 if not self._MAPPING:
155 # This is related to UNC resolver on windows. Ignore that.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000156 self._MAPPING[u'\\Device\\Mup'] = None
157 self._MAPPING[u'\\SystemRoot'] = os.environ[u'SystemRoot']
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000158
159 for letter in (chr(l) for l in xrange(ord('C'), ord('Z')+1)):
160 try:
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000161 letter = u'%s:' % letter
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000162 mapped = QueryDosDevice(letter)
163 if mapped in self._MAPPING:
164 logging.warn(
165 ('Two drives: \'%s\' and \'%s\', are mapped to the same disk'
166 '. Drive letters are a user-mode concept and the kernel '
167 'traces only have NT path, so all accesses will be '
168 'associated with the first drive letter, independent of the '
169 'actual letter used by the code') % (
170 self._MAPPING[mapped], letter))
171 else:
172 self._MAPPING[mapped] = letter
173 except WindowsError: # pylint: disable=E0602
174 pass
175
176 def to_win32(self, path):
177 """Converts a native NT path to Win32/DOS compatible path."""
178 match = re.match(r'(^\\Device\\[a-zA-Z0-9]+)(\\.*)?$', path)
179 if not match:
180 raise ValueError(
181 'Can\'t convert %s into a Win32 compatible path' % path,
182 path)
183 if not match.group(1) in self._MAPPING:
184 # Unmapped partitions may be accessed by windows for the
185 # fun of it while the test is running. Discard these.
186 return None
187 drive = self._MAPPING[match.group(1)]
188 if not drive or not match.group(2):
189 return drive
190 return drive + match.group(2)
191
192
193 def isabs(path):
194 """Accepts X: as an absolute path, unlike python's os.path.isabs()."""
195 return os.path.isabs(path) or len(path) == 2 and path[1] == ':'
196
197
csharp@chromium.orgf2eacff2013-04-04 14:20:20 +0000198 def find_item_native_case(root, item):
199 """Gets the native path case of a single item based at root_path."""
200 if item == '..':
201 return item
202
203 root = get_native_path_case(root)
204 return os.path.basename(get_native_path_case(os.path.join(root, item)))
205
206
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000207 def get_native_path_case(p):
208 """Returns the native path case for an existing file.
209
210 On Windows, removes any leading '\\?\'.
211 """
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000212 assert isinstance(p, unicode), repr(p)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000213 if not isabs(p):
214 raise ValueError(
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +0000215 'get_native_path_case(%r): Require an absolute path' % p, p)
216
maruel@chromium.org037758d2012-12-10 17:59:46 +0000217 # Make sure it is normalized to os.path.sep. Do not do it here to keep the
218 # function fast
219 assert '/' not in p, p
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +0000220 suffix = ''
221 count = p.count(':')
222 if count > 1:
223 # This means it has an alternate-data stream. There could be 3 ':', since
224 # it could be the $DATA datastream of an ADS. Split the whole ADS suffix
225 # off and add it back afterward. There is no way to know the native path
226 # case of an alternate data stream.
227 items = p.split(':')
228 p = ':'.join(items[0:2])
229 suffix = ''.join(':' + i for i in items[2:])
230
maruel@chromium.org42b091d2013-04-02 22:27:02 +0000231 # TODO(maruel): Use os.path.normpath?
232 if p.endswith('.\\'):
233 p = p[:-2]
234
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000235 # Windows used to have an option to turn on case sensitivity on non Win32
236 # subsystem but that's out of scope here and isn't supported anymore.
237 # Go figure why GetShortPathName() is needed.
238 try:
239 out = GetLongPathName(GetShortPathName(p))
240 except OSError, e:
241 if e.args[0] in (2, 3, 5):
242 # The path does not exist. Try to recurse and reconstruct the path.
243 base = os.path.dirname(p)
244 rest = os.path.basename(p)
245 return os.path.join(get_native_path_case(base), rest)
246 raise
247 if out.startswith('\\\\?\\'):
248 out = out[4:]
249 # Always upper case the first letter since GetLongPathName() will return the
250 # drive letter in the case it was given.
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +0000251 return out[0].upper() + out[1:] + suffix
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000252
253
254 def CommandLineToArgvW(command_line):
255 """Splits a commandline into argv using CommandLineToArgvW()."""
256 # http://msdn.microsoft.com/library/windows/desktop/bb776391.aspx
257 size = c_int()
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000258 assert isinstance(command_line, unicode)
259 ptr = windll.shell32.CommandLineToArgvW(command_line, byref(size))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000260 try:
261 return [arg for arg in (c_wchar_p * size.value).from_address(ptr)]
262 finally:
263 windll.kernel32.LocalFree(ptr)
264
265
266elif sys.platform == 'darwin':
267
268
269 # On non-windows, keep the stdlib behavior.
270 isabs = os.path.isabs
271
272
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000273 def _native_case(p):
274 """Gets the native path case. Warning: this function resolves symlinks."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000275 try:
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000276 rel_ref, _ = Carbon.File.FSPathMakeRef(p.encode('utf-8'))
277 # The OSX underlying code uses NFD but python strings are in NFC. This
278 # will cause issues with os.listdir() for example. Since the dtrace log
279 # *is* in NFC, normalize it here.
csharp@chromium.org5a2c3732013-03-07 14:44:56 +0000280 out = unicodedata.normalize(
281 'NFC', rel_ref.FSRefMakePath().decode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000282 if p.endswith(os.path.sep) and not out.endswith(os.path.sep):
283 return out + os.path.sep
284 return out
285 except MacOS.Error, e:
286 if e.args[0] in (-43, -120):
287 # The path does not exist. Try to recurse and reconstruct the path.
288 # -43 means file not found.
289 # -120 means directory not found.
290 base = os.path.dirname(p)
291 rest = os.path.basename(p)
csharp@chromium.org5a2c3732013-03-07 14:44:56 +0000292 return os.path.join(_native_case(base), rest)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000293 raise OSError(
294 e.args[0], 'Failed to get native path for %s' % p, p, e.args[1])
295
296
297 def _split_at_symlink_native(base_path, rest):
298 """Returns the native path for a symlink."""
299 base, symlink, rest = split_at_symlink(base_path, rest)
300 if symlink:
301 if not base_path:
302 base_path = base
303 else:
304 base_path = safe_join(base_path, base)
csharp@chromium.orgf2eacff2013-04-04 14:20:20 +0000305 symlink = find_item_native_case(base_path, symlink)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000306 return base, symlink, rest
307
308
csharp@chromium.orgf2eacff2013-04-04 14:20:20 +0000309 def find_item_native_case(root_path, item):
310 """Gets the native path case of a single item based at root_path.
311
312 There is no API to get the native path case of symlinks on OSX. So it
313 needs to be done the slow way.
314 """
315 if item == '..':
316 return item
317
318 item = item.lower()
319 for element in os.listdir(root_path):
320 if element.lower() == item:
321 return element
322
323
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000324 def get_native_path_case(path):
325 """Returns the native path case for an existing file.
326
327 Technically, it's only HFS+ on OSX that is case preserving and
328 insensitive. It's the default setting on HFS+ but can be changed.
329 """
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000330 assert isinstance(path, unicode), repr(path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000331 if not isabs(path):
332 raise ValueError(
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000333 'get_native_path_case(%r): Require an absolute path' % path, path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000334 if path.startswith('/dev'):
335 # /dev is not visible from Carbon, causing an exception.
336 return path
337
338 # Starts assuming there is no symlink along the path.
339 resolved = _native_case(path)
csharp@chromium.orgf2eacff2013-04-04 14:20:20 +0000340 if path.lower() in (resolved.lower(), resolved.lower() + './'):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000341 # This code path is incredibly faster.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000342 logging.debug('get_native_path_case(%s) = %s' % (path, resolved))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000343 return resolved
344
345 # There was a symlink, process it.
346 base, symlink, rest = _split_at_symlink_native(None, path)
347 assert symlink, (path, base, symlink, rest, resolved)
348 prev = base
349 base = safe_join(_native_case(base), symlink)
350 assert len(base) > len(prev)
351 while rest:
352 prev = base
353 relbase, symlink, rest = _split_at_symlink_native(base, rest)
354 base = safe_join(base, relbase)
355 assert len(base) > len(prev), (prev, base, symlink)
356 if symlink:
357 base = safe_join(base, symlink)
358 assert len(base) > len(prev), (prev, base, symlink)
359 # Make sure no symlink was resolved.
360 assert base.lower() == path.lower(), (base, path)
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000361 logging.debug('get_native_path_case(%s) = %s' % (path, base))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000362 return base
363
364
365else: # OSes other than Windows and OSX.
366
367
368 # On non-windows, keep the stdlib behavior.
369 isabs = os.path.isabs
370
371
csharp@chromium.orgf2eacff2013-04-04 14:20:20 +0000372 def find_item_native_case(root, item):
373 """Gets the native path case of a single item based at root_path."""
374 if item == '..':
375 return item
376
377 root = get_native_path_case(root)
378 return os.path.basename(get_native_path_case(os.path.join(root, item)))
379
380
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000381 def get_native_path_case(path):
382 """Returns the native path case for an existing file.
383
384 On OSes other than OSX and Windows, assume the file system is
385 case-sensitive.
386
387 TODO(maruel): This is not strictly true. Implement if necessary.
388 """
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000389 assert isinstance(path, unicode), repr(path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000390 if not isabs(path):
391 raise ValueError(
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000392 'get_native_path_case(%r): Require an absolute path' % path, path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000393 # Give up on cygwin, as GetLongPathName() can't be called.
394 # Linux traces tends to not be normalized so use this occasion to normalize
395 # it. This function implementation already normalizes the path on the other
396 # OS so this needs to be done here to be coherent between OSes.
397 out = os.path.normpath(path)
398 if path.endswith(os.path.sep) and not out.endswith(os.path.sep):
399 return out + os.path.sep
400 return out
401
402
403if sys.platform != 'win32': # All non-Windows OSes.
404
405
406 def safe_join(*args):
407 """Joins path elements like os.path.join() but doesn't abort on absolute
408 path.
409
410 os.path.join('foo', '/bar') == '/bar'
411 but safe_join('foo', '/bar') == 'foo/bar'.
412 """
413 out = ''
414 for element in args:
415 if element.startswith(os.path.sep):
416 if out.endswith(os.path.sep):
417 out += element[1:]
418 else:
419 out += element
420 else:
421 if out.endswith(os.path.sep):
422 out += element
423 else:
424 out += os.path.sep + element
425 return out
426
427
428 def split_at_symlink(base_dir, relfile):
429 """Scans each component of relfile and cut the string at the symlink if
430 there is any.
431
432 Returns a tuple (base_path, symlink, rest), with symlink == rest == None if
433 not symlink was found.
434 """
435 if base_dir:
436 assert relfile
437 assert os.path.isabs(base_dir)
438 index = 0
439 else:
440 assert os.path.isabs(relfile)
441 index = 1
442
443 def at_root(rest):
444 if base_dir:
445 return safe_join(base_dir, rest)
446 return rest
447
448 while True:
449 try:
450 index = relfile.index(os.path.sep, index)
451 except ValueError:
452 index = len(relfile)
453 full = at_root(relfile[:index])
454 if os.path.islink(full):
455 # A symlink!
456 base = os.path.dirname(relfile[:index])
457 symlink = os.path.basename(relfile[:index])
458 rest = relfile[index:]
459 logging.debug(
460 'split_at_symlink(%s, %s) -> (%s, %s, %s)' %
461 (base_dir, relfile, base, symlink, rest))
462 return base, symlink, rest
463 if index == len(relfile):
464 break
465 index += 1
466 return relfile, None, None
467
468
maruel@chromium.orgac36fb72013-05-21 14:50:53 +0000469class Unbuffered(object):
470 """Disable buffering on a file object."""
471 def __init__(self, stream):
472 self.stream = stream
473
474 def write(self, data):
475 self.stream.write(data)
476 if '\n' in data:
477 self.stream.flush()
478
479 def __getattr__(self, attr):
480 return getattr(self.stream, attr)
481
482
483def disable_buffering():
484 """Makes this process and child processes stdout unbuffered."""
485 if not os.environ.get('PYTHONUNBUFFERED'):
486 # Since sys.stdout is a C++ object, it's impossible to do
487 # sys.stdout.write = lambda...
488 sys.stdout = Unbuffered(sys.stdout)
489 os.environ['PYTHONUNBUFFERED'] = 'x'
490
491
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000492def fix_python_path(cmd):
493 """Returns the fixed command line to call the right python executable."""
494 out = cmd[:]
495 if out[0] == 'python':
496 out[0] = sys.executable
497 elif out[0].endswith('.py'):
498 out.insert(0, sys.executable)
499 return out
500
501
maruel@chromium.org2909b7c2013-05-29 18:52:59 +0000502def create_subprocess_thunk():
503 """Creates a small temporary script to start the child process.
504
505 This thunk doesn't block, its unique name is used to identify it as the
506 parent.
507 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000508 handle, name = tempfile.mkstemp(prefix='trace_inputs_thunk', suffix='.py')
maruel@chromium.org2909b7c2013-05-29 18:52:59 +0000509 try:
510 os.write(
511 handle,
512 (
513 'import subprocess, sys\n'
514 'sys.exit(subprocess.call(sys.argv[2:]))\n'
515 ))
516 finally:
517 os.close(handle)
518 return name
519
520
521def create_exec_thunk():
522 """Creates a small temporary script to start the child executable.
523
524 Reads from the file handle provided as the fisrt argument to block, then
525 execv() the command to be traced.
526 """
527 handle, name = tempfile.mkstemp(prefix='trace_inputs_thunk', suffix='.py')
528 try:
529 os.write(
530 handle,
531 (
532 'import os, sys\n'
533 'fd = int(sys.argv[1])\n'
534 # This will block until the controlling process writes a byte on the
535 # pipe. It will do so once the tracing tool, e.g. strace, is ready to
536 # trace.
537 'os.read(fd, 1)\n'
538 'os.close(fd)\n'
539 'os.execv(sys.argv[2], sys.argv[2:])\n'
540 ))
541 finally:
542 os.close(handle)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000543 return name
544
545
546def strace_process_quoted_arguments(text):
547 """Extracts quoted arguments on a string and return the arguments as a list.
548
549 Implemented as an automaton. Supports incomplete strings in the form
550 '"foo"...'.
551
552 Example:
553 With text = '"foo", "bar"', the function will return ['foo', 'bar']
554
555 TODO(maruel): Implement escaping.
556 """
557 # All the possible states of the DFA.
558 ( NEED_QUOTE, # Begining of a new arguments.
559 INSIDE_STRING, # Inside an argument.
560 ESCAPED, # Found a '\' inside a quote. Treat the next char as-is.
561 NEED_COMMA_OR_DOT, # Right after the closing quote of an argument. Could be
562 # a serie of 3 dots or a comma.
563 NEED_SPACE, # Right after a comma
564 NEED_DOT_2, # Found a dot, need a second one.
565 NEED_DOT_3, # Found second dot, need a third one.
566 NEED_COMMA, # Found third dot, need a comma.
567 ) = range(8)
568
569 state = NEED_QUOTE
570 out = []
571 for index, char in enumerate(text):
572 if char == '"':
573 if state == NEED_QUOTE:
574 state = INSIDE_STRING
575 # A new argument was found.
576 out.append('')
577 elif state == INSIDE_STRING:
578 # The argument is now closed.
579 state = NEED_COMMA_OR_DOT
580 elif state == ESCAPED:
581 out[-1] += char
582 state = INSIDE_STRING
583 else:
584 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000585 'Can\'t process char \'%s\' at column %d for: %r' % (
586 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000587 index,
588 text)
589 elif char == ',':
590 if state in (NEED_COMMA_OR_DOT, NEED_COMMA):
591 state = NEED_SPACE
592 elif state == INSIDE_STRING:
593 out[-1] += char
594 elif state == ESCAPED:
595 out[-1] += char
596 state = INSIDE_STRING
597 else:
598 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000599 'Can\'t process char \'%s\' at column %d for: %r' % (
600 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000601 index,
602 text)
603 elif char == ' ':
604 if state == NEED_SPACE:
605 state = NEED_QUOTE
606 elif state == INSIDE_STRING:
607 out[-1] += char
608 elif state == ESCAPED:
609 out[-1] += char
610 state = INSIDE_STRING
611 else:
612 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000613 'Can\'t process char \'%s\' at column %d for: %r' % (
614 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000615 index,
616 text)
617 elif char == '.':
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000618 if state in (NEED_QUOTE, NEED_COMMA_OR_DOT):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000619 # The string is incomplete, this mean the strace -s flag should be
620 # increased.
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000621 # For NEED_QUOTE, the input string would look like '"foo", ...'.
622 # For NEED_COMMA_OR_DOT, the input string would look like '"foo"...'
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000623 state = NEED_DOT_2
624 elif state == NEED_DOT_2:
625 state = NEED_DOT_3
626 elif state == NEED_DOT_3:
627 state = NEED_COMMA
628 elif state == INSIDE_STRING:
629 out[-1] += char
630 elif state == ESCAPED:
631 out[-1] += char
632 state = INSIDE_STRING
633 else:
634 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000635 'Can\'t process char \'%s\' at column %d for: %r' % (
636 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000637 index,
638 text)
639 elif char == '\\':
640 if state == ESCAPED:
641 out[-1] += char
642 state = INSIDE_STRING
643 elif state == INSIDE_STRING:
644 state = ESCAPED
645 else:
646 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000647 'Can\'t process char \'%s\' at column %d for: %r' % (
648 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000649 index,
650 text)
651 else:
652 if state == INSIDE_STRING:
653 out[-1] += char
654 else:
655 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000656 'Can\'t process char \'%s\' at column %d for: %r' % (
657 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000658 index,
659 text)
660 if state not in (NEED_COMMA, NEED_COMMA_OR_DOT):
661 raise ValueError(
662 'String is incorrectly terminated: %r' % text,
663 text)
664 return out
665
666
667def read_json(filepath):
668 with open(filepath, 'r') as f:
669 return json.load(f)
670
671
672def write_json(filepath_or_handle, data, dense):
673 """Writes data into filepath or file handle encoded as json.
674
675 If dense is True, the json is packed. Otherwise, it is human readable.
676 """
677 if hasattr(filepath_or_handle, 'write'):
678 if dense:
679 filepath_or_handle.write(
680 json.dumps(data, sort_keys=True, separators=(',',':')))
681 else:
682 filepath_or_handle.write(json.dumps(data, sort_keys=True, indent=2))
683 else:
684 with open(filepath_or_handle, 'wb') as f:
685 if dense:
686 json.dump(data, f, sort_keys=True, separators=(',',':'))
687 else:
688 json.dump(data, f, sort_keys=True, indent=2)
689
690
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000691def assert_is_renderable(pseudo_string):
692 """Asserts the input is a valid object to be processed by render()."""
693 assert (
694 isinstance(pseudo_string, (None.__class__, unicode)) or
695 hasattr(pseudo_string, 'render')), repr(pseudo_string)
696
697
698def render(pseudo_string):
699 """Converts the pseudo-string to an unicode string."""
700 assert_is_renderable(pseudo_string)
701 if isinstance(pseudo_string, (None.__class__, unicode)):
702 return pseudo_string
703 return pseudo_string.render()
704
705
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000706class Results(object):
707 """Results of a trace session."""
708
709 class _TouchedObject(object):
710 """Something, a file or a directory, that was accessed."""
711 def __init__(self, root, path, tainted, size, nb_files):
712 logging.debug(
713 '%s(%s, %s, %s, %s, %s)' %
714 (self.__class__.__name__, root, path, tainted, size, nb_files))
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000715 assert_is_renderable(root)
716 assert_is_renderable(path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000717 self.root = root
718 self.path = path
719 self.tainted = tainted
720 self.nb_files = nb_files
maruel@chromium.org538141b2013-06-03 20:57:17 +0000721 # Can be used as a cache or a default value, depending on context. In
722 # particular, once self.tainted is True, because the path was replaced
723 # with a variable, it is not possible to look up the file size.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000724 self._size = size
725 # These are cache only.
726 self._real_path = None
727
728 # Check internal consistency.
729 assert path, path
730 assert tainted or bool(root) != bool(isabs(path)), (root, path)
731 assert tainted or (
732 not os.path.exists(self.full_path) or
733 (self.full_path == get_native_path_case(self.full_path))), (
734 tainted, self.full_path, get_native_path_case(self.full_path))
735
736 @property
737 def existent(self):
738 return self.size != -1
739
740 @property
741 def full_path(self):
742 if self.root:
743 return os.path.join(self.root, self.path)
744 return self.path
745
746 @property
747 def real_path(self):
748 """Returns the path with symlinks resolved."""
749 if not self._real_path:
750 self._real_path = os.path.realpath(self.full_path)
751 return self._real_path
752
753 @property
754 def size(self):
maruel@chromium.org538141b2013-06-03 20:57:17 +0000755 """File's size. -1 is not existent.
756
757 Once tainted, it is not possible the retrieve the file size anymore since
758 the path is composed of variables.
759 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000760 if self._size is None and not self.tainted:
761 try:
762 self._size = os.stat(self.full_path).st_size
763 except OSError:
764 self._size = -1
765 return self._size
766
767 def flatten(self):
768 """Returns a dict representing this object.
769
770 A 'size' of 0 means the file was only touched and not read.
771 """
772 return {
773 'path': self.path,
774 'size': self.size,
775 }
776
777 def replace_variables(self, variables):
778 """Replaces the root of this File with one of the variables if it matches.
779
780 If a variable replacement occurs, the cloned object becomes tainted.
781 """
782 for variable, root_path in variables.iteritems():
783 if self.path.startswith(root_path):
784 return self._clone(
785 self.root, variable + self.path[len(root_path):], True)
786 # No need to clone, returns ourself.
787 return self
788
789 def strip_root(self, root):
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000790 """Returns a clone of itself with 'root' stripped off.
791
792 Note that the file is kept if it is either accessible from a symlinked
793 path that was used to access the file or through the real path.
794 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000795 # Check internal consistency.
796 assert self.tainted or (isabs(root) and root.endswith(os.path.sep)), root
797 if not self.full_path.startswith(root):
798 # Now try to resolve the symlinks to see if it can be reached this way.
799 # Only try *after* trying without resolving symlink.
800 if not self.real_path.startswith(root):
801 return None
802 path = self.real_path
803 else:
804 path = self.full_path
805 return self._clone(root, path[len(root):], self.tainted)
806
807 def _clone(self, new_root, new_path, tainted):
808 raise NotImplementedError(self.__class__.__name__)
809
810 class File(_TouchedObject):
811 """A file that was accessed. May not be present anymore.
812
813 If tainted is true, it means it is not a real path anymore as a variable
814 replacement occured.
815
maruel@chromium.org538141b2013-06-03 20:57:17 +0000816 |mode| can be one of None, TOUCHED, READ or WRITE.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000817 """
maruel@chromium.org538141b2013-06-03 20:57:17 +0000818 # Was probed for existence, and it is existent, but was never _opened_.
819 TOUCHED = 't'
820 # Opened for read only and guaranteed to not have been written to.
821 READ = 'r'
822 # Opened for write.
823 WRITE = 'w'
824
825 # They are listed in order of priority. E.g. if a file is traced as TOUCHED
826 # then as WRITE, only keep WRITE. None means no idea, which is a problem on
827 # Windows.
828 ACCEPTABLE_MODES = (None, TOUCHED, READ, WRITE)
829
830 def __init__(self, root, path, tainted, size, mode):
831 assert mode in self.ACCEPTABLE_MODES
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000832 super(Results.File, self).__init__(root, path, tainted, size, 1)
maruel@chromium.org538141b2013-06-03 20:57:17 +0000833 self.mode = mode
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000834
835 def _clone(self, new_root, new_path, tainted):
836 """Clones itself keeping meta-data."""
837 # Keep the self.size and self._real_path caches for performance reason. It
838 # is also important when the file becomes tainted (with a variable instead
839 # of the real path) since self.path is not an on-disk path anymore so
840 # out._size cannot be updated.
maruel@chromium.org538141b2013-06-03 20:57:17 +0000841 out = self.__class__(new_root, new_path, tainted, self.size, self.mode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000842 out._real_path = self._real_path
843 return out
844
maruel@chromium.org538141b2013-06-03 20:57:17 +0000845 def flatten(self):
846 out = super(Results.File, self).flatten()
847 out['mode'] = self.mode
848 return out
849
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000850 class Directory(_TouchedObject):
maruel@chromium.org538141b2013-06-03 20:57:17 +0000851 """A directory of files. Must exist.
852
853 For a Directory instance, self.size is not a cache, it's an actual value
854 that is never modified and represents the total size of the files contained
855 in this directory. It is possible that the directory is empty so that
856 size==0; this happens if there's only an invalid symlink in it.
857 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000858 def __init__(self, root, path, tainted, size, nb_files):
859 """path='.' is a valid value and must be handled appropriately."""
860 assert not path.endswith(os.path.sep), path
861 super(Results.Directory, self).__init__(
862 root, path + os.path.sep, tainted, size, nb_files)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000863
864 def flatten(self):
865 out = super(Results.Directory, self).flatten()
866 out['nb_files'] = self.nb_files
867 return out
868
869 def _clone(self, new_root, new_path, tainted):
870 """Clones itself keeping meta-data."""
871 out = self.__class__(
872 new_root,
873 new_path.rstrip(os.path.sep),
874 tainted,
875 self.size,
876 self.nb_files)
877 out._real_path = self._real_path
878 return out
879
880 class Process(object):
881 """A process that was traced.
882
883 Contains references to the files accessed by this process and its children.
884 """
885 def __init__(self, pid, files, executable, command, initial_cwd, children):
886 logging.debug('Process(%s, %d, ...)' % (pid, len(files)))
887 self.pid = pid
888 self.files = sorted(files, key=lambda x: x.path)
889 self.children = children
890 self.executable = executable
891 self.command = command
892 self.initial_cwd = initial_cwd
893
894 # Check internal consistency.
895 assert len(set(f.path for f in self.files)) == len(self.files), sorted(
896 f.path for f in self.files)
897 assert isinstance(self.children, list)
898 assert isinstance(self.files, list)
899
900 @property
901 def all(self):
902 for child in self.children:
903 for i in child.all:
904 yield i
905 yield self
906
907 def flatten(self):
908 return {
909 'children': [c.flatten() for c in self.children],
910 'command': self.command,
911 'executable': self.executable,
912 'files': [f.flatten() for f in self.files],
913 'initial_cwd': self.initial_cwd,
914 'pid': self.pid,
915 }
916
917 def strip_root(self, root):
918 assert isabs(root) and root.endswith(os.path.sep), root
919 # Loads the files after since they are constructed as objects.
920 out = self.__class__(
921 self.pid,
922 filter(None, (f.strip_root(root) for f in self.files)),
923 self.executable,
924 self.command,
925 self.initial_cwd,
926 [c.strip_root(root) for c in self.children])
927 logging.debug(
928 'strip_root(%s) %d -> %d' % (root, len(self.files), len(out.files)))
929 return out
930
931 def __init__(self, process):
932 self.process = process
933 # Cache.
934 self._files = None
935
936 def flatten(self):
937 return {
938 'root': self.process.flatten(),
939 }
940
941 @property
942 def files(self):
943 if self._files is None:
944 self._files = sorted(
945 sum((p.files for p in self.process.all), []),
946 key=lambda x: x.path)
947 return self._files
948
949 @property
950 def existent(self):
951 return [f for f in self.files if f.existent]
952
953 @property
954 def non_existent(self):
955 return [f for f in self.files if not f.existent]
956
957 def strip_root(self, root):
958 """Returns a clone with all the files outside the directory |root| removed
959 and converts all the path to be relative paths.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000960
961 It keeps files accessible through the |root| directory or that have been
962 accessed through any symlink which points to the same directory.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000963 """
964 # Resolve any symlink
965 root = os.path.realpath(root)
966 root = get_native_path_case(root).rstrip(os.path.sep) + os.path.sep
967 logging.debug('strip_root(%s)' % root)
968 return Results(self.process.strip_root(root))
969
970
971class ApiBase(object):
972 """OS-agnostic API to trace a process and its children."""
973 class Context(object):
974 """Processes one log line at a time and keeps the list of traced processes.
975
976 The parsing is complicated by the fact that logs are traced out of order for
977 strace but in-order for dtrace and logman. In addition, on Windows it is
978 very frequent that processids are reused so a flat list cannot be used. But
979 at the same time, it is impossible to faithfully construct a graph when the
980 logs are processed out of order. So both a tree and a flat mapping are used,
981 the tree is the real process tree, while the flat mapping stores the last
982 valid process for the corresponding processid. For the strace case, the
983 tree's head is guessed at the last moment.
984 """
985 class Process(object):
986 """Keeps context for one traced child process.
987
988 Logs all the files this process touched. Ignores directories.
989 """
990 def __init__(self, blacklist, pid, initial_cwd):
991 # Check internal consistency.
992 assert isinstance(pid, int), repr(pid)
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000993 assert_is_renderable(initial_cwd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000994 self.pid = pid
995 # children are Process instances.
996 self.children = []
997 self.initial_cwd = initial_cwd
998 self.cwd = None
maruel@chromium.org538141b2013-06-03 20:57:17 +0000999 self.files = {}
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001000 self.executable = None
1001 self.command = None
1002 self._blacklist = blacklist
1003
1004 def to_results_process(self):
1005 """Resolves file case sensitivity and or late-bound strings."""
1006 # When resolving files, it's normal to get dupe because a file could be
1007 # opened multiple times with different case. Resolve the deduplication
1008 # here.
1009 def fix_path(x):
1010 """Returns the native file path case.
1011
1012 Converts late-bound strings.
1013 """
1014 if not x:
1015 # Do not convert None instance to 'None'.
1016 return x
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001017 x = render(x)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001018 if os.path.isabs(x):
1019 # If the path is not absolute, which tends to happen occasionally on
1020 # Windows, it is not possible to get the native path case so ignore
1021 # that trace. It mostly happens for 'executable' value.
1022 x = get_native_path_case(x)
1023 return x
1024
maruel@chromium.org538141b2013-06-03 20:57:17 +00001025 def fix_and_blacklist_path(x, m):
1026 """Receives a tuple (filepath, mode) and processes filepath."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001027 x = fix_path(x)
1028 if not x:
1029 return
1030 # The blacklist needs to be reapplied, since path casing could
1031 # influence blacklisting.
1032 if self._blacklist(x):
1033 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00001034 # Filters out directories. Some may have passed through.
1035 if os.path.isdir(x):
1036 return
1037 return x, m
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001038
maruel@chromium.org538141b2013-06-03 20:57:17 +00001039 # Renders all the files as strings, as some could be RelativePath
1040 # instances. It is important to do it first since there could still be
1041 # multiple entries with the same path but different modes.
1042 rendered = (
1043 fix_and_blacklist_path(f, m) for f, m in self.files.iteritems())
1044 files = sorted(
1045 (f for f in rendered if f),
1046 key=lambda x: (x[0], Results.File.ACCEPTABLE_MODES.index(x[1])))
1047 # Then converting into a dict will automatically clean up lesser
1048 # important values.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001049 files = [
maruel@chromium.org538141b2013-06-03 20:57:17 +00001050 Results.File(None, f, False, None, m)
1051 for f, m in dict(files).iteritems()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001052 ]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001053 return Results.Process(
1054 self.pid,
1055 files,
1056 fix_path(self.executable),
1057 self.command,
1058 fix_path(self.initial_cwd),
1059 [c.to_results_process() for c in self.children])
1060
maruel@chromium.org538141b2013-06-03 20:57:17 +00001061 def add_file(self, filepath, mode):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001062 """Adds a file if it passes the blacklist."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001063 if self._blacklist(render(filepath)):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001064 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00001065 logging.debug('add_file(%d, %s, %s)', self.pid, filepath, mode)
1066 # Note that filepath and not render(filepath) is added. It is because
1067 # filepath could be something else than a string, like a RelativePath
1068 # instance for dtrace logs.
1069 modes = Results.File.ACCEPTABLE_MODES
1070 old_mode = self.files.setdefault(filepath, mode)
1071 if old_mode != mode and modes.index(old_mode) < modes.index(mode):
1072 # Take the highest value.
1073 self.files[filepath] = mode
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001074
1075 def __init__(self, blacklist):
1076 self.blacklist = blacklist
1077 # Initial process.
1078 self.root_process = None
1079 # dict to accelerate process lookup, to not have to lookup the whole graph
1080 # each time.
1081 self._process_lookup = {}
1082
1083 class Tracer(object):
1084 """During it's lifetime, the tracing subsystem is enabled."""
1085 def __init__(self, logname):
1086 self._logname = logname
maruel@chromium.org808e3f22013-05-16 16:42:54 +00001087 self._lock = threading.RLock()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001088 self._traces = []
1089 self._initialized = True
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00001090 self._scripts_to_cleanup = []
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001091
1092 def trace(self, cmd, cwd, tracename, output):
1093 """Runs the OS-specific trace program on an executable.
1094
1095 Arguments:
1096 - cmd: The command (a list) to run.
1097 - cwd: Current directory to start the child process in.
1098 - tracename: Name of the trace in the logname file.
1099 - output: If False, redirects output to PIPEs.
1100
1101 Returns a tuple (resultcode, output) and updates the internal trace
1102 entries.
1103 """
1104 # The implementation adds an item to self._traces.
1105 raise NotImplementedError(self.__class__.__name__)
1106
1107 def close(self, _timeout=None):
1108 """Saves the meta-data in the logname file.
1109
1110 For kernel-based tracing, stops the tracing subsystem.
1111
1112 Must not be used manually when using 'with' construct.
1113 """
1114 with self._lock:
maruel@chromium.org808e3f22013-05-16 16:42:54 +00001115 if not self._initialized:
1116 raise TracingFailure(
1117 'Called %s.close() on an unitialized object' %
1118 self.__class__.__name__,
1119 None, None, None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001120 try:
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00001121 while self._scripts_to_cleanup:
1122 try:
1123 os.remove(self._scripts_to_cleanup.pop())
1124 except OSError as e:
1125 logging.error('Failed to delete a temporary script: %s', e)
maruel@chromium.org808e3f22013-05-16 16:42:54 +00001126 write_json(self._logname, self._gen_logdata(), False)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001127 finally:
1128 self._initialized = False
1129
1130 def post_process_log(self):
1131 """Post-processes the log so it becomes faster to load afterward.
1132
1133 Must not be used manually when using 'with' construct.
1134 """
1135 assert not self._initialized, 'Must stop tracing first.'
1136
maruel@chromium.org808e3f22013-05-16 16:42:54 +00001137 def _gen_logdata(self):
1138 """Returns the data to be saved in the trace file."""
1139 return {
1140 'traces': self._traces,
1141 }
1142
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001143 def __enter__(self):
1144 """Enables 'with' statement."""
1145 return self
1146
1147 def __exit__(self, exc_type, exc_value, traceback):
1148 """Enables 'with' statement."""
1149 self.close()
1150 # If an exception was thrown, do not process logs.
1151 if not exc_type:
1152 self.post_process_log()
1153
1154 def get_tracer(self, logname):
1155 """Returns an ApiBase.Tracer instance.
1156
1157 Initializes the tracing subsystem, which is a requirement for kernel-based
1158 tracers. Only one tracer instance should be live at a time!
1159
1160 logname is the filepath to the json file that will contain the meta-data
1161 about the logs.
1162 """
1163 return self.Tracer(logname)
1164
1165 @staticmethod
1166 def clean_trace(logname):
1167 """Deletes an old log."""
1168 raise NotImplementedError()
1169
1170 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001171 def parse_log(cls, logname, blacklist, trace_name):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001172 """Processes trace logs and returns the files opened and the files that do
1173 not exist.
1174
1175 It does not track directories.
1176
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001177 Arguments:
1178 - logname: must be an absolute path.
1179 - blacklist: must be a lambda.
1180 - trace_name: optional trace to read, defaults to reading all traces.
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00001181
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001182 Most of the time, files that do not exist are temporary test files that
1183 should be put in /tmp instead. See http://crbug.com/116251.
1184
1185 Returns a list of dict with keys:
1186 - results: A Results instance.
1187 - trace: The corresponding tracename parameter provided to
1188 get_tracer().trace().
1189 - output: Output gathered during execution, if get_tracer().trace(...,
1190 output=False) was used.
1191 """
1192 raise NotImplementedError(cls.__class__.__name__)
1193
1194
1195class Strace(ApiBase):
1196 """strace implies linux."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001197 @staticmethod
1198 def load_filename(filename):
1199 """Parses a filename in a log."""
1200 assert isinstance(filename, str)
1201 out = ''
1202 i = 0
1203 while i < len(filename):
1204 c = filename[i]
1205 if c == '\\':
1206 out += chr(int(filename[i+1:i+4], 8))
1207 i += 4
1208 else:
1209 out += c
1210 i += 1
1211 # TODO(maruel): That's not necessarily true that the current code page is
1212 # utf-8.
1213 return out.decode('utf-8')
1214
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001215 class Context(ApiBase.Context):
1216 """Processes a strace log line and keeps the list of existent and non
1217 existent files accessed.
1218
1219 Ignores directories.
1220
1221 Uses late-binding to processes the cwd of each process. The problem is that
1222 strace generates one log file per process it traced but doesn't give any
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001223 information about which process was started when and by who. So process the
1224 logs out of order and use late binding with RelativePath to be able to
1225 deduce the initial directory of each process once all the logs are parsed.
1226
1227 TODO(maruel): Use the script even in the non-sudo case, so log parsing can
1228 be done in two phase: first find the root process, then process the child
1229 processes in order. With that, it should be possible to not use RelativePath
1230 anymore. This would significantly simplify the code!
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001231 """
1232 class Process(ApiBase.Context.Process):
1233 """Represents the state of a process.
1234
1235 Contains all the information retrieved from the pid-specific log.
1236 """
1237 # Function names are using ([a-z_0-9]+)
1238 # This is the most common format. function(args) = result
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001239 RE_HEADER = re.compile(r'^([a-z_0-9]+)\((.*?)\)\s+= (.+)$')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001240 # An interrupted function call, only grab the minimal header.
1241 RE_UNFINISHED = re.compile(r'^([^\(]+)(.*) \<unfinished \.\.\.\>$')
1242 # A resumed function call.
1243 RE_RESUMED = re.compile(r'^<\.\.\. ([^ ]+) resumed> (.+)$')
1244 # A process received a signal.
1245 RE_SIGNAL = re.compile(r'^--- SIG[A-Z]+ .+ ---')
1246 # A process didn't handle a signal. Ignore any junk appearing before,
1247 # because the process was forcibly killed so it won't open any new file.
1248 RE_KILLED = re.compile(
1249 r'^.*\+\+\+ killed by ([A-Z]+)( \(core dumped\))? \+\+\+$')
1250 # The process has exited.
1251 RE_PROCESS_EXITED = re.compile(r'^\+\+\+ exited with (\d+) \+\+\+')
1252 # A call was canceled. Ignore any prefix.
1253 RE_UNAVAILABLE = re.compile(r'^.*\)\s*= \? <unavailable>$')
1254 # Happens when strace fails to even get the function name.
1255 UNNAMED_FUNCTION = '????'
1256
1257 # Corner-case in python, a class member function decorator must not be
1258 # @staticmethod.
1259 def parse_args(regexp, expect_zero): # pylint: disable=E0213
1260 """Automatically convert the str 'args' into a list of processed
1261 arguments.
1262
1263 Arguments:
1264 - regexp is used to parse args.
1265 - expect_zero: one of True, False or None.
1266 - True: will check for result.startswith('0') first and will ignore
1267 the trace line completely otherwise. This is important because for
1268 many functions, the regexp will not process if the call failed.
1269 - False: will check for not result.startswith(('?', '-1')) for the
1270 same reason than with True.
1271 - None: ignore result.
1272 """
1273 def meta_hook(function):
1274 assert function.__name__.startswith('handle_')
1275 def hook(self, args, result):
1276 if expect_zero is True and not result.startswith('0'):
1277 return
1278 if expect_zero is False and result.startswith(('?', '-1')):
1279 return
1280 match = re.match(regexp, args)
1281 if not match:
1282 raise TracingFailure(
1283 'Failed to parse %s(%s) = %s' %
1284 (function.__name__[len('handle_'):], args, result),
1285 None, None, None)
1286 return function(self, match.groups(), result)
1287 return hook
1288 return meta_hook
1289
1290 class RelativePath(object):
1291 """A late-bound relative path."""
1292 def __init__(self, parent, value):
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001293 assert_is_renderable(parent)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001294 self.parent = parent
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001295 assert isinstance(value, (None.__class__, str)), repr(value)
1296 self.value = Strace.load_filename(value) if value else value
1297 if self.value:
1298 assert '\\' not in self.value, value
1299 assert '\\' not in self.value, (repr(value), repr(self.value))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001300
1301 def render(self):
1302 """Returns the current directory this instance is representing.
1303
1304 This function is used to return the late-bound value.
1305 """
1306 if self.value and self.value.startswith(u'/'):
1307 # An absolute path.
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001308 # TODO(maruel): This is wrong, we can't assert it is utf-8.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001309 return self.value
1310 parent = self.parent.render() if self.parent else u'<None>'
1311 if self.value:
1312 return os.path.normpath(os.path.join(parent, self.value))
1313 return parent
1314
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001315 def __init__(self, root, pid):
1316 """Keeps enough information to be able to guess the original process
1317 root.
1318
1319 strace doesn't store which process was the initial process. So more
1320 information needs to be kept so the graph can be reconstructed from the
1321 flat map.
1322 """
1323 logging.info('%s(%d)' % (self.__class__.__name__, pid))
1324 super(Strace.Context.Process, self).__init__(root.blacklist, pid, None)
1325 assert isinstance(root, ApiBase.Context)
1326 self._root = weakref.ref(root)
1327 # The dict key is the function name of the pending call, like 'open'
1328 # or 'execve'.
1329 self._pending_calls = {}
1330 self._line_number = 0
1331 # Current directory when the process started.
1332 self.initial_cwd = self.RelativePath(self._root(), None)
1333 self.parentid = None
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001334 self._done = False
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001335
1336 def get_cwd(self):
1337 """Returns the best known value of cwd."""
1338 return self.cwd or self.initial_cwd
1339
1340 def render(self):
1341 """Returns the string value of the RelativePath() object.
1342
1343 Used by RelativePath. Returns the initial directory and not the
1344 current one since the current directory 'cwd' validity is time-limited.
1345
1346 The validity is only guaranteed once all the logs are processed.
1347 """
1348 return self.initial_cwd.render()
1349
1350 def on_line(self, line):
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001351 assert isinstance(line, str)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001352 self._line_number += 1
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001353 try:
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001354 if self._done:
1355 raise TracingFailure(
1356 'Found a trace for a terminated process or corrupted log',
1357 None, None, None)
1358
1359 if self.RE_SIGNAL.match(line):
1360 # Ignore signals.
1361 return
1362
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001363 match = self.RE_KILLED.match(line)
1364 if match:
1365 # Converts a '+++ killed by Foo +++' trace into an exit_group().
1366 self.handle_exit_group(match.group(1), None)
1367 return
1368
1369 match = self.RE_PROCESS_EXITED.match(line)
1370 if match:
1371 # Converts a '+++ exited with 1 +++' trace into an exit_group()
1372 self.handle_exit_group(match.group(1), None)
1373 return
1374
1375 match = self.RE_UNFINISHED.match(line)
1376 if match:
1377 if match.group(1) in self._pending_calls:
1378 raise TracingFailure(
1379 'Found two unfinished calls for the same function',
1380 None, None, None,
1381 self._pending_calls)
1382 self._pending_calls[match.group(1)] = (
1383 match.group(1) + match.group(2))
1384 return
1385
1386 match = self.RE_UNAVAILABLE.match(line)
1387 if match:
1388 # This usually means a process was killed and a pending call was
1389 # canceled.
1390 # TODO(maruel): Look up the last exit_group() trace just above and
1391 # make sure any self._pending_calls[anything] is properly flushed.
1392 return
1393
1394 match = self.RE_RESUMED.match(line)
1395 if match:
1396 if match.group(1) not in self._pending_calls:
1397 raise TracingFailure(
1398 'Found a resumed call that was not logged as unfinished',
1399 None, None, None,
1400 self._pending_calls)
1401 pending = self._pending_calls.pop(match.group(1))
1402 # Reconstruct the line.
1403 line = pending + match.group(2)
1404
1405 match = self.RE_HEADER.match(line)
1406 if not match:
maruel@chromium.orgbcfc9e12013-03-19 18:47:17 +00001407 # The line is corrupted. It happens occasionally when a process is
1408 # killed forcibly with activity going on. Assume the process died.
1409 # No other line can be processed afterward.
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001410 logging.debug('%d is done: %s', self.pid, line)
maruel@chromium.orgbcfc9e12013-03-19 18:47:17 +00001411 self._done = True
1412 return
1413
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001414 if match.group(1) == self.UNNAMED_FUNCTION:
1415 return
1416
1417 # It's a valid line, handle it.
1418 handler = getattr(self, 'handle_%s' % match.group(1), None)
1419 if not handler:
1420 self._handle_unknown(match.group(1), match.group(2), match.group(3))
1421 return handler(match.group(2), match.group(3))
1422 except TracingFailure, e:
1423 # Hack in the values since the handler could be a static function.
1424 e.pid = self.pid
1425 e.line = line
1426 e.line_number = self._line_number
1427 # Re-raise the modified exception.
1428 raise
1429 except (KeyError, NotImplementedError, ValueError), e:
1430 raise TracingFailure(
1431 'Trace generated a %s exception: %s' % (
1432 e.__class__.__name__, str(e)),
1433 self.pid,
1434 self._line_number,
1435 line,
1436 e)
1437
1438 @parse_args(r'^\"(.+?)\", [FKORWX_|]+$', True)
1439 def handle_access(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001440 self._handle_file(args[0], Results.File.TOUCHED)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001441
1442 @parse_args(r'^\"(.+?)\"$', True)
1443 def handle_chdir(self, args, _result):
1444 """Updates cwd."""
1445 self.cwd = self.RelativePath(self, args[0])
1446 logging.debug('handle_chdir(%d, %s)' % (self.pid, self.cwd))
1447
maruel@chromium.org538141b2013-06-03 20:57:17 +00001448 @parse_args(r'^\"(.+?)\", (\d+), (\d+)$', False)
1449 def handle_chown(self, args, _result):
1450 # TODO(maruel): Look at result?
1451 self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org55550722013-05-28 14:43:14 +00001452
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001453 def handle_clone(self, _args, result):
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001454 self._handling_forking('clone', result)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001455
1456 def handle_close(self, _args, _result):
1457 pass
1458
maruel@chromium.org538141b2013-06-03 20:57:17 +00001459 @parse_args(r'^\"(.+?)\", (\d+)$', False)
1460 def handle_chmod(self, args, _result):
1461 self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001462
maruel@chromium.org538141b2013-06-03 20:57:17 +00001463 @parse_args(r'^\"(.+?)\", (\d+)$', False)
1464 def handle_creat(self, args, _result):
1465 self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001466
1467 @parse_args(r'^\"(.+?)\", \[(.+)\], \[\/\* \d+ vars? \*\/\]$', True)
1468 def handle_execve(self, args, _result):
1469 # Even if in practice execve() doesn't returns when it succeeds, strace
1470 # still prints '0' as the result.
1471 filepath = args[0]
maruel@chromium.org538141b2013-06-03 20:57:17 +00001472 self._handle_file(filepath, Results.File.READ)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001473 self.executable = self.RelativePath(self.get_cwd(), filepath)
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001474 try:
1475 self.command = strace_process_quoted_arguments(args[1])
1476 except ValueError as e:
1477 raise TracingFailure(
1478 'Failed to process command line argument:\n%s' % e.args[0],
1479 None, None, None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001480
1481 def handle_exit_group(self, _args, _result):
1482 """Removes cwd."""
1483 self.cwd = None
1484
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001485 @parse_args(r'^(\d+|AT_FDCWD), \"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', True)
1486 def handle_faccessat(self, args, _results):
1487 if args[0] == 'AT_FDCWD':
maruel@chromium.org538141b2013-06-03 20:57:17 +00001488 self._handle_file(args[1], Results.File.TOUCHED)
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001489 else:
1490 raise Exception('Relative faccess not implemented.')
1491
maruel@chromium.org0781f322013-05-28 19:45:49 +00001492 def handle_fallocate(self, _args, result):
1493 pass
1494
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001495 def handle_fork(self, args, result):
1496 self._handle_unknown('fork', args, result)
1497
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001498 def handle_futex(self, _args, _result):
1499 pass
1500
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001501 def handle_getcwd(self, _args, _result):
1502 pass
1503
1504 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1505 def handle_link(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001506 self._handle_file(args[0], Results.File.READ)
1507 self._handle_file(args[1], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001508
1509 @parse_args(r'\"(.+?)\", \{.+?, \.\.\.\}', True)
1510 def handle_lstat(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001511 self._handle_file(args[0], Results.File.TOUCHED)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001512
1513 def handle_mkdir(self, _args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001514 # We track content, not directories.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001515 pass
1516
1517 @parse_args(r'^\"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', False)
1518 def handle_open(self, args, _result):
1519 if 'O_DIRECTORY' in args[1]:
1520 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00001521 self._handle_file(
1522 args[0],
1523 Results.File.READ if 'O_RDONLY' in args[1] else Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001524
1525 @parse_args(r'^(\d+|AT_FDCWD), \"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', False)
1526 def handle_openat(self, args, _result):
1527 if 'O_DIRECTORY' in args[2]:
1528 return
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001529 if args[0] == 'AT_FDCWD':
maruel@chromium.org538141b2013-06-03 20:57:17 +00001530 self._handle_file(
1531 args[1],
1532 Results.File.READ if 'O_RDONLY' in args[2]
1533 else Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001534 else:
1535 # TODO(maruel): Implement relative open if necessary instead of the
1536 # AT_FDCWD flag, let's hope not since this means tracking all active
1537 # directory handles.
maruel@chromium.org538141b2013-06-03 20:57:17 +00001538 raise NotImplementedError('Relative open via openat not implemented.')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001539
1540 @parse_args(r'^\"(.+?)\", \".+?\"(\.\.\.)?, \d+$', False)
1541 def handle_readlink(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001542 self._handle_file(args[0], Results.File.READ)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001543
1544 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1545 def handle_rename(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001546 self._handle_file(args[0], Results.File.READ)
1547 self._handle_file(args[1], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001548
1549 def handle_rmdir(self, _args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001550 # We track content, not directories.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001551 pass
1552
1553 def handle_setxattr(self, _args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001554 # TODO(maruel): self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001555 pass
1556
1557 @parse_args(r'\"(.+?)\", \{.+?, \.\.\.\}', True)
1558 def handle_stat(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001559 self._handle_file(args[0], Results.File.TOUCHED)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001560
maruel@chromium.org538141b2013-06-03 20:57:17 +00001561 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1562 def handle_symlink(self, args, _result):
1563 self._handle_file(args[0], Results.File.TOUCHED)
1564 self._handle_file(args[1], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001565
csharp@chromium.orgfa672922012-11-02 17:43:31 +00001566 @parse_args(r'^\"(.+?)\", \d+', True)
1567 def handle_truncate(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001568 self._handle_file(args[0], Results.File.WRITE)
csharp@chromium.orgfa672922012-11-02 17:43:31 +00001569
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001570 def handle_unlink(self, _args, _result):
1571 # In theory, the file had to be created anyway.
1572 pass
1573
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001574 def handle_unlinkat(self, _args, _result):
1575 # In theory, the file had to be created anyway.
1576 pass
1577
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001578 def handle_statfs(self, _args, _result):
1579 pass
1580
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001581 def handle_utimensat(self, _args, _result):
1582 pass
1583
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001584 def handle_vfork(self, _args, result):
1585 self._handling_forking('vfork', result)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001586
1587 @staticmethod
1588 def _handle_unknown(function, args, result):
1589 raise TracingFailure(
1590 'Unexpected/unimplemented trace %s(%s)= %s' %
1591 (function, args, result),
1592 None, None, None)
1593
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001594 def _handling_forking(self, name, result):
1595 """Transfers cwd."""
1596 if result.startswith(('?', '-1')):
1597 # The call failed.
1598 return
1599 # Update the other process right away.
1600 childpid = int(result)
1601 child = self._root().get_or_set_proc(childpid)
1602 if child.parentid is not None or childpid in self.children:
1603 raise TracingFailure(
1604 'Found internal inconsitency in process lifetime detection '
1605 'during a %s() call' % name,
1606 None, None, None)
1607
1608 # Copy the cwd object.
1609 child.initial_cwd = self.get_cwd()
1610 child.parentid = self.pid
1611 # It is necessary because the logs are processed out of order.
1612 self.children.append(child)
1613
maruel@chromium.org538141b2013-06-03 20:57:17 +00001614 def _handle_file(self, filepath, mode):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001615 filepath = self.RelativePath(self.get_cwd(), filepath)
maruel@chromium.org538141b2013-06-03 20:57:17 +00001616 self.add_file(filepath, mode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001617
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001618 def __init__(self, blacklist, root_pid, initial_cwd):
1619 """|root_pid| may be None when the root process is not known.
1620
1621 In that case, a search is done after reading all the logs to figure out
1622 the root process.
1623 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001624 super(Strace.Context, self).__init__(blacklist)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001625 assert_is_renderable(initial_cwd)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001626 self.root_pid = root_pid
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001627 self.initial_cwd = initial_cwd
1628
1629 def render(self):
1630 """Returns the string value of the initial cwd of the root process.
1631
1632 Used by RelativePath.
1633 """
1634 return self.initial_cwd
1635
1636 def on_line(self, pid, line):
1637 """Transfers control into the Process.on_line() function."""
1638 self.get_or_set_proc(pid).on_line(line.strip())
1639
1640 def to_results(self):
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001641 """If necessary, finds back the root process and verify consistency."""
1642 if not self.root_pid:
1643 # The non-sudo case. The traced process was started by strace itself,
1644 # so the pid of the traced process is not known.
1645 root = [p for p in self._process_lookup.itervalues() if not p.parentid]
1646 if len(root) == 1:
1647 self.root_process = root[0]
1648 # Save it for later.
1649 self.root_pid = self.root_process.pid
1650 else:
1651 # The sudo case. The traced process was started manually so its pid is
1652 # known.
1653 self.root_process = self._process_lookup.get(self.root_pid)
1654 if not self.root_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001655 raise TracingFailure(
1656 'Found internal inconsitency in process lifetime detection '
1657 'while finding the root process',
1658 None,
1659 None,
1660 None,
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001661 self.root_pid,
1662 sorted(self._process_lookup))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001663 process = self.root_process.to_results_process()
1664 if sorted(self._process_lookup) != sorted(p.pid for p in process.all):
1665 raise TracingFailure(
1666 'Found internal inconsitency in process lifetime detection '
1667 'while looking for len(tree) == len(list)',
1668 None,
1669 None,
1670 None,
1671 sorted(self._process_lookup),
1672 sorted(p.pid for p in process.all))
1673 return Results(process)
1674
1675 def get_or_set_proc(self, pid):
1676 """Returns the Context.Process instance for this pid or creates a new one.
1677 """
1678 if not pid or not isinstance(pid, int):
1679 raise TracingFailure(
1680 'Unpexpected value for pid: %r' % pid,
1681 pid,
1682 None,
1683 None,
1684 pid)
1685 if pid not in self._process_lookup:
1686 self._process_lookup[pid] = self.Process(self, pid)
1687 return self._process_lookup[pid]
1688
1689 @classmethod
1690 def traces(cls):
1691 """Returns the list of all handled traces to pass this as an argument to
1692 strace.
1693 """
1694 prefix = 'handle_'
1695 return [i[len(prefix):] for i in dir(cls.Process) if i.startswith(prefix)]
1696
1697 class Tracer(ApiBase.Tracer):
1698 MAX_LEN = 256
1699
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001700 def __init__(self, logname, use_sudo):
1701 super(Strace.Tracer, self).__init__(logname)
1702 self.use_sudo = use_sudo
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001703 if use_sudo:
1704 # TODO(maruel): Use the jump script systematically to make it easy to
1705 # figure out the root process, so RelativePath is not necessary anymore.
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00001706 self._child_script = create_exec_thunk()
1707 self._scripts_to_cleanup.append(self._child_script)
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001708
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001709 def trace(self, cmd, cwd, tracename, output):
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001710 """Runs strace on an executable.
1711
1712 When use_sudo=True, it is a 3-phases process: start the thunk, start
1713 sudo strace with the pid of the thunk and then have the thunk os.execve()
1714 the process to trace.
1715 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001716 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
1717 assert os.path.isabs(cmd[0]), cmd[0]
1718 assert os.path.isabs(cwd), cwd
1719 assert os.path.normpath(cwd) == cwd, cwd
1720 with self._lock:
1721 if not self._initialized:
1722 raise TracingFailure(
1723 'Called Tracer.trace() on an unitialized object',
1724 None, None, None, tracename)
1725 assert tracename not in (i['trace'] for i in self._traces)
1726 stdout = stderr = None
1727 if output:
1728 stdout = subprocess.PIPE
1729 stderr = subprocess.STDOUT
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001730
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001731 # Ensure all file related APIs are hooked.
1732 traces = ','.join(Strace.Context.traces() + ['file'])
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001733 flags = [
maruel@chromium.orga5cfdc32013-05-28 14:46:15 +00001734 # Each child process has its own trace file. It is necessary because
1735 # strace may generate corrupted log file if multiple processes are
1736 # heavily doing syscalls simultaneously.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001737 '-ff',
maruel@chromium.orga5cfdc32013-05-28 14:46:15 +00001738 # Reduce whitespace usage.
1739 '-a1',
1740 # hex encode non-ascii strings.
1741 # TODO(maruel): '-x',
1742 # TODO(maruel): '-ttt',
1743 # Signals are unnecessary noise here. Note the parser can cope with them
1744 # but reduce the unnecessary output.
1745 '-esignal=none',
1746 # Print as much data as wanted.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001747 '-s', '%d' % self.MAX_LEN,
1748 '-e', 'trace=%s' % traces,
1749 '-o', self._logname + '.' + tracename,
1750 ]
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001751
1752 if self.use_sudo:
1753 pipe_r, pipe_w = os.pipe()
1754 # Start the child process paused.
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00001755 target_cmd = [sys.executable, self._child_script, str(pipe_r)] + cmd
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001756 logging.debug(' '.join(target_cmd))
1757 child_proc = subprocess.Popen(
1758 target_cmd,
1759 stdin=subprocess.PIPE,
1760 stdout=stdout,
1761 stderr=stderr,
1762 cwd=cwd)
1763
1764 # TODO(maruel): both processes must use the same UID for it to work
1765 # without sudo. Look why -p is failing at the moment without sudo.
1766 trace_cmd = [
1767 'sudo',
1768 'strace',
1769 '-p', str(child_proc.pid),
1770 ] + flags
1771 logging.debug(' '.join(trace_cmd))
1772 strace_proc = subprocess.Popen(
1773 trace_cmd,
1774 cwd=cwd,
1775 stdin=subprocess.PIPE,
1776 stdout=subprocess.PIPE,
1777 stderr=subprocess.PIPE)
1778
1779 line = strace_proc.stderr.readline()
1780 if not re.match(r'^Process \d+ attached \- interrupt to quit$', line):
1781 # TODO(maruel): Raise an exception.
1782 assert False, line
1783
1784 # Now fire the child process.
1785 os.write(pipe_w, 'x')
1786
1787 out = child_proc.communicate()[0]
1788 strace_out = strace_proc.communicate()[0]
1789
1790 # TODO(maruel): if strace_proc.returncode: Add an exception.
1791 saved_out = strace_out if strace_proc.returncode else out
1792 root_pid = child_proc.pid
1793 else:
1794 # Non-sudo case.
1795 trace_cmd = [
1796 'strace',
1797 ] + flags + cmd
1798 logging.debug(' '.join(trace_cmd))
1799 child_proc = subprocess.Popen(
1800 trace_cmd,
1801 cwd=cwd,
1802 stdin=subprocess.PIPE,
1803 stdout=stdout,
1804 stderr=stderr)
1805 out = child_proc.communicate()[0]
1806 # TODO(maruel): Walk the logs and figure out the root process would
1807 # simplify parsing the logs a *lot*.
1808 saved_out = out
1809 # The trace reader will have to figure out.
1810 root_pid = None
1811
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001812 with self._lock:
1813 assert tracename not in (i['trace'] for i in self._traces)
1814 self._traces.append(
1815 {
1816 'cmd': cmd,
1817 'cwd': cwd,
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001818 'output': saved_out,
1819 'pid': root_pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001820 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001821 })
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001822 return child_proc.returncode, out
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001823
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001824 def __init__(self, use_sudo=None):
1825 super(Strace, self).__init__()
1826 self.use_sudo = use_sudo
1827
1828 def get_tracer(self, logname):
1829 return self.Tracer(logname, self.use_sudo)
1830
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001831 @staticmethod
1832 def clean_trace(logname):
1833 if os.path.isfile(logname):
1834 os.remove(logname)
1835 # Also delete any pid specific file from previous traces.
1836 for i in glob.iglob(logname + '.*'):
1837 if i.rsplit('.', 1)[1].isdigit():
1838 os.remove(i)
1839
1840 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001841 def parse_log(cls, logname, blacklist, trace_name):
1842 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00001843 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001844 data = read_json(logname)
1845 out = []
1846 for item in data['traces']:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001847 if trace_name and item['trace'] != trace_name:
1848 continue
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001849 result = {
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001850 'output': item['output'],
maruel@chromium.org964ddde2012-10-29 18:13:46 +00001851 'trace': item['trace'],
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001852 }
1853 try:
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001854 context = cls.Context(blacklist, item['pid'], item['cwd'])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001855 for pidfile in glob.iglob('%s.%s.*' % (logname, item['trace'])):
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001856 logging.debug('Reading %s', pidfile)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001857 pid = pidfile.rsplit('.', 1)[1]
1858 if pid.isdigit():
1859 pid = int(pid)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001860 found_line = False
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001861 for line in open(pidfile, 'rb'):
1862 context.on_line(pid, line)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001863 found_line = True
1864 if not found_line:
1865 # Ensures that a completely empty trace still creates the
1866 # corresponding Process instance by logging a dummy line.
1867 context.on_line(pid, '')
1868 else:
1869 logging.warning('Found unexpected file %s', pidfile)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001870 result['results'] = context.to_results()
maruel@chromium.org424f5692012-10-15 15:42:38 +00001871 except TracingFailure:
1872 result['exception'] = sys.exc_info()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001873 out.append(result)
1874 return out
1875
1876
1877class Dtrace(ApiBase):
1878 """Uses DTrace framework through dtrace. Requires root access.
1879
1880 Implies Mac OSX.
1881
1882 dtruss can't be used because it has compatibility issues with python.
1883
1884 Also, the pid->cwd handling needs to be done manually since OSX has no way to
1885 get the absolute path of the 'cwd' dtrace variable from the probe.
1886
1887 Also, OSX doesn't populate curpsinfo->pr_psargs properly, see
1888 https://discussions.apple.com/thread/1980539. So resort to handling execve()
1889 manually.
1890
1891 errno is not printed in the log since this implementation currently only cares
1892 about files that were successfully opened.
1893 """
1894 class Context(ApiBase.Context):
1895 # Format: index pid function(args)
1896 RE_HEADER = re.compile(r'^\d+ (\d+) ([a-zA-Z_\-]+)\((.*?)\)$')
1897
1898 # Arguments parsing.
1899 RE_DTRACE_BEGIN = re.compile(r'^\"(.+?)\"$')
1900 RE_CHDIR = re.compile(r'^\"(.+?)\"$')
1901 RE_EXECVE = re.compile(r'^\"(.+?)\", \[(\d+), (.+)\]$')
1902 RE_OPEN = re.compile(r'^\"(.+?)\", (0x[0-9a-z]+), (0x[0-9a-z]+)$')
1903 RE_PROC_START = re.compile(r'^(\d+), \"(.+?)\", (\d+)$')
1904 RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$')
1905
maruel@chromium.org538141b2013-06-03 20:57:17 +00001906 O_DIRECTORY = os.O_DIRECTORY
1907 O_RDWR = os.O_RDWR
1908 O_WRONLY = os.O_WRONLY
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001909
1910 class Process(ApiBase.Context.Process):
1911 def __init__(self, *args):
1912 super(Dtrace.Context.Process, self).__init__(*args)
1913 self.cwd = self.initial_cwd
1914
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001915 def __init__(self, blacklist, thunk_pid, initial_cwd):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001916 logging.info(
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001917 '%s(%d, %s)' % (self.__class__.__name__, thunk_pid, initial_cwd))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001918 super(Dtrace.Context, self).__init__(blacklist)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001919 assert isinstance(initial_cwd, unicode), initial_cwd
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001920 # Process ID of the temporary script created by create_subprocess_thunk().
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001921 self._thunk_pid = thunk_pid
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001922 self._initial_cwd = initial_cwd
1923 self._line_number = 0
1924
1925 def on_line(self, line):
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001926 assert isinstance(line, unicode), line
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001927 self._line_number += 1
1928 match = self.RE_HEADER.match(line)
1929 if not match:
1930 raise TracingFailure(
1931 'Found malformed line: %s' % line,
1932 None,
1933 self._line_number,
1934 line)
1935 fn = getattr(
1936 self,
1937 'handle_%s' % match.group(2).replace('-', '_'),
1938 self._handle_ignored)
1939 # It is guaranteed to succeed because of the regexp. Or at least I thought
1940 # it would.
1941 pid = int(match.group(1))
1942 try:
1943 return fn(pid, match.group(3))
1944 except TracingFailure, e:
1945 # Hack in the values since the handler could be a static function.
1946 e.pid = pid
1947 e.line = line
1948 e.line_number = self._line_number
1949 # Re-raise the modified exception.
1950 raise
1951 except (KeyError, NotImplementedError, ValueError), e:
1952 raise TracingFailure(
1953 'Trace generated a %s exception: %s' % (
1954 e.__class__.__name__, str(e)),
1955 pid,
1956 self._line_number,
1957 line,
1958 e)
1959
1960 def to_results(self):
1961 process = self.root_process.to_results_process()
1962 # Internal concistency check.
1963 if sorted(self._process_lookup) != sorted(p.pid for p in process.all):
1964 raise TracingFailure(
1965 'Found internal inconsitency in process lifetime detection '
1966 'while looking for len(tree) == len(list)',
1967 None,
1968 None,
1969 None,
1970 sorted(self._process_lookup),
1971 sorted(p.pid for p in process.all))
1972 return Results(process)
1973
1974 def handle_dtrace_BEGIN(self, _pid, args):
1975 if not self.RE_DTRACE_BEGIN.match(args):
1976 raise TracingFailure(
1977 'Found internal inconsitency in dtrace_BEGIN log line',
1978 None, None, None)
1979
1980 def handle_proc_start(self, pid, args):
1981 """Transfers cwd.
1982
1983 The dtrace script already takes care of only tracing the processes that
1984 are child of the traced processes so there is no need to verify the
1985 process hierarchy.
1986 """
1987 if pid in self._process_lookup:
1988 raise TracingFailure(
1989 'Found internal inconsitency in proc_start: %d started two times' %
1990 pid,
1991 None, None, None)
1992 match = self.RE_PROC_START.match(args)
1993 if not match:
1994 raise TracingFailure(
1995 'Failed to parse arguments: %s' % args,
1996 None, None, None)
1997 ppid = int(match.group(1))
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001998 if ppid == self._thunk_pid and not self.root_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001999 proc = self.root_process = self.Process(
2000 self.blacklist, pid, self._initial_cwd)
2001 elif ppid in self._process_lookup:
2002 proc = self.Process(self.blacklist, pid, self._process_lookup[ppid].cwd)
2003 self._process_lookup[ppid].children.append(proc)
2004 else:
2005 # Another process tree, ignore.
2006 return
2007 self._process_lookup[pid] = proc
2008 logging.debug(
2009 'New child: %s -> %d cwd:%s' %
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002010 (ppid, pid, render(proc.initial_cwd)))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002011
2012 def handle_proc_exit(self, pid, _args):
2013 """Removes cwd."""
2014 if pid in self._process_lookup:
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002015 # self._thunk_pid is not traced itself and other traces run neither.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002016 self._process_lookup[pid].cwd = None
2017
2018 def handle_execve(self, pid, args):
2019 """Sets the process' executable.
2020
2021 TODO(maruel): Read command line arguments. See
2022 https://discussions.apple.com/thread/1980539 for an example.
2023 https://gist.github.com/1242279
2024
2025 Will have to put the answer at http://stackoverflow.com/questions/7556249.
2026 :)
2027 """
2028 if not pid in self._process_lookup:
2029 # Another process tree, ignore.
2030 return
2031 match = self.RE_EXECVE.match(args)
2032 if not match:
2033 raise TracingFailure(
2034 'Failed to parse arguments: %r' % args,
2035 None, None, None)
2036 proc = self._process_lookup[pid]
2037 proc.executable = match.group(1)
maruel@chromium.org538141b2013-06-03 20:57:17 +00002038 self._handle_file(pid, proc.executable, Results.File.READ)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002039 proc.command = self.process_escaped_arguments(match.group(3))
2040 if int(match.group(2)) != len(proc.command):
2041 raise TracingFailure(
2042 'Failed to parse execve() arguments: %s' % args,
2043 None, None, None)
2044
2045 def handle_chdir(self, pid, args):
2046 """Updates cwd."""
2047 if pid not in self._process_lookup:
2048 # Another process tree, ignore.
2049 return
2050 cwd = self.RE_CHDIR.match(args).group(1)
2051 if not cwd.startswith('/'):
2052 cwd2 = os.path.join(self._process_lookup[pid].cwd, cwd)
2053 logging.debug('handle_chdir(%d, %s) -> %s' % (pid, cwd, cwd2))
2054 else:
2055 logging.debug('handle_chdir(%d, %s)' % (pid, cwd))
2056 cwd2 = cwd
2057 self._process_lookup[pid].cwd = cwd2
2058
2059 def handle_open_nocancel(self, pid, args):
2060 """Redirects to handle_open()."""
2061 return self.handle_open(pid, args)
2062
2063 def handle_open(self, pid, args):
2064 if pid not in self._process_lookup:
2065 # Another process tree, ignore.
2066 return
2067 match = self.RE_OPEN.match(args)
2068 if not match:
2069 raise TracingFailure(
2070 'Failed to parse arguments: %s' % args,
2071 None, None, None)
2072 flag = int(match.group(2), 16)
2073 if self.O_DIRECTORY & flag == self.O_DIRECTORY:
2074 # Ignore directories.
2075 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00002076 self._handle_file(
2077 pid,
2078 match.group(1),
2079 Results.File.READ if not ((self.O_RDWR | self.O_WRONLY) & flag)
2080 else Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002081
2082 def handle_rename(self, pid, args):
2083 if pid not in self._process_lookup:
2084 # Another process tree, ignore.
2085 return
2086 match = self.RE_RENAME.match(args)
2087 if not match:
2088 raise TracingFailure(
2089 'Failed to parse arguments: %s' % args,
2090 None, None, None)
maruel@chromium.org538141b2013-06-03 20:57:17 +00002091 self._handle_file(pid, match.group(1), Results.File.READ)
2092 self._handle_file(pid, match.group(2), Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002093
maruel@chromium.org538141b2013-06-03 20:57:17 +00002094 def _handle_file(self, pid, filepath, mode):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002095 if not filepath.startswith('/'):
2096 filepath = os.path.join(self._process_lookup[pid].cwd, filepath)
2097 # We can get '..' in the path.
2098 filepath = os.path.normpath(filepath)
2099 # Sadly, still need to filter out directories here;
2100 # saw open_nocancel(".", 0, 0) = 0 lines.
2101 if os.path.isdir(filepath):
2102 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00002103 self._process_lookup[pid].add_file(filepath, mode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002104
2105 def handle_ftruncate(self, pid, args):
2106 """Just used as a signal to kill dtrace, ignoring."""
2107 pass
2108
2109 @staticmethod
2110 def _handle_ignored(pid, args):
2111 """Is called for all the event traces that are not handled."""
2112 raise NotImplementedError('Please implement me')
2113
2114 @staticmethod
2115 def process_escaped_arguments(text):
2116 """Extracts escaped arguments on a string and return the arguments as a
2117 list.
2118
2119 Implemented as an automaton.
2120
2121 Example:
2122 With text = '\\001python2.7\\001-c\\001print(\\"hi\\")\\0', the
2123 function will return ['python2.7', '-c', 'print("hi")]
2124 """
2125 if not text.endswith('\\0'):
2126 raise ValueError('String is not null terminated: %r' % text, text)
2127 text = text[:-2]
2128
2129 def unescape(x):
2130 """Replaces '\\' with '\' and '\?' (where ? is anything) with ?."""
2131 out = []
2132 escaped = False
2133 for i in x:
2134 if i == '\\' and not escaped:
2135 escaped = True
2136 continue
2137 escaped = False
2138 out.append(i)
2139 return ''.join(out)
2140
2141 return [unescape(i) for i in text.split('\\001')]
2142
2143 class Tracer(ApiBase.Tracer):
2144 # pylint: disable=C0301
2145 #
2146 # To understand the following code, you'll want to take a look at:
2147 # http://developers.sun.com/solaris/articles/dtrace_quickref/dtrace_quickref.html
2148 # https://wikis.oracle.com/display/DTrace/Variables
2149 # http://docs.oracle.com/cd/E19205-01/820-4221/
2150 #
2151 # 0. Dump all the valid probes into a text file. It is important, you
2152 # want to redirect into a file and you don't want to constantly 'sudo'.
2153 # $ sudo dtrace -l > probes.txt
2154 #
2155 # 1. Count the number of probes:
2156 # $ wc -l probes.txt
2157 # 81823 # On OSX 10.7, including 1 header line.
2158 #
2159 # 2. List providers, intentionally skipping all the 'syspolicy10925' and the
2160 # likes and skipping the header with NR>1:
2161 # $ awk 'NR>1 { print $2 }' probes.txt | sort | uniq | grep -v '[[:digit:]]'
2162 # dtrace
2163 # fbt
2164 # io
2165 # ip
2166 # lockstat
2167 # mach_trap
2168 # proc
2169 # profile
2170 # sched
2171 # syscall
2172 # tcp
2173 # vminfo
2174 #
2175 # 3. List of valid probes:
2176 # $ grep syscall probes.txt | less
2177 # or use dtrace directly:
2178 # $ sudo dtrace -l -P syscall | less
2179 #
2180 # trackedpid is an associative array where its value can be 0, 1 or 2.
2181 # 0 is for untracked processes and is the default value for items not
2182 # in the associative array.
2183 # 1 is for tracked processes.
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002184 # 2 is for the script created by create_subprocess_thunk() only. It is not
2185 # tracked itself but all its decendants are.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002186 #
2187 # The script will kill itself only once waiting_to_die == 1 and
2188 # current_processes == 0, so that both getlogin() was called and that
2189 # all traced processes exited.
2190 #
2191 # TODO(maruel): Use cacheable predicates. See
2192 # https://wikis.oracle.com/display/DTrace/Performance+Considerations
2193 D_CODE = """
2194 dtrace:::BEGIN {
2195 waiting_to_die = 0;
2196 current_processes = 0;
2197 logindex = 0;
2198 printf("%d %d %s_%s(\\"%s\\")\\n",
2199 logindex, PID, probeprov, probename, SCRIPT);
2200 logindex++;
2201 }
2202
2203 proc:::start /trackedpid[ppid]/ {
2204 trackedpid[pid] = 1;
2205 current_processes += 1;
2206 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
2207 logindex, pid, probeprov, probename,
2208 ppid,
2209 execname,
2210 current_processes);
2211 logindex++;
2212 }
2213 /* Should use SCRIPT but there is no access to this variable at that
2214 * point. */
2215 proc:::start /ppid == PID && execname == "Python"/ {
2216 trackedpid[pid] = 2;
2217 current_processes += 1;
2218 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
2219 logindex, pid, probeprov, probename,
2220 ppid,
2221 execname,
2222 current_processes);
2223 logindex++;
2224 }
2225 proc:::exit /trackedpid[pid] &&
2226 current_processes == 1 &&
2227 waiting_to_die == 1/ {
2228 trackedpid[pid] = 0;
2229 current_processes -= 1;
2230 printf("%d %d %s_%s(%d)\\n",
2231 logindex, pid, probeprov, probename,
2232 current_processes);
2233 logindex++;
2234 exit(0);
2235 }
2236 proc:::exit /trackedpid[pid]/ {
2237 trackedpid[pid] = 0;
2238 current_processes -= 1;
2239 printf("%d %d %s_%s(%d)\\n",
2240 logindex, pid, probeprov, probename,
2241 current_processes);
2242 logindex++;
2243 }
2244
2245 /* Use an arcane function to detect when we need to die */
2246 syscall::ftruncate:entry /pid == PID && arg0 == FILE_ID/ {
2247 waiting_to_die = 1;
2248 printf("%d %d %s()\\n", logindex, pid, probefunc);
2249 logindex++;
2250 }
2251 syscall::ftruncate:entry /
2252 pid == PID && arg0 == FILE_ID && current_processes == 0/ {
2253 exit(0);
2254 }
2255
2256 syscall::open*:entry /trackedpid[pid] == 1/ {
2257 self->open_arg0 = arg0;
2258 self->open_arg1 = arg1;
2259 self->open_arg2 = arg2;
2260 }
2261 syscall::open*:return /trackedpid[pid] == 1 && errno == 0/ {
2262 this->open_arg0 = copyinstr(self->open_arg0);
2263 printf("%d %d %s(\\"%s\\", 0x%x, 0x%x)\\n",
2264 logindex, pid, probefunc,
2265 this->open_arg0,
2266 self->open_arg1,
2267 self->open_arg2);
2268 logindex++;
2269 this->open_arg0 = 0;
2270 }
2271 syscall::open*:return /trackedpid[pid] == 1/ {
2272 self->open_arg0 = 0;
2273 self->open_arg1 = 0;
2274 self->open_arg2 = 0;
2275 }
2276
2277 syscall::rename:entry /trackedpid[pid] == 1/ {
2278 self->rename_arg0 = arg0;
2279 self->rename_arg1 = arg1;
2280 }
2281 syscall::rename:return /trackedpid[pid] == 1 && errno == 0/ {
2282 this->rename_arg0 = copyinstr(self->rename_arg0);
2283 this->rename_arg1 = copyinstr(self->rename_arg1);
2284 printf("%d %d %s(\\"%s\\", \\"%s\\")\\n",
2285 logindex, pid, probefunc,
2286 this->rename_arg0,
2287 this->rename_arg1);
2288 logindex++;
2289 this->rename_arg0 = 0;
2290 this->rename_arg1 = 0;
2291 }
2292 syscall::rename:return /trackedpid[pid] == 1/ {
2293 self->rename_arg0 = 0;
2294 self->rename_arg1 = 0;
2295 }
2296
2297 /* Track chdir, it's painful because it is only receiving relative path.
2298 */
2299 syscall::chdir:entry /trackedpid[pid] == 1/ {
2300 self->chdir_arg0 = arg0;
2301 }
2302 syscall::chdir:return /trackedpid[pid] == 1 && errno == 0/ {
2303 this->chdir_arg0 = copyinstr(self->chdir_arg0);
2304 printf("%d %d %s(\\"%s\\")\\n",
2305 logindex, pid, probefunc,
2306 this->chdir_arg0);
2307 logindex++;
2308 this->chdir_arg0 = 0;
2309 }
2310 syscall::chdir:return /trackedpid[pid] == 1/ {
2311 self->chdir_arg0 = 0;
2312 }
2313 """
2314
2315 # execve-specific code, tends to throw a lot of exceptions.
2316 D_CODE_EXECVE = """
2317 /* Finally what we care about! */
2318 syscall::exec*:entry /trackedpid[pid]/ {
2319 self->exec_arg0 = copyinstr(arg0);
2320 /* Incrementally probe for a NULL in the argv parameter of execve() to
2321 * figure out argc. */
2322 /* TODO(maruel): Skip the remaining copyin() when a NULL pointer was
2323 * found. */
2324 self->exec_argc = 0;
2325 /* Probe for argc==1 */
2326 this->exec_argv = (user_addr_t*)copyin(
2327 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2328 self->exec_argc = this->exec_argv[self->exec_argc] ?
2329 (self->exec_argc + 1) : self->exec_argc;
2330
2331 /* Probe for argc==2 */
2332 this->exec_argv = (user_addr_t*)copyin(
2333 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2334 self->exec_argc = this->exec_argv[self->exec_argc] ?
2335 (self->exec_argc + 1) : self->exec_argc;
2336
2337 /* Probe for argc==3 */
2338 this->exec_argv = (user_addr_t*)copyin(
2339 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2340 self->exec_argc = this->exec_argv[self->exec_argc] ?
2341 (self->exec_argc + 1) : self->exec_argc;
2342
2343 /* Probe for argc==4 */
2344 this->exec_argv = (user_addr_t*)copyin(
2345 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2346 self->exec_argc = this->exec_argv[self->exec_argc] ?
2347 (self->exec_argc + 1) : self->exec_argc;
2348
2349 /* Copy the inputs strings since there is no guarantee they'll be
2350 * present after the call completed. */
2351 self->exec_argv0 = (self->exec_argc > 0) ?
2352 copyinstr(this->exec_argv[0]) : "";
2353 self->exec_argv1 = (self->exec_argc > 1) ?
2354 copyinstr(this->exec_argv[1]) : "";
2355 self->exec_argv2 = (self->exec_argc > 2) ?
2356 copyinstr(this->exec_argv[2]) : "";
2357 self->exec_argv3 = (self->exec_argc > 3) ?
2358 copyinstr(this->exec_argv[3]) : "";
2359 this->exec_argv = 0;
2360 }
2361 syscall::exec*:return /trackedpid[pid] && errno == 0/ {
2362 /* We need to join strings here, as using multiple printf() would
2363 * cause tearing when multiple threads/processes are traced.
2364 * Since it is impossible to escape a string and join it to another one,
2365 * like sprintf("%s%S", previous, more), use hackery.
2366 * Each of the elements are split with a \\1. \\0 cannot be used because
2367 * it is simply ignored. This will conflict with any program putting a
2368 * \\1 in their execve() string but this should be "rare enough" */
2369 this->args = "";
2370 /* Process exec_argv[0] */
2371 this->args = strjoin(
2372 this->args, (self->exec_argc > 0) ? self->exec_argv0 : "");
2373
2374 /* Process exec_argv[1] */
2375 this->args = strjoin(
2376 this->args, (self->exec_argc > 1) ? "\\1" : "");
2377 this->args = strjoin(
2378 this->args, (self->exec_argc > 1) ? self->exec_argv1 : "");
2379
2380 /* Process exec_argv[2] */
2381 this->args = strjoin(
2382 this->args, (self->exec_argc > 2) ? "\\1" : "");
2383 this->args = strjoin(
2384 this->args, (self->exec_argc > 2) ? self->exec_argv2 : "");
2385
2386 /* Process exec_argv[3] */
2387 this->args = strjoin(
2388 this->args, (self->exec_argc > 3) ? "\\1" : "");
2389 this->args = strjoin(
2390 this->args, (self->exec_argc > 3) ? self->exec_argv3 : "");
2391
2392 /* Prints self->exec_argc to permits verifying the internal
2393 * consistency since this code is quite fishy. */
2394 printf("%d %d %s(\\"%s\\", [%d, %S])\\n",
2395 logindex, pid, probefunc,
2396 self->exec_arg0,
2397 self->exec_argc,
2398 this->args);
2399 logindex++;
2400 this->args = 0;
2401 }
2402 syscall::exec*:return /trackedpid[pid]/ {
2403 self->exec_arg0 = 0;
2404 self->exec_argc = 0;
2405 self->exec_argv0 = 0;
2406 self->exec_argv1 = 0;
2407 self->exec_argv2 = 0;
2408 self->exec_argv3 = 0;
2409 }
2410 """
2411
2412 # Code currently not used.
2413 D_EXTRANEOUS = """
2414 /* This is a good learning experience, since it traces a lot of things
2415 * related to the process and child processes.
2416 * Warning: it generates a gigantic log. For example, tracing
2417 * "data/trace_inputs/child1.py --child" generates a 2mb log and takes
2418 * several minutes to execute.
2419 */
2420 /*
2421 mach_trap::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2422 printf("%d %d %s_%s() = %d\\n",
2423 logindex, pid, probeprov, probefunc, errno);
2424 logindex++;
2425 }
2426 proc::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2427 printf("%d %d %s_%s() = %d\\n",
2428 logindex, pid, probeprov, probefunc, errno);
2429 logindex++;
2430 }
2431 sched::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2432 printf("%d %d %s_%s() = %d\\n",
2433 logindex, pid, probeprov, probefunc, errno);
2434 logindex++;
2435 }
2436 syscall::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2437 printf("%d %d %s_%s() = %d\\n",
2438 logindex, pid, probeprov, probefunc, errno);
2439 logindex++;
2440 }
2441 vminfo::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2442 printf("%d %d %s_%s() = %d\\n",
2443 logindex, pid, probeprov, probefunc, errno);
2444 logindex++;
2445 }
2446 */
2447 /* TODO(maruel): *stat* functions and friends
2448 syscall::access:return,
2449 syscall::chdir:return,
2450 syscall::chflags:return,
2451 syscall::chown:return,
2452 syscall::chroot:return,
2453 syscall::getattrlist:return,
2454 syscall::getxattr:return,
2455 syscall::lchown:return,
2456 syscall::lstat64:return,
2457 syscall::lstat:return,
2458 syscall::mkdir:return,
2459 syscall::pathconf:return,
2460 syscall::readlink:return,
2461 syscall::removexattr:return,
2462 syscall::setxattr:return,
2463 syscall::stat64:return,
2464 syscall::stat:return,
2465 syscall::truncate:return,
2466 syscall::unlink:return,
2467 syscall::utimes:return,
2468 */
2469 """
2470
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002471 def __init__(self, logname, use_sudo):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002472 """Starts the log collection with dtrace.
2473
2474 Requires root access or chmod 4555 on dtrace. dtrace is asynchronous so
2475 this needs to wait for dtrace to be "warmed up".
2476 """
2477 super(Dtrace.Tracer, self).__init__(logname)
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002478 # This script is used as a signal to figure out the root process.
2479 self._signal_script = create_subprocess_thunk()
2480 self._scripts_to_cleanup.append(self._signal_script)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002481 # This unique dummy temp file is used to signal the dtrace script that it
2482 # should stop as soon as all the child processes are done. A bit hackish
2483 # but works fine enough.
2484 self._dummy_file_id, self._dummy_file_name = tempfile.mkstemp(
2485 prefix='trace_signal_file')
2486
maruel@chromium.org9b2112a2013-04-05 18:01:02 +00002487 dtrace_path = '/usr/sbin/dtrace'
2488 if not os.path.isfile(dtrace_path):
2489 dtrace_path = 'dtrace'
2490 elif use_sudo is None and (os.stat(dtrace_path).st_mode & stat.S_ISUID):
2491 # No need to sudo. For those following at home, don't do that.
2492 use_sudo = False
2493
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002494 # Note: do not use the -p flag. It's useless if the initial process quits
2495 # too fast, resulting in missing traces from the grand-children. The D
2496 # code manages the dtrace lifetime itself.
2497 trace_cmd = [
maruel@chromium.org9b2112a2013-04-05 18:01:02 +00002498 dtrace_path,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002499 # Use a larger buffer if getting 'out of scratch space' errors.
2500 # Ref: https://wikis.oracle.com/display/DTrace/Options+and+Tunables
2501 '-b', '10m',
2502 '-x', 'dynvarsize=10m',
2503 #'-x', 'dtrace_global_maxsize=1m',
2504 '-x', 'evaltime=exec',
2505 '-o', '/dev/stderr',
2506 '-q',
2507 '-n', self._get_dtrace_code(),
2508 ]
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002509 if use_sudo is not False:
2510 trace_cmd.insert(0, 'sudo')
2511
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002512 with open(self._logname + '.log', 'wb') as logfile:
2513 self._dtrace = subprocess.Popen(
2514 trace_cmd, stdout=logfile, stderr=subprocess.STDOUT)
2515 logging.debug('Started dtrace pid: %d' % self._dtrace.pid)
2516
2517 # Reads until one line is printed, which signifies dtrace is up and ready.
2518 with open(self._logname + '.log', 'rb') as logfile:
2519 while 'dtrace_BEGIN' not in logfile.readline():
2520 if self._dtrace.poll() is not None:
2521 # Do a busy wait. :/
2522 break
2523 logging.debug('dtrace started')
2524
2525 def _get_dtrace_code(self):
2526 """Setups the D code to implement child process tracking.
2527
2528 Injects the cookie in the script so it knows when to stop.
2529
2530 The script will detect any instance of the script created with
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002531 create_subprocess_thunk() and will start tracing it.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002532 """
maruel@chromium.orgbe75f282012-11-02 19:34:37 +00002533 out = (
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002534 'inline int PID = %d;\n'
2535 'inline string SCRIPT = "%s";\n'
2536 'inline int FILE_ID = %d;\n'
2537 '\n'
2538 '%s') % (
2539 os.getpid(),
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002540 self._signal_script,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002541 self._dummy_file_id,
maruel@chromium.orgbe75f282012-11-02 19:34:37 +00002542 self.D_CODE)
2543 if os.environ.get('TRACE_INPUTS_DTRACE_ENABLE_EXECVE') == '1':
2544 # Do not enable by default since it tends to spew dtrace: error lines
2545 # because the execve() parameters are not in valid memory at the time of
2546 # logging.
2547 # TODO(maruel): Find a way to make this reliable since it's useful but
2548 # only works in limited/trivial uses cases for now.
2549 out += self.D_CODE_EXECVE
2550 return out
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002551
2552 def trace(self, cmd, cwd, tracename, output):
2553 """Runs dtrace on an executable.
2554
2555 This dtruss is broken when it starts the process itself or when tracing
2556 child processes, this code starts a wrapper process
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002557 generated with create_subprocess_thunk() which starts the executable to
2558 trace.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002559 """
2560 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
2561 assert os.path.isabs(cmd[0]), cmd[0]
2562 assert os.path.isabs(cwd), cwd
2563 assert os.path.normpath(cwd) == cwd, cwd
2564 with self._lock:
2565 if not self._initialized:
2566 raise TracingFailure(
2567 'Called Tracer.trace() on an unitialized object',
2568 None, None, None, tracename)
2569 assert tracename not in (i['trace'] for i in self._traces)
2570
2571 # Starts the script wrapper to start the child process. This signals the
2572 # dtrace script that this process is to be traced.
2573 stdout = stderr = None
2574 if output:
2575 stdout = subprocess.PIPE
2576 stderr = subprocess.STDOUT
2577 child_cmd = [
2578 sys.executable,
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002579 self._signal_script,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002580 tracename,
2581 ]
2582 # Call a dummy function so that dtrace knows I'm about to launch a process
2583 # that needs to be traced.
2584 # Yummy.
2585 child = subprocess.Popen(
2586 child_cmd + fix_python_path(cmd),
2587 stdin=subprocess.PIPE,
2588 stdout=stdout,
2589 stderr=stderr,
2590 cwd=cwd)
2591 logging.debug('Started child pid: %d' % child.pid)
2592
2593 out = child.communicate()[0]
2594 # This doesn't mean tracing is done, one of the grand-child process may
2595 # still be alive. It will be tracked with the dtrace script.
2596
2597 with self._lock:
2598 assert tracename not in (i['trace'] for i in self._traces)
2599 self._traces.append(
2600 {
2601 'cmd': cmd,
2602 'cwd': cwd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002603 'pid': child.pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002604 'output': out,
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002605 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002606 })
2607 return child.returncode, out
2608
2609 def close(self, timeout=None):
2610 """Terminates dtrace."""
2611 logging.debug('close(%s)' % timeout)
2612 try:
2613 try:
2614 super(Dtrace.Tracer, self).close(timeout)
2615 # Signal dtrace that it should stop now.
maruel@chromium.orgf43e68b2012-10-15 20:23:10 +00002616 # ftruncate doesn't exist on Windows.
2617 os.ftruncate(self._dummy_file_id, 0) # pylint: disable=E1101
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002618 if timeout:
2619 start = time.time()
2620 # Use polling. :/
2621 while (self._dtrace.poll() is None and
2622 (time.time() - start) < timeout):
2623 time.sleep(0.1)
2624 self._dtrace.kill()
2625 self._dtrace.wait()
2626 finally:
2627 # Make sure to kill it in any case.
2628 if self._dtrace.poll() is None:
2629 try:
2630 self._dtrace.kill()
2631 self._dtrace.wait()
2632 except OSError:
2633 pass
2634
2635 if self._dtrace.returncode != 0:
2636 # Warn about any dtrace failure but basically ignore it.
2637 print 'dtrace failure: %s' % self._dtrace.returncode
2638 finally:
2639 os.close(self._dummy_file_id)
2640 os.remove(self._dummy_file_name)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002641
2642 def post_process_log(self):
2643 """Sorts the log back in order when each call occured.
2644
2645 dtrace doesn't save the buffer in strict order since it keeps one buffer
2646 per CPU.
2647 """
2648 super(Dtrace.Tracer, self).post_process_log()
2649 logname = self._logname + '.log'
2650 with open(logname, 'rb') as logfile:
2651 lines = [l for l in logfile if l.strip()]
2652 errors = [l for l in lines if l.startswith('dtrace:')]
2653 if errors:
2654 raise TracingFailure(
2655 'Found errors in the trace: %s' % '\n'.join(errors),
2656 None, None, None, logname)
2657 try:
2658 lines = sorted(lines, key=lambda l: int(l.split(' ', 1)[0]))
2659 except ValueError:
2660 raise TracingFailure(
2661 'Found errors in the trace: %s' % '\n'.join(
2662 l for l in lines if l.split(' ', 1)[0].isdigit()),
2663 None, None, None, logname)
2664 with open(logname, 'wb') as logfile:
2665 logfile.write(''.join(lines))
2666
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002667 def __init__(self, use_sudo=None):
2668 super(Dtrace, self).__init__()
2669 self.use_sudo = use_sudo
2670
2671 def get_tracer(self, logname):
2672 return self.Tracer(logname, self.use_sudo)
2673
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002674 @staticmethod
2675 def clean_trace(logname):
2676 for ext in ('', '.log'):
2677 if os.path.isfile(logname + ext):
2678 os.remove(logname + ext)
2679
2680 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00002681 def parse_log(cls, logname, blacklist, trace_name):
2682 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00002683 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002684
2685 def blacklist_more(filepath):
2686 # All the HFS metadata is in the form /.vol/...
2687 return blacklist(filepath) or re.match(r'^\/\.vol\/.+$', filepath)
2688
2689 data = read_json(logname)
2690 out = []
2691 for item in data['traces']:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00002692 if trace_name and item['trace'] != trace_name:
2693 continue
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002694 result = {
2695 'output': item['output'],
2696 'trace': item['trace'],
2697 }
2698 try:
2699 context = cls.Context(blacklist_more, item['pid'], item['cwd'])
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002700 # It's fine to assume the file as UTF-8: OSX enforces the file names to
2701 # be valid UTF-8 and we control the log output.
2702 for line in codecs.open(logname + '.log', 'rb', encoding='utf-8'):
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002703 context.on_line(line)
2704 result['results'] = context.to_results()
2705 except TracingFailure:
2706 result['exception'] = sys.exc_info()
2707 out.append(result)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002708 return out
2709
2710
2711class LogmanTrace(ApiBase):
2712 """Uses the native Windows ETW based tracing functionality to trace a child
2713 process.
2714
2715 Caveat: this implementations doesn't track cwd or initial_cwd. It is because
2716 the Windows Kernel doesn't have a concept of 'current working directory' at
2717 all. A Win32 process has a map of current directories, one per drive letter
2718 and it is managed by the user mode kernel32.dll. In kernel, a file is always
2719 opened relative to another file_object or as an absolute path. All the current
2720 working directory logic is done in user mode.
2721 """
2722 class Context(ApiBase.Context):
2723 """Processes a ETW log line and keeps the list of existent and non
2724 existent files accessed.
2725
2726 Ignores directories.
2727 """
2728 # These indexes are for the stripped version in json.
2729 EVENT_NAME = 0
2730 TYPE = 1
2731 PID = 2
2732 TID = 3
2733 PROCESSOR_ID = 4
2734 TIMESTAMP = 5
2735 USER_DATA = 6
2736
2737 class Process(ApiBase.Context.Process):
2738 def __init__(self, *args):
2739 super(LogmanTrace.Context.Process, self).__init__(*args)
2740 # Handle file objects that succeeded.
2741 self.file_objects = {}
2742
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002743 def __init__(self, blacklist, thunk_pid, trace_name, thunk_cmd):
2744 logging.info(
2745 '%s(%d, %s, %s)', self.__class__.__name__, thunk_pid, trace_name,
2746 thunk_cmd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002747 super(LogmanTrace.Context, self).__init__(blacklist)
2748 self._drive_map = DosDriveMap()
2749 # Threads mapping to the corresponding process id.
2750 self._threads_active = {}
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002751 # Process ID of the tracer, e.g. the temporary script created by
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002752 # create_subprocess_thunk(). This is tricky because the process id may
2753 # have been reused.
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002754 self._thunk_pid = thunk_pid
2755 self._thunk_cmd = thunk_cmd
2756 self._trace_name = trace_name
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002757 self._line_number = 0
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002758 self._thunk_process = None
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002759
2760 def on_line(self, line):
2761 """Processes a json Event line."""
2762 self._line_number += 1
2763 try:
2764 # By Opcode
2765 handler = getattr(
2766 self,
2767 'handle_%s_%s' % (line[self.EVENT_NAME], line[self.TYPE]),
2768 None)
2769 if not handler:
2770 raise TracingFailure(
2771 'Unexpected event %s_%s' % (
2772 line[self.EVENT_NAME], line[self.TYPE]),
2773 None, None, None)
2774 handler(line)
2775 except TracingFailure, e:
2776 # Hack in the values since the handler could be a static function.
2777 e.pid = line[self.PID]
2778 e.line = line
2779 e.line_number = self._line_number
2780 # Re-raise the modified exception.
2781 raise
2782 except (KeyError, NotImplementedError, ValueError), e:
2783 raise TracingFailure(
2784 'Trace generated a %s exception: %s' % (
2785 e.__class__.__name__, str(e)),
2786 line[self.PID],
2787 self._line_number,
2788 line,
2789 e)
2790
2791 def to_results(self):
2792 if not self.root_process:
2793 raise TracingFailure(
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002794 'Failed to detect the initial process %d' % self._thunk_pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002795 None, None, None)
2796 process = self.root_process.to_results_process()
2797 return Results(process)
2798
2799 def _thread_to_process(self, tid):
2800 """Finds the process from the thread id."""
2801 tid = int(tid, 16)
2802 pid = self._threads_active.get(tid)
2803 if not pid or not self._process_lookup.get(pid):
2804 return
2805 return self._process_lookup[pid]
2806
2807 @classmethod
2808 def handle_EventTrace_Header(cls, line):
2809 """Verifies no event was dropped, e.g. no buffer overrun occured."""
2810 BUFFER_SIZE = cls.USER_DATA
2811 #VERSION = cls.USER_DATA + 1
2812 #PROVIDER_VERSION = cls.USER_DATA + 2
2813 #NUMBER_OF_PROCESSORS = cls.USER_DATA + 3
2814 #END_TIME = cls.USER_DATA + 4
2815 #TIMER_RESOLUTION = cls.USER_DATA + 5
2816 #MAX_FILE_SIZE = cls.USER_DATA + 6
2817 #LOG_FILE_MODE = cls.USER_DATA + 7
2818 #BUFFERS_WRITTEN = cls.USER_DATA + 8
2819 #START_BUFFERS = cls.USER_DATA + 9
2820 #POINTER_SIZE = cls.USER_DATA + 10
2821 EVENTS_LOST = cls.USER_DATA + 11
2822 #CPU_SPEED = cls.USER_DATA + 12
2823 #LOGGER_NAME = cls.USER_DATA + 13
2824 #LOG_FILE_NAME = cls.USER_DATA + 14
2825 #BOOT_TIME = cls.USER_DATA + 15
2826 #PERF_FREQ = cls.USER_DATA + 16
2827 #START_TIME = cls.USER_DATA + 17
2828 #RESERVED_FLAGS = cls.USER_DATA + 18
2829 #BUFFERS_LOST = cls.USER_DATA + 19
2830 #SESSION_NAME_STRING = cls.USER_DATA + 20
2831 #LOG_FILE_NAME_STRING = cls.USER_DATA + 21
2832 if line[EVENTS_LOST] != '0':
2833 raise TracingFailure(
2834 ( '%s events were lost during trace, please increase the buffer '
2835 'size from %s') % (line[EVENTS_LOST], line[BUFFER_SIZE]),
2836 None, None, None)
2837
2838 def handle_FileIo_Cleanup(self, line):
2839 """General wisdom: if a file is closed, it's because it was opened.
2840
2841 Note that FileIo_Close is not used since if a file was opened properly but
2842 not closed before the process exits, only Cleanup will be logged.
2843 """
2844 #IRP = self.USER_DATA
2845 TTID = self.USER_DATA + 1 # Thread ID, that's what we want.
2846 FILE_OBJECT = self.USER_DATA + 2
2847 #FILE_KEY = self.USER_DATA + 3
2848 proc = self._thread_to_process(line[TTID])
2849 if not proc:
2850 # Not a process we care about.
2851 return
2852 file_object = line[FILE_OBJECT]
2853 if file_object in proc.file_objects:
maruel@chromium.org538141b2013-06-03 20:57:17 +00002854 filepath, access_type = proc.file_objects.pop(file_object)
2855 proc.add_file(filepath, access_type)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002856
2857 def handle_FileIo_Create(self, line):
2858 """Handles a file open.
2859
2860 All FileIo events are described at
2861 http://msdn.microsoft.com/library/windows/desktop/aa363884.aspx
2862 for some value of 'description'.
2863
2864 " (..) process and thread id values of the IO events (..) are not valid "
2865 http://msdn.microsoft.com/magazine/ee358703.aspx
2866
2867 The FileIo.Create event doesn't return if the CreateFile() call
2868 succeeded, so keep track of the file_object and check that it is
2869 eventually closed with FileIo_Cleanup.
2870 """
2871 #IRP = self.USER_DATA
2872 TTID = self.USER_DATA + 1 # Thread ID, that's what we want.
2873 FILE_OBJECT = self.USER_DATA + 2
2874 #CREATE_OPTIONS = self.USER_DATA + 3
2875 #FILE_ATTRIBUTES = self.USER_DATA + 4
maruel@chromium.org538141b2013-06-03 20:57:17 +00002876 #SHARE_ACCESS = self.USER_DATA + 5
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002877 OPEN_PATH = self.USER_DATA + 6
2878
2879 proc = self._thread_to_process(line[TTID])
2880 if not proc:
2881 # Not a process we care about.
2882 return
2883
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002884 raw_path = line[OPEN_PATH]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002885 # Ignore directories and bare drive right away.
2886 if raw_path.endswith(os.path.sep):
2887 return
2888 filepath = self._drive_map.to_win32(raw_path)
2889 # Ignore bare drive right away. Some may still fall through with format
2890 # like '\\?\X:'
2891 if len(filepath) == 2:
2892 return
2893 file_object = line[FILE_OBJECT]
2894 if os.path.isdir(filepath):
2895 # There is no O_DIRECTORY equivalent on Windows. The closed is
2896 # FILE_FLAG_BACKUP_SEMANTICS but it's not exactly right either. So
2897 # simply discard directories are they are found.
2898 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00002899 # Override any stale file object.
2900 # TODO(maruel): Figure out a way to detect if the file was opened for
2901 # reading or writting. Sadly CREATE_OPTIONS doesn't seem to be of any help
2902 # here. For now mark as None to make it clear we have no idea what it is
2903 # about.
2904 proc.file_objects[file_object] = (filepath, None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002905
2906 def handle_FileIo_Rename(self, line):
2907 # TODO(maruel): Handle?
2908 pass
2909
2910 def handle_Process_End(self, line):
2911 pid = line[self.PID]
2912 if self._process_lookup.get(pid):
2913 logging.info('Terminated: %d' % pid)
2914 self._process_lookup[pid] = None
2915 else:
2916 logging.debug('Terminated: %d' % pid)
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002917 if self._thunk_process and self._thunk_process.pid == pid:
2918 self._thunk_process = None
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002919
2920 def handle_Process_Start(self, line):
2921 """Handles a new child process started by PID."""
2922 #UNIQUE_PROCESS_KEY = self.USER_DATA
2923 PROCESS_ID = self.USER_DATA + 1
2924 #PARENT_PID = self.USER_DATA + 2
2925 #SESSION_ID = self.USER_DATA + 3
2926 #EXIT_STATUS = self.USER_DATA + 4
2927 #DIRECTORY_TABLE_BASE = self.USER_DATA + 5
2928 #USER_SID = self.USER_DATA + 6
2929 IMAGE_FILE_NAME = self.USER_DATA + 7
2930 COMMAND_LINE = self.USER_DATA + 8
2931
2932 ppid = line[self.PID]
2933 pid = int(line[PROCESS_ID], 16)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002934 command_line = CommandLineToArgvW(line[COMMAND_LINE])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002935 logging.debug(
2936 'New process %d->%d (%s) %s' %
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002937 (ppid, pid, line[IMAGE_FILE_NAME], command_line))
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002938
2939 if pid == self._thunk_pid:
2940 # Need to ignore processes we don't know about because the log is
2941 # system-wide. self._thunk_pid shall start only one process.
2942 # This is tricky though because Windows *loves* to reuse process id and
2943 # it happens often that the process ID of the thunk script created by
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002944 # create_subprocess_thunk() is reused. So just detecting the pid here is
2945 # not sufficient, we must confirm the command line.
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002946 if command_line[:len(self._thunk_cmd)] != self._thunk_cmd:
2947 logging.info(
2948 'Ignoring duplicate pid %d for %s: %s while searching for %s',
2949 pid, self._trace_name, command_line, self._thunk_cmd)
2950 return
2951
2952 # TODO(maruel): The check is quite weak. Add the thunk path.
2953 if self._thunk_process:
2954 raise TracingFailure(
2955 ( 'Parent process is _thunk_pid(%d) but thunk_process(%d) is '
2956 'already set') % (self._thunk_pid, self._thunk_process.pid),
2957 None, None, None)
2958 proc = self.Process(self.blacklist, pid, None)
2959 self._thunk_process = proc
2960 return
2961 elif ppid == self._thunk_pid and self._thunk_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002962 proc = self.Process(self.blacklist, pid, None)
2963 self.root_process = proc
2964 ppid = None
2965 elif self._process_lookup.get(ppid):
2966 proc = self.Process(self.blacklist, pid, None)
2967 self._process_lookup[ppid].children.append(proc)
2968 else:
2969 # Ignore
2970 return
2971 self._process_lookup[pid] = proc
2972
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002973 proc.command = command_line
2974 proc.executable = line[IMAGE_FILE_NAME]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002975 # proc.command[0] may be the absolute path of 'executable' but it may be
2976 # anything else too. If it happens that command[0] ends with executable,
2977 # use it, otherwise defaults to the base name.
2978 cmd0 = proc.command[0].lower()
2979 if not cmd0.endswith('.exe'):
2980 # TODO(maruel): That's not strictly true either.
2981 cmd0 += '.exe'
2982 if cmd0.endswith(proc.executable) and os.path.isfile(cmd0):
2983 # Fix the path.
2984 cmd0 = cmd0.replace('/', os.path.sep)
2985 cmd0 = os.path.normpath(cmd0)
2986 proc.executable = get_native_path_case(cmd0)
2987 logging.info(
2988 'New child: %s -> %d %s' % (ppid, pid, proc.executable))
2989
2990 def handle_Thread_End(self, line):
2991 """Has the same parameters as Thread_Start."""
2992 tid = int(line[self.TID], 16)
2993 self._threads_active.pop(tid, None)
2994
2995 def handle_Thread_Start(self, line):
2996 """Handles a new thread created.
2997
2998 Do not use self.PID here since a process' initial thread is created by
2999 the parent process.
3000 """
3001 PROCESS_ID = self.USER_DATA
3002 TTHREAD_ID = self.USER_DATA + 1
3003 #STACK_BASE = self.USER_DATA + 2
3004 #STACK_LIMIT = self.USER_DATA + 3
3005 #USER_STACK_BASE = self.USER_DATA + 4
3006 #USER_STACK_LIMIT = self.USER_DATA + 5
3007 #AFFINITY = self.USER_DATA + 6
3008 #WIN32_START_ADDR = self.USER_DATA + 7
3009 #TEB_BASE = self.USER_DATA + 8
3010 #SUB_PROCESS_TAG = self.USER_DATA + 9
3011 #BASE_PRIORITY = self.USER_DATA + 10
3012 #PAGE_PRIORITY = self.USER_DATA + 11
3013 #IO_PRIORITY = self.USER_DATA + 12
3014 #THREAD_FLAGS = self.USER_DATA + 13
3015 # Do not use self.PID here since a process' initial thread is created by
3016 # the parent process.
3017 pid = int(line[PROCESS_ID], 16)
3018 tid = int(line[TTHREAD_ID], 16)
3019 logging.debug('New thread pid:%d, tid:%d' % (pid, tid))
3020 self._threads_active[tid] = pid
3021
3022 @classmethod
3023 def supported_events(cls):
3024 """Returns all the procesed events."""
3025 out = []
3026 for member in dir(cls):
3027 match = re.match(r'^handle_([A-Za-z]+)_([A-Za-z]+)$', member)
3028 if match:
3029 out.append(match.groups())
3030 return out
3031
3032 class Tracer(ApiBase.Tracer):
3033 # The basic headers.
3034 EXPECTED_HEADER = [
3035 u'Event Name',
3036 u'Type',
3037 u'Event ID',
3038 u'Version',
3039 u'Channel',
3040 u'Level', # 5
3041 u'Opcode',
3042 u'Task',
3043 u'Keyword',
3044 u'PID',
3045 u'TID', # 10
3046 u'Processor Number',
3047 u'Instance ID',
3048 u'Parent Instance ID',
3049 u'Activity ID',
3050 u'Related Activity ID', # 15
3051 u'Clock-Time',
3052 u'Kernel(ms)', # Both have a resolution of ~15ms which makes them
3053 u'User(ms)', # pretty much useless.
3054 u'User Data', # Extra arguments that are event-specific.
3055 ]
3056 # Only the useful headers common to all entries are listed there. Any column
3057 # at 19 or higher is dependent on the specific event.
3058 EVENT_NAME = 0
3059 TYPE = 1
3060 PID = 9
3061 TID = 10
3062 PROCESSOR_ID = 11
3063 TIMESTAMP = 16
3064 NULL_GUID = '{00000000-0000-0000-0000-000000000000}'
3065 USER_DATA = 19
3066
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003067 class CsvReader(object):
3068 """CSV reader that reads files generated by tracerpt.exe.
3069
3070 csv.reader() fails to read them properly, it mangles file names quoted
3071 with "" with a comma in it.
3072 """
3073 # 0. Had a ',' or one of the following ' ' after a comma, next should
3074 # be ' ', '"' or string or ',' for an empty field.
3075 ( HAD_DELIMITER,
3076 # 1. Processing an unquoted field up to ','.
3077 IN_STR,
3078 # 2. Processing a new field starting with '"'.
3079 STARTING_STR_QUOTED,
3080 # 3. Second quote in a row at the start of a field. It could be either
3081 # '""foo""' or '""'. Who the hell thought it was a great idea to use
3082 # the same character for delimiting and escaping?
3083 STARTING_SECOND_QUOTE,
3084 # 4. A quote inside a quoted string where the previous character was
3085 # not a quote, so the string is not empty. Can be either: end of a
3086 # quoted string (a delimiter) or a quote escape. The next char must be
3087 # either '"' or ','.
3088 HAD_QUOTE_IN_QUOTED,
3089 # 5. Second quote inside a quoted string.
3090 HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED,
3091 # 6. Processing a field that started with '"'.
3092 IN_STR_QUOTED) = range(7)
3093
3094 def __init__(self, f):
3095 self.f = f
3096
3097 def __iter__(self):
3098 return self
3099
3100 def next(self):
3101 """Splits the line in fields."""
3102 line = self.f.readline()
3103 if not line:
3104 raise StopIteration()
3105 line = line.strip()
3106 fields = []
3107 state = self.HAD_DELIMITER
3108 for i, c in enumerate(line):
3109 if state == self.HAD_DELIMITER:
3110 if c == ',':
3111 # Empty field.
3112 fields.append('')
3113 elif c == ' ':
3114 # Ignore initial whitespaces
3115 pass
3116 elif c == '"':
3117 state = self.STARTING_STR_QUOTED
3118 fields.append('')
3119 else:
3120 # Start of a new field.
3121 state = self.IN_STR
3122 fields.append(c)
3123
3124 elif state == self.IN_STR:
3125 # Do not accept quote inside unquoted field.
3126 assert c != '"', (i, c, line, fields)
3127 if c == ',':
3128 fields[-1] = fields[-1].strip()
3129 state = self.HAD_DELIMITER
3130 else:
3131 fields[-1] = fields[-1] + c
3132
3133 elif state == self.STARTING_STR_QUOTED:
3134 if c == '"':
3135 # Do not store the character yet.
3136 state = self.STARTING_SECOND_QUOTE
3137 else:
3138 state = self.IN_STR_QUOTED
3139 fields[-1] = fields[-1] + c
3140
3141 elif state == self.STARTING_SECOND_QUOTE:
3142 if c == ',':
3143 # It was an empty field. '""' == ''.
3144 state = self.HAD_DELIMITER
3145 else:
3146 fields[-1] = fields[-1] + '"' + c
3147 state = self.IN_STR_QUOTED
3148
3149 elif state == self.HAD_QUOTE_IN_QUOTED:
3150 if c == ',':
3151 # End of the string.
3152 state = self.HAD_DELIMITER
3153 elif c == '"':
3154 state = self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED
3155 else:
3156 # The previous double-quote was just an unescaped quote.
3157 fields[-1] = fields[-1] + '"' + c
3158 state = self.IN_STR_QUOTED
3159
3160 elif state == self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED:
3161 if c == ',':
3162 # End of the string.
3163 state = self.HAD_DELIMITER
3164 fields[-1] = fields[-1] + '"'
3165 else:
maruel@chromium.org261d1352013-02-25 19:23:20 +00003166 # That's just how the logger rolls. Revert back to appending the
3167 # char and "guess" it was a quote in a double-quoted string.
3168 state = self.IN_STR_QUOTED
3169 fields[-1] = fields[-1] + '"' + c
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003170
3171 elif state == self.IN_STR_QUOTED:
3172 if c == '"':
3173 # Could be a delimiter or an escape.
3174 state = self.HAD_QUOTE_IN_QUOTED
3175 else:
3176 fields[-1] = fields[-1] + c
3177
3178 if state == self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED:
3179 fields[-1] = fields[-1] + '"'
3180 else:
3181 assert state in (
3182 # Terminated with a normal field.
3183 self.IN_STR,
3184 # Terminated with an empty field.
3185 self.STARTING_SECOND_QUOTE,
3186 # Terminated with a normal quoted field.
3187 self.HAD_QUOTE_IN_QUOTED), (
3188 line, state, fields)
3189 return fields
3190
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003191 def __init__(self, logname):
3192 """Starts the log collection.
3193
3194 Requires administrative access. logman.exe is synchronous so no need for a
3195 "warmup" call. 'Windows Kernel Trace' is *localized* so use its GUID
3196 instead. The GUID constant name is SystemTraceControlGuid. Lovely.
3197
3198 One can get the list of potentially interesting providers with:
3199 "logman query providers | findstr /i file"
3200 """
3201 super(LogmanTrace.Tracer, self).__init__(logname)
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00003202 self._signal_script = create_subprocess_thunk()
3203 self._scripts_to_cleanup.append(self._signal_script)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003204 cmd_start = [
3205 'logman.exe',
3206 'start',
3207 'NT Kernel Logger',
3208 '-p', '{9e814aad-3204-11d2-9a82-006008a86939}',
3209 # splitio,fileiocompletion,syscall,file,cswitch,img
3210 '(process,fileio,thread)',
3211 '-o', self._logname + '.etl',
3212 '-ets', # Send directly to kernel
3213 # Values extracted out of thin air.
3214 # Event Trace Session buffer size in kb.
3215 '-bs', '10240',
3216 # Number of Event Trace Session buffers.
3217 '-nb', '16', '256',
3218 ]
3219 logging.debug('Running: %s' % cmd_start)
3220 try:
3221 subprocess.check_call(
3222 cmd_start,
3223 stdin=subprocess.PIPE,
3224 stdout=subprocess.PIPE,
3225 stderr=subprocess.STDOUT)
3226 except subprocess.CalledProcessError, e:
3227 if e.returncode == -2147024891:
3228 print >> sys.stderr, 'Please restart with an elevated admin prompt'
3229 elif e.returncode == -2144337737:
3230 print >> sys.stderr, (
3231 'A kernel trace was already running, stop it and try again')
3232 raise
3233
3234 def trace(self, cmd, cwd, tracename, output):
3235 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
3236 assert os.path.isabs(cmd[0]), cmd[0]
3237 assert os.path.isabs(cwd), cwd
3238 assert os.path.normpath(cwd) == cwd, cwd
3239 with self._lock:
3240 if not self._initialized:
3241 raise TracingFailure(
3242 'Called Tracer.trace() on an unitialized object',
3243 None, None, None, tracename)
3244 assert tracename not in (i['trace'] for i in self._traces)
3245
3246 # Use "logman -?" for help.
3247
3248 stdout = stderr = None
3249 if output:
3250 stdout = subprocess.PIPE
3251 stderr = subprocess.STDOUT
3252
3253 # Run the child process.
3254 logging.debug('Running: %s' % cmd)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00003255 # Use the temporary script generated with create_subprocess_thunk() so we
3256 # have a clear pid owner. Since trace_inputs.py can be used as a library
3257 # and could trace multiple processes simultaneously, it makes it more
3258 # complex if the executable to be traced is executed directly here. It
3259 # also solves issues related to logman.exe that needs to be executed to
3260 # control the kernel trace.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003261 child_cmd = [
3262 sys.executable,
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00003263 self._signal_script,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003264 tracename,
3265 ]
3266 child = subprocess.Popen(
3267 child_cmd + fix_python_path(cmd),
3268 cwd=cwd,
3269 stdin=subprocess.PIPE,
3270 stdout=stdout,
3271 stderr=stderr)
3272 logging.debug('Started child pid: %d' % child.pid)
3273 out = child.communicate()[0]
3274 # This doesn't mean all the grand-children are done. Sadly, we don't have
3275 # a good way to determine that.
3276
3277 with self._lock:
3278 assert tracename not in (i['trace'] for i in self._traces)
3279 self._traces.append({
maruel@chromium.orgf43e68b2012-10-15 20:23:10 +00003280 'cmd': cmd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003281 'cwd': cwd,
maruel@chromium.org964ddde2012-10-29 18:13:46 +00003282 'output': out,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003283 'pid': child.pid,
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00003284 # Used to figure out the real process when process ids are reused.
3285 'thunk_cmd': child_cmd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003286 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003287 })
3288
3289 return child.returncode, out
3290
3291 def close(self, _timeout=None):
3292 """Stops the kernel log collection and converts the traces to text
3293 representation.
3294 """
3295 with self._lock:
maruel@chromium.org808e3f22013-05-16 16:42:54 +00003296 try:
3297 super(LogmanTrace.Tracer, self).close()
3298 finally:
3299 cmd_stop = [
3300 'logman.exe',
3301 'stop',
3302 'NT Kernel Logger',
3303 '-ets', # Sends the command directly to the kernel.
3304 ]
3305 logging.debug('Running: %s' % cmd_stop)
3306 subprocess.check_call(
3307 cmd_stop,
3308 stdin=subprocess.PIPE,
3309 stdout=subprocess.PIPE,
3310 stderr=subprocess.STDOUT)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003311
3312 def post_process_log(self):
3313 """Converts the .etl file into .csv then into .json."""
3314 super(LogmanTrace.Tracer, self).post_process_log()
3315 logformat = 'csv'
3316 self._convert_log(logformat)
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003317 self._trim_log(logformat)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003318
maruel@chromium.org808e3f22013-05-16 16:42:54 +00003319 def _gen_logdata(self):
3320 return {
3321 'format': 'csv',
3322 'traces': self._traces,
3323 }
3324
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003325 def _trim_log(self, logformat):
3326 """Reduces the amount of data in original log by generating a 'reduced'
3327 log.
3328 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003329 if logformat == 'csv_utf16':
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003330 file_handle = codecs.open(
3331 self._logname + '.' + logformat, 'r', encoding='utf-16')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003332
3333 elif logformat == 'csv':
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003334 assert sys.getfilesystemencoding() == 'mbcs'
3335 file_handle = codecs.open(
3336 self._logname + '.' + logformat, 'r',
3337 encoding=get_current_encoding())
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003338
3339 supported_events = LogmanTrace.Context.supported_events()
3340
3341 def trim(generator):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003342 """Loads items from the generator and returns the interesting data.
3343
3344 It filters out any uninteresting line and reduce the amount of data in
3345 the trace.
3346 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003347 for index, line in enumerate(generator):
3348 if not index:
3349 if line != self.EXPECTED_HEADER:
3350 raise TracingFailure(
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003351 'Found malformed header: %s' % line,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003352 None, None, None)
3353 continue
3354 # As you can see, the CSV is full of useful non-redundant information:
3355 if (line[2] != '0' or # Event ID
3356 line[3] not in ('2', '3') or # Version
3357 line[4] != '0' or # Channel
3358 line[5] != '0' or # Level
3359 line[7] != '0' or # Task
3360 line[8] != '0x0000000000000000' or # Keyword
3361 line[12] != '' or # Instance ID
3362 line[13] != '' or # Parent Instance ID
3363 line[14] != self.NULL_GUID or # Activity ID
3364 line[15] != ''): # Related Activity ID
3365 raise TracingFailure(
3366 'Found unexpected values in line: %s' % ' '.join(line),
3367 None, None, None)
3368
3369 if (line[self.EVENT_NAME], line[self.TYPE]) not in supported_events:
3370 continue
3371
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003372 yield [
3373 line[self.EVENT_NAME],
3374 line[self.TYPE],
3375 line[self.PID],
3376 line[self.TID],
3377 line[self.PROCESSOR_ID],
3378 line[self.TIMESTAMP],
3379 ] + line[self.USER_DATA:]
3380
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003381 # must not convert the trim() call into a list, since it will use too much
3382 # memory for large trace. use a csv file as a workaround since the json
3383 # parser requires a complete in-memory file.
3384 with open('%s.preprocessed' % self._logname, 'wb') as f:
3385 # $ and * can't be used in file name on windows, reducing the likelihood
3386 # of having to escape a string.
3387 out = csv.writer(
3388 f, delimiter='$', quotechar='*', quoting=csv.QUOTE_MINIMAL)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003389 for line in trim(self.CsvReader(file_handle)):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003390 out.writerow([s.encode('utf-8') for s in line])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003391
3392 def _convert_log(self, logformat):
3393 """Converts the ETL trace to text representation.
3394
3395 Normally, 'csv' is sufficient. If complex scripts are used (like eastern
3396 languages), use 'csv_utf16'. If localization gets in the way, use 'xml'.
3397
3398 Arguments:
3399 - logformat: Text format to be generated, csv, csv_utf16 or xml.
3400
3401 Use "tracerpt -?" for help.
3402 """
3403 LOCALE_INVARIANT = 0x7F
3404 windll.kernel32.SetThreadLocale(LOCALE_INVARIANT)
3405 cmd_convert = [
3406 'tracerpt.exe',
3407 '-l', self._logname + '.etl',
3408 '-o', self._logname + '.' + logformat,
3409 '-gmt', # Use UTC
3410 '-y', # No prompt
3411 # Use -of XML to get the header of each items after column 19, e.g. all
3412 # the actual headers of 'User Data'.
3413 ]
3414
3415 if logformat == 'csv':
3416 # tracerpt localizes the 'Type' column, for major brainfuck
3417 # entertainment. I can't imagine any sane reason to do that.
3418 cmd_convert.extend(['-of', 'CSV'])
3419 elif logformat == 'csv_utf16':
3420 # This causes it to use UTF-16, which doubles the log size but ensures
3421 # the log is readable for non-ASCII characters.
3422 cmd_convert.extend(['-of', 'CSV', '-en', 'Unicode'])
3423 elif logformat == 'xml':
3424 cmd_convert.extend(['-of', 'XML'])
3425 else:
3426 raise ValueError('Unexpected log format \'%s\'' % logformat)
3427 logging.debug('Running: %s' % cmd_convert)
3428 # This can takes tens of minutes for large logs.
3429 # Redirects all output to stderr.
3430 subprocess.check_call(
3431 cmd_convert,
3432 stdin=subprocess.PIPE,
3433 stdout=sys.stderr,
3434 stderr=sys.stderr)
3435
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003436 def __init__(self, use_sudo=False): # pylint: disable=W0613
3437 super(LogmanTrace, self).__init__()
3438 # Ignore use_sudo. It's irrelevant on Windows but kept to simplify the API.
3439
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003440 @staticmethod
3441 def clean_trace(logname):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003442 for ext in ('', '.csv', '.etl', '.json', '.xml', '.preprocessed'):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003443 if os.path.isfile(logname + ext):
3444 os.remove(logname + ext)
3445
3446 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003447 def parse_log(cls, logname, blacklist, trace_name):
3448 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00003449 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003450
3451 def blacklist_more(filepath):
3452 # All the NTFS metadata is in the form x:\$EXTEND or stuff like that.
3453 return blacklist(filepath) or re.match(r'[A-Z]\:\\\$EXTEND', filepath)
3454
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003455 # Create a list of (Context, result_dict) tuples. This is necessary because
3456 # the csv file may be larger than the amount of available memory.
3457 contexes = [
3458 (
3459 cls.Context(
3460 blacklist_more, item['pid'], item['trace'], item['thunk_cmd']),
3461 {
3462 'output': item['output'],
3463 'trace': item['trace'],
3464 },
3465 )
3466 for item in read_json(logname)['traces']
3467 if not trace_name or item['trace'] == trace_name
3468 ]
3469
3470 # The log may be too large to fit in memory and it is not efficient to read
3471 # it multiple times, so multiplex the contexes instead, which is slightly
3472 # more awkward.
3473 with open('%s.preprocessed' % logname, 'rb') as f:
3474 lines = csv.reader(
3475 f, delimiter='$', quotechar='*', quoting=csv.QUOTE_MINIMAL)
3476 for encoded in lines:
3477 line = [s.decode('utf-8') for s in encoded]
3478 # Convert the PID in-place from hex.
3479 line[cls.Context.PID] = int(line[cls.Context.PID], 16)
3480 for context in contexes:
3481 if 'exception' in context[1]:
3482 continue
3483 try:
3484 context[0].on_line(line)
3485 except TracingFailure:
3486 context[1]['exception'] = sys.exc_info()
3487
3488 for context in contexes:
3489 if 'exception' in context[1]:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003490 continue
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003491 context[1]['results'] = context[0].to_results()
3492
3493 return [context[1] for context in contexes]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003494
3495
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003496def get_api(**kwargs):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003497 """Returns the correct implementation for the current OS."""
3498 if sys.platform == 'cygwin':
3499 raise NotImplementedError(
3500 'Not implemented for cygwin, start the script from Win32 python')
3501 flavors = {
3502 'win32': LogmanTrace,
3503 'darwin': Dtrace,
3504 'sunos5': Dtrace,
3505 'freebsd7': Dtrace,
3506 'freebsd8': Dtrace,
3507 }
3508 # Defaults to strace.
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003509 return flavors.get(sys.platform, Strace)(**kwargs)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003510
3511
3512def extract_directories(root_dir, files, blacklist):
3513 """Detects if all the files in a directory are in |files| and if so, replace
3514 the individual files by a Results.Directory instance.
3515
3516 Takes a list of Results.File instances and returns a shorter list of
3517 Results.File and Results.Directory instances.
3518
3519 Arguments:
3520 - root_dir: Optional base directory that shouldn't be search further.
3521 - files: list of Results.File instances.
3522 - blacklist: regexp of files to ignore, for example r'.+\.pyc'.
3523 """
3524 logging.info(
3525 'extract_directories(%s, %d files, ...)' % (root_dir, len(files)))
3526 assert not (root_dir or '').endswith(os.path.sep), root_dir
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003527 # It is important for root_dir to not be a symlinked path, make sure to call
3528 # os.path.realpath() as needed.
3529 assert not root_dir or (
3530 os.path.realpath(get_native_path_case(root_dir)) == root_dir)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003531 assert not any(isinstance(f, Results.Directory) for f in files)
3532 # Remove non existent files.
3533 files = [f for f in files if f.existent]
3534 if not files:
3535 return files
3536 # All files must share the same root, which can be None.
3537 assert len(set(f.root for f in files)) == 1, set(f.root for f in files)
3538
3539 # Creates a {directory: {filename: File}} mapping, up to root.
3540 buckets = {}
3541 if root_dir:
3542 buckets[root_dir] = {}
3543 for fileobj in files:
3544 path = fileobj.full_path
3545 directory = os.path.dirname(path)
3546 assert directory
3547 # Do not use os.path.basename() so trailing os.path.sep is kept.
3548 basename = path[len(directory)+1:]
3549 files_in_directory = buckets.setdefault(directory, {})
3550 files_in_directory[basename] = fileobj
3551 # Add all the directories recursively up to root.
3552 while True:
3553 old_d = directory
3554 directory = os.path.dirname(directory)
3555 if directory + os.path.sep == root_dir or directory == old_d:
3556 break
3557 buckets.setdefault(directory, {})
3558
3559 root_prefix = len(root_dir) + 1 if root_dir else 0
3560 for directory in sorted(buckets, reverse=True):
maruel@chromium.orgb1678e62013-03-19 15:55:17 +00003561 if not os.path.isdir(directory):
3562 logging.debug(
3563 '%s was a directory but doesn\'t exist anymore; ignoring', directory)
3564 continue
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003565 actual = set(f for f in os.listdir(directory) if not blacklist(f))
3566 expected = set(buckets[directory])
3567 if not (actual - expected):
3568 parent = os.path.dirname(directory)
3569 buckets[parent][os.path.basename(directory)] = Results.Directory(
3570 root_dir,
3571 directory[root_prefix:],
3572 False,
3573 sum(f.size for f in buckets[directory].itervalues()),
3574 sum(f.nb_files for f in buckets[directory].itervalues()))
3575 # Remove the whole bucket.
3576 del buckets[directory]
3577
3578 # Reverse the mapping with what remains. The original instances are returned,
3579 # so the cached meta data is kept.
3580 files = sum((x.values() for x in buckets.itervalues()), [])
3581 return sorted(files, key=lambda x: x.path)
3582
3583
3584def trace(logfile, cmd, cwd, api, output):
3585 """Traces an executable. Returns (returncode, output) from api.
3586
3587 Arguments:
3588 - logfile: file to write to.
3589 - cmd: command to run.
3590 - cwd: current directory to start the process in.
3591 - api: a tracing api instance.
3592 - output: if True, returns output, otherwise prints it at the console.
3593 """
3594 cmd = fix_python_path(cmd)
3595 api.clean_trace(logfile)
3596 with api.get_tracer(logfile) as tracer:
3597 return tracer.trace(cmd, cwd, 'default', output)
3598
3599
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003600def CMDclean(args):
3601 """Cleans up traces."""
3602 parser = OptionParserTraceInputs(command='clean')
3603 options, args = parser.parse_args(args)
3604 api = get_api()
3605 api.clean_trace(options.log)
3606 return 0
3607
3608
3609def CMDtrace(args):
3610 """Traces an executable."""
3611 parser = OptionParserTraceInputs(command='trace')
3612 parser.allow_interspersed_args = False
3613 parser.add_option(
3614 '-q', '--quiet', action='store_true',
3615 help='Redirects traced executable output to /dev/null')
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003616 parser.add_option(
3617 '-s', '--sudo', action='store_true',
3618 help='Use sudo when shelling out the tracer tool (ignored on Windows)')
3619 parser.add_option(
3620 '-n', '--no-sudo', action='store_false',
3621 help='Don\'t use sudo')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003622 options, args = parser.parse_args(args)
3623
3624 if not args:
3625 parser.error('Please provide a command to run')
3626
3627 if not os.path.isabs(args[0]) and os.access(args[0], os.X_OK):
3628 args[0] = os.path.abspath(args[0])
3629
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003630 # options.sudo default value is None, which is to do whatever tracer defaults
3631 # do.
3632 api = get_api(use_sudo=options.sudo)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003633 return trace(options.log, args, os.getcwd(), api, options.quiet)[0]
3634
3635
3636def CMDread(args):
3637 """Reads the logs and prints the result."""
3638 parser = OptionParserTraceInputs(command='read')
3639 parser.add_option(
3640 '-V', '--variable',
3641 nargs=2,
3642 action='append',
3643 dest='variables',
3644 metavar='VAR_NAME directory',
3645 default=[],
3646 help=('Variables to replace relative directories against. Example: '
3647 '"-v \'$HOME\' \'/home/%s\'" will replace all occurence of your '
3648 'home dir with $HOME') % getpass.getuser())
3649 parser.add_option(
3650 '--root-dir',
3651 help='Root directory to base everything off it. Anything outside of this '
3652 'this directory will not be reported')
3653 parser.add_option(
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003654 '--trace-name',
3655 help='Only reads one of the trace. Defaults to reading all traces')
3656 parser.add_option(
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003657 '-j', '--json', action='store_true',
3658 help='Outputs raw result data as json')
3659 parser.add_option(
3660 '-b', '--blacklist', action='append', default=[],
3661 help='List of regexp to use as blacklist filter')
3662 options, args = parser.parse_args(args)
3663
3664 if options.root_dir:
maruel@chromium.org7dc17b32013-04-02 22:27:05 +00003665 options.root_dir = get_native_path_case(
3666 unicode(os.path.abspath(options.root_dir)))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003667
3668 variables = dict(options.variables)
3669 api = get_api()
3670 def blacklist(f):
3671 return any(re.match(b, f) for b in options.blacklist)
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003672 data = api.parse_log(options.log, blacklist, options.trace_name)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003673 # Process each trace.
3674 output_as_json = []
maruel@chromium.org9e56b592013-05-21 16:56:05 +00003675 try:
3676 for item in data:
3677 if 'exception' in item:
3678 # Do not abort the other traces.
3679 print >> sys.stderr, (
3680 'Trace %s: Got an exception: %s' % (
3681 item['trace'], item['exception'][1]))
3682 continue
3683 results = item['results']
3684 if options.root_dir:
3685 results = results.strip_root(options.root_dir)
3686
3687 if options.json:
3688 output_as_json.append(results.flatten())
3689 else:
3690 simplified = extract_directories(
3691 options.root_dir, results.files, blacklist)
3692 simplified = [f.replace_variables(variables) for f in simplified]
3693 if len(data) > 1:
3694 print('Trace: %s' % item['trace'])
3695 print('Total: %d' % len(results.files))
3696 print('Non existent: %d' % len(results.non_existent))
3697 for f in results.non_existent:
3698 print(' %s' % f.path)
3699 print(
3700 'Interesting: %d reduced to %d' % (
3701 len(results.existent), len(simplified)))
3702 for f in simplified:
3703 print(' %s' % f.path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003704
3705 if options.json:
maruel@chromium.org9e56b592013-05-21 16:56:05 +00003706 write_json(sys.stdout, output_as_json, False)
3707 except KeyboardInterrupt:
3708 return 1
3709 except IOError as e:
3710 if e.errno == errno.EPIPE:
3711 # Do not print a stack trace when the output is piped to less and the user
3712 # quits before the whole output was written.
3713 return 1
3714 raise
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003715 return 0
3716
3717
3718class OptionParserWithLogging(optparse.OptionParser):
3719 """Adds --verbose option."""
3720 def __init__(self, verbose=0, **kwargs):
3721 optparse.OptionParser.__init__(self, **kwargs)
3722 self.add_option(
3723 '-v', '--verbose',
3724 action='count',
3725 default=verbose,
3726 help='Use multiple times to increase verbosity')
3727
3728 def parse_args(self, *args, **kwargs):
3729 options, args = optparse.OptionParser.parse_args(self, *args, **kwargs)
3730 levels = [logging.ERROR, logging.INFO, logging.DEBUG]
3731 logging.basicConfig(
3732 level=levels[min(len(levels)-1, options.verbose)],
3733 format='%(levelname)5s %(module)15s(%(lineno)3d): %(message)s')
3734 return options, args
3735
3736
3737class OptionParserWithNiceDescription(OptionParserWithLogging):
3738 """Generates the description with the command's docstring."""
3739 def __init__(self, **kwargs):
3740 """Sets 'description' and 'usage' if not already specified."""
3741 command = kwargs.pop('command', 'help')
3742 kwargs.setdefault(
3743 'description',
3744 re.sub('[\r\n ]{2,}', ' ', get_command_handler(command).__doc__))
3745 kwargs.setdefault('usage', '%%prog %s [options]' % command)
3746 OptionParserWithLogging.__init__(self, **kwargs)
3747
3748
3749class OptionParserTraceInputs(OptionParserWithNiceDescription):
3750 """Adds automatic --log handling."""
3751 def __init__(self, **kwargs):
3752 OptionParserWithNiceDescription.__init__(self, **kwargs)
3753 self.add_option(
3754 '-l', '--log', help='Log file to generate or read, required')
3755
3756 def parse_args(self, *args, **kwargs):
3757 """Makes sure the paths make sense.
3758
3759 On Windows, / and \ are often mixed together in a path.
3760 """
3761 options, args = OptionParserWithNiceDescription.parse_args(
3762 self, *args, **kwargs)
3763 if not options.log:
3764 self.error('Must supply a log file with -l')
3765 options.log = os.path.abspath(options.log)
3766 return options, args
3767
3768
3769def extract_documentation():
3770 """Returns a dict {command: description} for each of documented command."""
3771 commands = (
3772 fn[3:]
3773 for fn in dir(sys.modules['__main__'])
3774 if fn.startswith('CMD') and get_command_handler(fn[3:]).__doc__)
3775 return dict((fn, get_command_handler(fn).__doc__) for fn in commands)
3776
3777
3778def CMDhelp(args):
3779 """Prints list of commands or help for a specific command."""
3780 doc = extract_documentation()
3781 # Calculates the optimal offset.
3782 offset = max(len(cmd) for cmd in doc)
3783 format_str = ' %-' + str(offset + 2) + 's %s'
3784 # Generate a one-liner documentation of each commands.
3785 commands_description = '\n'.join(
3786 format_str % (cmd, doc[cmd].split('\n')[0]) for cmd in sorted(doc))
3787
3788 parser = OptionParserWithNiceDescription(
3789 usage='%prog <command> [options]',
3790 description='Commands are:\n%s\n' % commands_description)
3791 parser.format_description = lambda _: parser.description
3792
3793 # Strip out any -h or --help argument.
3794 _, args = parser.parse_args([i for i in args if not i in ('-h', '--help')])
3795 if len(args) == 1:
3796 if not get_command_handler(args[0]):
3797 parser.error('Unknown command %s' % args[0])
3798 # The command was "%prog help command", replaces ourself with
3799 # "%prog command --help" so help is correctly printed out.
3800 return main(args + ['--help'])
3801 elif args:
3802 parser.error('Unknown argument "%s"' % ' '.join(args))
3803 parser.print_help()
3804 return 0
3805
3806
3807def get_command_handler(name):
3808 """Returns the command handler or CMDhelp if it doesn't exist."""
3809 return getattr(sys.modules['__main__'], 'CMD%s' % name, None)
3810
3811
3812def main_impl(argv):
3813 command = get_command_handler(argv[0] if argv else 'help')
3814 if not command:
3815 return CMDhelp(argv)
3816 return command(argv[1:])
3817
3818def main(argv):
maruel@chromium.orgac36fb72013-05-21 14:50:53 +00003819 disable_buffering()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003820 try:
3821 main_impl(argv)
3822 except TracingFailure, e:
3823 sys.stderr.write('\nError: ')
3824 sys.stderr.write(str(e))
3825 sys.stderr.write('\n')
3826 return 1
3827
3828
3829if __name__ == '__main__':
3830 sys.exit(main(sys.argv[1:]))