blob: 6976025f7aa8ddcbcc8efd6294585ffa3c04a3b3 [file] [log] [blame]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001#!/usr/bin/env python
2# coding=utf-8
3# Copyright (c) 2012 The Chromium Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""Traces an executable and its child processes and extract the files accessed
8by them.
9
10The implementation uses OS-specific API. The native Kernel logger and the ETL
11interface is used on Windows. Dtrace is used on OSX. Strace is used otherwise.
12The OS-specific implementation is hidden in an 'API' interface.
13
14The results are embedded in a Results instance. The tracing is done in two
15phases, the first is to do the actual trace and generate an
16implementation-specific log file. Then the log file is parsed to extract the
17information, including the individual child processes and the files accessed
18from the log.
19"""
20
21import codecs
22import csv
maruel@chromium.org9e56b592013-05-21 16:56:05 +000023import errno
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000024import getpass
25import glob
26import json
27import logging
28import optparse
29import os
30import re
maruel@chromium.org9b2112a2013-04-05 18:01:02 +000031import stat
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000032import subprocess
33import sys
34import tempfile
35import threading
36import time
maruel@chromium.org9b2112a2013-04-05 18:01:02 +000037import unicodedata
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000038import weakref
39
40## OS-specific imports
41
42if sys.platform == 'win32':
43 from ctypes.wintypes import byref, create_unicode_buffer, c_int, c_wchar_p
44 from ctypes.wintypes import windll, FormatError # pylint: disable=E0611
45 from ctypes.wintypes import GetLastError # pylint: disable=E0611
46elif sys.platform == 'darwin':
47 import Carbon.File # pylint: disable=F0401
48 import MacOS # pylint: disable=F0401
49
50
51BASE_DIR = os.path.dirname(os.path.abspath(__file__))
52ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR))
53
54
55class TracingFailure(Exception):
56 """An exception occured during tracing."""
57 def __init__(self, description, pid, line_number, line, *args):
58 super(TracingFailure, self).__init__(
59 description, pid, line_number, line, *args)
60 self.description = description
61 self.pid = pid
62 self.line_number = line_number
63 self.line = line
64 self.extra = args
65
66 def __str__(self):
67 out = self.description
68 if self.pid:
69 out += '\npid: %d' % self.pid
70 if self.line_number:
71 out += '\nline: %d' % self.line_number
72 if self.line:
73 out += '\n%s' % self.line
74 if self.extra:
75 out += '\n' + ', '.join(map(str, filter(None, self.extra)))
76 return out
77
78
79## OS-specific functions
80
81if sys.platform == 'win32':
82 def QueryDosDevice(drive_letter):
83 """Returns the Windows 'native' path for a DOS drive letter."""
84 assert re.match(r'^[a-zA-Z]:$', drive_letter), drive_letter
maruel@chromium.org306e0e72012-11-02 18:22:03 +000085 assert isinstance(drive_letter, unicode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000086 # Guesswork. QueryDosDeviceW never returns the required number of bytes.
87 chars = 1024
maruel@chromium.org306e0e72012-11-02 18:22:03 +000088 drive_letter = drive_letter
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000089 p = create_unicode_buffer(chars)
90 if 0 == windll.kernel32.QueryDosDeviceW(drive_letter, p, chars):
91 err = GetLastError()
92 if err:
93 # pylint: disable=E0602
maruel@chromium.orgf37c2472012-11-02 20:06:13 +000094 msg = u'QueryDosDevice(%s): %s (%d)' % (
95 drive_letter, FormatError(err), err)
96 raise WindowsError(err, msg.encode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000097 return p.value
98
99
100 def GetShortPathName(long_path):
101 """Returns the Windows short path equivalent for a 'long' path."""
maruel@chromium.org68bb1482013-02-15 22:36:24 +0000102 assert isinstance(long_path, unicode), repr(long_path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000103 # Adds '\\\\?\\' when given an absolute path so the MAX_PATH (260) limit is
104 # not enforced.
105 if os.path.isabs(long_path) and not long_path.startswith('\\\\?\\'):
106 long_path = '\\\\?\\' + long_path
107 chars = windll.kernel32.GetShortPathNameW(long_path, None, 0)
108 if chars:
109 p = create_unicode_buffer(chars)
110 if windll.kernel32.GetShortPathNameW(long_path, p, chars):
111 return p.value
112
113 err = GetLastError()
114 if err:
115 # pylint: disable=E0602
maruel@chromium.orgf37c2472012-11-02 20:06:13 +0000116 msg = u'GetShortPathName(%s): %s (%d)' % (
117 long_path, FormatError(err), err)
118 raise WindowsError(err, msg.encode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000119
120
121 def GetLongPathName(short_path):
122 """Returns the Windows long path equivalent for a 'short' path."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000123 assert isinstance(short_path, unicode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000124 # Adds '\\\\?\\' when given an absolute path so the MAX_PATH (260) limit is
125 # not enforced.
126 if os.path.isabs(short_path) and not short_path.startswith('\\\\?\\'):
127 short_path = '\\\\?\\' + short_path
128 chars = windll.kernel32.GetLongPathNameW(short_path, None, 0)
129 if chars:
130 p = create_unicode_buffer(chars)
131 if windll.kernel32.GetLongPathNameW(short_path, p, chars):
132 return p.value
133
134 err = GetLastError()
135 if err:
136 # pylint: disable=E0602
maruel@chromium.orgf37c2472012-11-02 20:06:13 +0000137 msg = u'GetLongPathName(%s): %s (%d)' % (
138 short_path, FormatError(err), err)
139 raise WindowsError(err, msg.encode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000140
141
142 def get_current_encoding():
143 """Returns the 'ANSI' code page associated to the process."""
144 return 'cp%d' % int(windll.kernel32.GetACP())
145
146
147 class DosDriveMap(object):
148 """Maps \Device\HarddiskVolumeN to N: on Windows."""
149 # Keep one global cache.
150 _MAPPING = {}
151
152 def __init__(self):
153 """Lazy loads the cache."""
154 if not self._MAPPING:
155 # This is related to UNC resolver on windows. Ignore that.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000156 self._MAPPING[u'\\Device\\Mup'] = None
157 self._MAPPING[u'\\SystemRoot'] = os.environ[u'SystemRoot']
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000158
159 for letter in (chr(l) for l in xrange(ord('C'), ord('Z')+1)):
160 try:
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000161 letter = u'%s:' % letter
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000162 mapped = QueryDosDevice(letter)
163 if mapped in self._MAPPING:
164 logging.warn(
165 ('Two drives: \'%s\' and \'%s\', are mapped to the same disk'
166 '. Drive letters are a user-mode concept and the kernel '
167 'traces only have NT path, so all accesses will be '
168 'associated with the first drive letter, independent of the '
169 'actual letter used by the code') % (
170 self._MAPPING[mapped], letter))
171 else:
172 self._MAPPING[mapped] = letter
173 except WindowsError: # pylint: disable=E0602
174 pass
175
176 def to_win32(self, path):
177 """Converts a native NT path to Win32/DOS compatible path."""
178 match = re.match(r'(^\\Device\\[a-zA-Z0-9]+)(\\.*)?$', path)
179 if not match:
180 raise ValueError(
181 'Can\'t convert %s into a Win32 compatible path' % path,
182 path)
183 if not match.group(1) in self._MAPPING:
184 # Unmapped partitions may be accessed by windows for the
185 # fun of it while the test is running. Discard these.
186 return None
187 drive = self._MAPPING[match.group(1)]
188 if not drive or not match.group(2):
189 return drive
190 return drive + match.group(2)
191
192
193 def isabs(path):
194 """Accepts X: as an absolute path, unlike python's os.path.isabs()."""
195 return os.path.isabs(path) or len(path) == 2 and path[1] == ':'
196
197
csharp@chromium.orgf2eacff2013-04-04 14:20:20 +0000198 def find_item_native_case(root, item):
199 """Gets the native path case of a single item based at root_path."""
200 if item == '..':
201 return item
202
203 root = get_native_path_case(root)
204 return os.path.basename(get_native_path_case(os.path.join(root, item)))
205
206
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000207 def get_native_path_case(p):
208 """Returns the native path case for an existing file.
209
210 On Windows, removes any leading '\\?\'.
211 """
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000212 assert isinstance(p, unicode), repr(p)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000213 if not isabs(p):
214 raise ValueError(
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +0000215 'get_native_path_case(%r): Require an absolute path' % p, p)
216
maruel@chromium.org037758d2012-12-10 17:59:46 +0000217 # Make sure it is normalized to os.path.sep. Do not do it here to keep the
218 # function fast
219 assert '/' not in p, p
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +0000220 suffix = ''
221 count = p.count(':')
222 if count > 1:
223 # This means it has an alternate-data stream. There could be 3 ':', since
224 # it could be the $DATA datastream of an ADS. Split the whole ADS suffix
225 # off and add it back afterward. There is no way to know the native path
226 # case of an alternate data stream.
227 items = p.split(':')
228 p = ':'.join(items[0:2])
229 suffix = ''.join(':' + i for i in items[2:])
230
maruel@chromium.org42b091d2013-04-02 22:27:02 +0000231 # TODO(maruel): Use os.path.normpath?
232 if p.endswith('.\\'):
233 p = p[:-2]
234
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000235 # Windows used to have an option to turn on case sensitivity on non Win32
236 # subsystem but that's out of scope here and isn't supported anymore.
237 # Go figure why GetShortPathName() is needed.
238 try:
239 out = GetLongPathName(GetShortPathName(p))
240 except OSError, e:
241 if e.args[0] in (2, 3, 5):
242 # The path does not exist. Try to recurse and reconstruct the path.
243 base = os.path.dirname(p)
244 rest = os.path.basename(p)
245 return os.path.join(get_native_path_case(base), rest)
246 raise
247 if out.startswith('\\\\?\\'):
248 out = out[4:]
249 # Always upper case the first letter since GetLongPathName() will return the
250 # drive letter in the case it was given.
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +0000251 return out[0].upper() + out[1:] + suffix
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000252
253
254 def CommandLineToArgvW(command_line):
255 """Splits a commandline into argv using CommandLineToArgvW()."""
256 # http://msdn.microsoft.com/library/windows/desktop/bb776391.aspx
257 size = c_int()
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000258 assert isinstance(command_line, unicode)
259 ptr = windll.shell32.CommandLineToArgvW(command_line, byref(size))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000260 try:
261 return [arg for arg in (c_wchar_p * size.value).from_address(ptr)]
262 finally:
263 windll.kernel32.LocalFree(ptr)
264
265
266elif sys.platform == 'darwin':
267
268
269 # On non-windows, keep the stdlib behavior.
270 isabs = os.path.isabs
271
272
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000273 def _native_case(p):
274 """Gets the native path case. Warning: this function resolves symlinks."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000275 try:
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000276 rel_ref, _ = Carbon.File.FSPathMakeRef(p.encode('utf-8'))
277 # The OSX underlying code uses NFD but python strings are in NFC. This
278 # will cause issues with os.listdir() for example. Since the dtrace log
279 # *is* in NFC, normalize it here.
csharp@chromium.org5a2c3732013-03-07 14:44:56 +0000280 out = unicodedata.normalize(
281 'NFC', rel_ref.FSRefMakePath().decode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000282 if p.endswith(os.path.sep) and not out.endswith(os.path.sep):
283 return out + os.path.sep
284 return out
285 except MacOS.Error, e:
286 if e.args[0] in (-43, -120):
287 # The path does not exist. Try to recurse and reconstruct the path.
288 # -43 means file not found.
289 # -120 means directory not found.
290 base = os.path.dirname(p)
291 rest = os.path.basename(p)
csharp@chromium.org5a2c3732013-03-07 14:44:56 +0000292 return os.path.join(_native_case(base), rest)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000293 raise OSError(
294 e.args[0], 'Failed to get native path for %s' % p, p, e.args[1])
295
296
297 def _split_at_symlink_native(base_path, rest):
298 """Returns the native path for a symlink."""
299 base, symlink, rest = split_at_symlink(base_path, rest)
300 if symlink:
301 if not base_path:
302 base_path = base
303 else:
304 base_path = safe_join(base_path, base)
csharp@chromium.orgf2eacff2013-04-04 14:20:20 +0000305 symlink = find_item_native_case(base_path, symlink)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000306 return base, symlink, rest
307
308
csharp@chromium.orgf2eacff2013-04-04 14:20:20 +0000309 def find_item_native_case(root_path, item):
310 """Gets the native path case of a single item based at root_path.
311
312 There is no API to get the native path case of symlinks on OSX. So it
313 needs to be done the slow way.
314 """
315 if item == '..':
316 return item
317
318 item = item.lower()
319 for element in os.listdir(root_path):
320 if element.lower() == item:
321 return element
322
323
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000324 def get_native_path_case(path):
325 """Returns the native path case for an existing file.
326
327 Technically, it's only HFS+ on OSX that is case preserving and
328 insensitive. It's the default setting on HFS+ but can be changed.
329 """
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000330 assert isinstance(path, unicode), repr(path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000331 if not isabs(path):
332 raise ValueError(
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000333 'get_native_path_case(%r): Require an absolute path' % path, path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000334 if path.startswith('/dev'):
335 # /dev is not visible from Carbon, causing an exception.
336 return path
337
338 # Starts assuming there is no symlink along the path.
339 resolved = _native_case(path)
csharp@chromium.orgf2eacff2013-04-04 14:20:20 +0000340 if path.lower() in (resolved.lower(), resolved.lower() + './'):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000341 # This code path is incredibly faster.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000342 logging.debug('get_native_path_case(%s) = %s' % (path, resolved))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000343 return resolved
344
345 # There was a symlink, process it.
346 base, symlink, rest = _split_at_symlink_native(None, path)
347 assert symlink, (path, base, symlink, rest, resolved)
348 prev = base
349 base = safe_join(_native_case(base), symlink)
350 assert len(base) > len(prev)
351 while rest:
352 prev = base
353 relbase, symlink, rest = _split_at_symlink_native(base, rest)
354 base = safe_join(base, relbase)
355 assert len(base) > len(prev), (prev, base, symlink)
356 if symlink:
357 base = safe_join(base, symlink)
358 assert len(base) > len(prev), (prev, base, symlink)
359 # Make sure no symlink was resolved.
360 assert base.lower() == path.lower(), (base, path)
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000361 logging.debug('get_native_path_case(%s) = %s' % (path, base))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000362 return base
363
364
365else: # OSes other than Windows and OSX.
366
367
368 # On non-windows, keep the stdlib behavior.
369 isabs = os.path.isabs
370
371
csharp@chromium.orgf2eacff2013-04-04 14:20:20 +0000372 def find_item_native_case(root, item):
373 """Gets the native path case of a single item based at root_path."""
374 if item == '..':
375 return item
376
377 root = get_native_path_case(root)
378 return os.path.basename(get_native_path_case(os.path.join(root, item)))
379
380
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000381 def get_native_path_case(path):
382 """Returns the native path case for an existing file.
383
384 On OSes other than OSX and Windows, assume the file system is
385 case-sensitive.
386
387 TODO(maruel): This is not strictly true. Implement if necessary.
388 """
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000389 assert isinstance(path, unicode), repr(path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000390 if not isabs(path):
391 raise ValueError(
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000392 'get_native_path_case(%r): Require an absolute path' % path, path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000393 # Give up on cygwin, as GetLongPathName() can't be called.
394 # Linux traces tends to not be normalized so use this occasion to normalize
395 # it. This function implementation already normalizes the path on the other
396 # OS so this needs to be done here to be coherent between OSes.
397 out = os.path.normpath(path)
398 if path.endswith(os.path.sep) and not out.endswith(os.path.sep):
399 return out + os.path.sep
400 return out
401
402
403if sys.platform != 'win32': # All non-Windows OSes.
404
405
406 def safe_join(*args):
407 """Joins path elements like os.path.join() but doesn't abort on absolute
408 path.
409
410 os.path.join('foo', '/bar') == '/bar'
411 but safe_join('foo', '/bar') == 'foo/bar'.
412 """
413 out = ''
414 for element in args:
415 if element.startswith(os.path.sep):
416 if out.endswith(os.path.sep):
417 out += element[1:]
418 else:
419 out += element
420 else:
421 if out.endswith(os.path.sep):
422 out += element
423 else:
424 out += os.path.sep + element
425 return out
426
427
428 def split_at_symlink(base_dir, relfile):
429 """Scans each component of relfile and cut the string at the symlink if
430 there is any.
431
432 Returns a tuple (base_path, symlink, rest), with symlink == rest == None if
433 not symlink was found.
434 """
435 if base_dir:
436 assert relfile
437 assert os.path.isabs(base_dir)
438 index = 0
439 else:
440 assert os.path.isabs(relfile)
441 index = 1
442
443 def at_root(rest):
444 if base_dir:
445 return safe_join(base_dir, rest)
446 return rest
447
448 while True:
449 try:
450 index = relfile.index(os.path.sep, index)
451 except ValueError:
452 index = len(relfile)
453 full = at_root(relfile[:index])
454 if os.path.islink(full):
455 # A symlink!
456 base = os.path.dirname(relfile[:index])
457 symlink = os.path.basename(relfile[:index])
458 rest = relfile[index:]
459 logging.debug(
460 'split_at_symlink(%s, %s) -> (%s, %s, %s)' %
461 (base_dir, relfile, base, symlink, rest))
462 return base, symlink, rest
463 if index == len(relfile):
464 break
465 index += 1
466 return relfile, None, None
467
468
maruel@chromium.orgac36fb72013-05-21 14:50:53 +0000469class Unbuffered(object):
470 """Disable buffering on a file object."""
471 def __init__(self, stream):
472 self.stream = stream
473
474 def write(self, data):
475 self.stream.write(data)
476 if '\n' in data:
477 self.stream.flush()
478
479 def __getattr__(self, attr):
480 return getattr(self.stream, attr)
481
482
483def disable_buffering():
484 """Makes this process and child processes stdout unbuffered."""
485 if not os.environ.get('PYTHONUNBUFFERED'):
486 # Since sys.stdout is a C++ object, it's impossible to do
487 # sys.stdout.write = lambda...
488 sys.stdout = Unbuffered(sys.stdout)
489 os.environ['PYTHONUNBUFFERED'] = 'x'
490
491
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000492def fix_python_path(cmd):
493 """Returns the fixed command line to call the right python executable."""
494 out = cmd[:]
495 if out[0] == 'python':
496 out[0] = sys.executable
497 elif out[0].endswith('.py'):
498 out.insert(0, sys.executable)
499 return out
500
501
maruel@chromium.org2909b7c2013-05-29 18:52:59 +0000502def create_subprocess_thunk():
503 """Creates a small temporary script to start the child process.
504
505 This thunk doesn't block, its unique name is used to identify it as the
506 parent.
507 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000508 handle, name = tempfile.mkstemp(prefix='trace_inputs_thunk', suffix='.py')
maruel@chromium.org2909b7c2013-05-29 18:52:59 +0000509 try:
510 os.write(
511 handle,
512 (
513 'import subprocess, sys\n'
514 'sys.exit(subprocess.call(sys.argv[2:]))\n'
515 ))
516 finally:
517 os.close(handle)
518 return name
519
520
521def create_exec_thunk():
522 """Creates a small temporary script to start the child executable.
523
524 Reads from the file handle provided as the fisrt argument to block, then
525 execv() the command to be traced.
526 """
527 handle, name = tempfile.mkstemp(prefix='trace_inputs_thunk', suffix='.py')
528 try:
529 os.write(
530 handle,
531 (
532 'import os, sys\n'
533 'fd = int(sys.argv[1])\n'
534 # This will block until the controlling process writes a byte on the
535 # pipe. It will do so once the tracing tool, e.g. strace, is ready to
536 # trace.
537 'os.read(fd, 1)\n'
538 'os.close(fd)\n'
539 'os.execv(sys.argv[2], sys.argv[2:])\n'
540 ))
541 finally:
542 os.close(handle)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000543 return name
544
545
546def strace_process_quoted_arguments(text):
547 """Extracts quoted arguments on a string and return the arguments as a list.
548
549 Implemented as an automaton. Supports incomplete strings in the form
550 '"foo"...'.
551
552 Example:
553 With text = '"foo", "bar"', the function will return ['foo', 'bar']
554
555 TODO(maruel): Implement escaping.
556 """
557 # All the possible states of the DFA.
558 ( NEED_QUOTE, # Begining of a new arguments.
559 INSIDE_STRING, # Inside an argument.
560 ESCAPED, # Found a '\' inside a quote. Treat the next char as-is.
561 NEED_COMMA_OR_DOT, # Right after the closing quote of an argument. Could be
562 # a serie of 3 dots or a comma.
563 NEED_SPACE, # Right after a comma
564 NEED_DOT_2, # Found a dot, need a second one.
565 NEED_DOT_3, # Found second dot, need a third one.
566 NEED_COMMA, # Found third dot, need a comma.
567 ) = range(8)
568
569 state = NEED_QUOTE
570 out = []
571 for index, char in enumerate(text):
572 if char == '"':
573 if state == NEED_QUOTE:
574 state = INSIDE_STRING
575 # A new argument was found.
576 out.append('')
577 elif state == INSIDE_STRING:
578 # The argument is now closed.
579 state = NEED_COMMA_OR_DOT
580 elif state == ESCAPED:
581 out[-1] += char
582 state = INSIDE_STRING
583 else:
584 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000585 'Can\'t process char \'%s\' at column %d for: %r' % (
586 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000587 index,
588 text)
589 elif char == ',':
590 if state in (NEED_COMMA_OR_DOT, NEED_COMMA):
591 state = NEED_SPACE
592 elif state == INSIDE_STRING:
593 out[-1] += char
594 elif state == ESCAPED:
595 out[-1] += char
596 state = INSIDE_STRING
597 else:
598 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000599 'Can\'t process char \'%s\' at column %d for: %r' % (
600 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000601 index,
602 text)
603 elif char == ' ':
604 if state == NEED_SPACE:
605 state = NEED_QUOTE
606 elif state == INSIDE_STRING:
607 out[-1] += char
608 elif state == ESCAPED:
609 out[-1] += char
610 state = INSIDE_STRING
611 else:
612 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000613 'Can\'t process char \'%s\' at column %d for: %r' % (
614 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000615 index,
616 text)
617 elif char == '.':
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000618 if state in (NEED_QUOTE, NEED_COMMA_OR_DOT):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000619 # The string is incomplete, this mean the strace -s flag should be
620 # increased.
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000621 # For NEED_QUOTE, the input string would look like '"foo", ...'.
622 # For NEED_COMMA_OR_DOT, the input string would look like '"foo"...'
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000623 state = NEED_DOT_2
624 elif state == NEED_DOT_2:
625 state = NEED_DOT_3
626 elif state == NEED_DOT_3:
627 state = NEED_COMMA
628 elif state == INSIDE_STRING:
629 out[-1] += char
630 elif state == ESCAPED:
631 out[-1] += char
632 state = INSIDE_STRING
633 else:
634 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000635 'Can\'t process char \'%s\' at column %d for: %r' % (
636 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000637 index,
638 text)
639 elif char == '\\':
640 if state == ESCAPED:
641 out[-1] += char
642 state = INSIDE_STRING
643 elif state == INSIDE_STRING:
644 state = ESCAPED
645 else:
646 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000647 'Can\'t process char \'%s\' at column %d for: %r' % (
648 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000649 index,
650 text)
651 else:
652 if state == INSIDE_STRING:
653 out[-1] += char
654 else:
655 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000656 'Can\'t process char \'%s\' at column %d for: %r' % (
657 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000658 index,
659 text)
660 if state not in (NEED_COMMA, NEED_COMMA_OR_DOT):
661 raise ValueError(
662 'String is incorrectly terminated: %r' % text,
663 text)
664 return out
665
666
667def read_json(filepath):
668 with open(filepath, 'r') as f:
669 return json.load(f)
670
671
672def write_json(filepath_or_handle, data, dense):
673 """Writes data into filepath or file handle encoded as json.
674
675 If dense is True, the json is packed. Otherwise, it is human readable.
676 """
677 if hasattr(filepath_or_handle, 'write'):
678 if dense:
679 filepath_or_handle.write(
680 json.dumps(data, sort_keys=True, separators=(',',':')))
681 else:
682 filepath_or_handle.write(json.dumps(data, sort_keys=True, indent=2))
683 else:
684 with open(filepath_or_handle, 'wb') as f:
685 if dense:
686 json.dump(data, f, sort_keys=True, separators=(',',':'))
687 else:
688 json.dump(data, f, sort_keys=True, indent=2)
689
690
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000691def assert_is_renderable(pseudo_string):
692 """Asserts the input is a valid object to be processed by render()."""
693 assert (
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000694 pseudo_string is None or
695 isinstance(pseudo_string, unicode) or
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000696 hasattr(pseudo_string, 'render')), repr(pseudo_string)
697
698
699def render(pseudo_string):
700 """Converts the pseudo-string to an unicode string."""
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000701 if pseudo_string is None or isinstance(pseudo_string, unicode):
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000702 return pseudo_string
703 return pseudo_string.render()
704
705
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000706class Results(object):
707 """Results of a trace session."""
708
709 class _TouchedObject(object):
710 """Something, a file or a directory, that was accessed."""
711 def __init__(self, root, path, tainted, size, nb_files):
712 logging.debug(
713 '%s(%s, %s, %s, %s, %s)' %
714 (self.__class__.__name__, root, path, tainted, size, nb_files))
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000715 assert_is_renderable(root)
716 assert_is_renderable(path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000717 self.root = root
718 self.path = path
719 self.tainted = tainted
720 self.nb_files = nb_files
maruel@chromium.org538141b2013-06-03 20:57:17 +0000721 # Can be used as a cache or a default value, depending on context. In
722 # particular, once self.tainted is True, because the path was replaced
723 # with a variable, it is not possible to look up the file size.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000724 self._size = size
725 # These are cache only.
726 self._real_path = None
727
728 # Check internal consistency.
729 assert path, path
730 assert tainted or bool(root) != bool(isabs(path)), (root, path)
731 assert tainted or (
732 not os.path.exists(self.full_path) or
733 (self.full_path == get_native_path_case(self.full_path))), (
734 tainted, self.full_path, get_native_path_case(self.full_path))
735
736 @property
737 def existent(self):
738 return self.size != -1
739
740 @property
741 def full_path(self):
742 if self.root:
743 return os.path.join(self.root, self.path)
744 return self.path
745
746 @property
747 def real_path(self):
748 """Returns the path with symlinks resolved."""
749 if not self._real_path:
750 self._real_path = os.path.realpath(self.full_path)
751 return self._real_path
752
753 @property
754 def size(self):
maruel@chromium.org538141b2013-06-03 20:57:17 +0000755 """File's size. -1 is not existent.
756
757 Once tainted, it is not possible the retrieve the file size anymore since
758 the path is composed of variables.
759 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000760 if self._size is None and not self.tainted:
761 try:
762 self._size = os.stat(self.full_path).st_size
763 except OSError:
764 self._size = -1
765 return self._size
766
767 def flatten(self):
768 """Returns a dict representing this object.
769
770 A 'size' of 0 means the file was only touched and not read.
771 """
772 return {
773 'path': self.path,
774 'size': self.size,
775 }
776
777 def replace_variables(self, variables):
778 """Replaces the root of this File with one of the variables if it matches.
779
780 If a variable replacement occurs, the cloned object becomes tainted.
781 """
782 for variable, root_path in variables.iteritems():
783 if self.path.startswith(root_path):
784 return self._clone(
785 self.root, variable + self.path[len(root_path):], True)
786 # No need to clone, returns ourself.
787 return self
788
789 def strip_root(self, root):
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000790 """Returns a clone of itself with 'root' stripped off.
791
792 Note that the file is kept if it is either accessible from a symlinked
793 path that was used to access the file or through the real path.
794 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000795 # Check internal consistency.
796 assert self.tainted or (isabs(root) and root.endswith(os.path.sep)), root
797 if not self.full_path.startswith(root):
798 # Now try to resolve the symlinks to see if it can be reached this way.
799 # Only try *after* trying without resolving symlink.
800 if not self.real_path.startswith(root):
801 return None
802 path = self.real_path
803 else:
804 path = self.full_path
805 return self._clone(root, path[len(root):], self.tainted)
806
807 def _clone(self, new_root, new_path, tainted):
808 raise NotImplementedError(self.__class__.__name__)
809
810 class File(_TouchedObject):
811 """A file that was accessed. May not be present anymore.
812
813 If tainted is true, it means it is not a real path anymore as a variable
814 replacement occured.
815
maruel@chromium.org538141b2013-06-03 20:57:17 +0000816 |mode| can be one of None, TOUCHED, READ or WRITE.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000817 """
maruel@chromium.org538141b2013-06-03 20:57:17 +0000818 # Was probed for existence, and it is existent, but was never _opened_.
819 TOUCHED = 't'
820 # Opened for read only and guaranteed to not have been written to.
821 READ = 'r'
822 # Opened for write.
823 WRITE = 'w'
824
825 # They are listed in order of priority. E.g. if a file is traced as TOUCHED
826 # then as WRITE, only keep WRITE. None means no idea, which is a problem on
827 # Windows.
828 ACCEPTABLE_MODES = (None, TOUCHED, READ, WRITE)
829
830 def __init__(self, root, path, tainted, size, mode):
831 assert mode in self.ACCEPTABLE_MODES
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000832 super(Results.File, self).__init__(root, path, tainted, size, 1)
maruel@chromium.org538141b2013-06-03 20:57:17 +0000833 self.mode = mode
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000834
835 def _clone(self, new_root, new_path, tainted):
836 """Clones itself keeping meta-data."""
837 # Keep the self.size and self._real_path caches for performance reason. It
838 # is also important when the file becomes tainted (with a variable instead
839 # of the real path) since self.path is not an on-disk path anymore so
840 # out._size cannot be updated.
maruel@chromium.org538141b2013-06-03 20:57:17 +0000841 out = self.__class__(new_root, new_path, tainted, self.size, self.mode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000842 out._real_path = self._real_path
843 return out
844
maruel@chromium.org538141b2013-06-03 20:57:17 +0000845 def flatten(self):
846 out = super(Results.File, self).flatten()
847 out['mode'] = self.mode
848 return out
849
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000850 class Directory(_TouchedObject):
maruel@chromium.org538141b2013-06-03 20:57:17 +0000851 """A directory of files. Must exist.
852
853 For a Directory instance, self.size is not a cache, it's an actual value
854 that is never modified and represents the total size of the files contained
855 in this directory. It is possible that the directory is empty so that
856 size==0; this happens if there's only an invalid symlink in it.
857 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000858 def __init__(self, root, path, tainted, size, nb_files):
859 """path='.' is a valid value and must be handled appropriately."""
860 assert not path.endswith(os.path.sep), path
861 super(Results.Directory, self).__init__(
862 root, path + os.path.sep, tainted, size, nb_files)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000863
864 def flatten(self):
865 out = super(Results.Directory, self).flatten()
866 out['nb_files'] = self.nb_files
867 return out
868
869 def _clone(self, new_root, new_path, tainted):
870 """Clones itself keeping meta-data."""
871 out = self.__class__(
872 new_root,
873 new_path.rstrip(os.path.sep),
874 tainted,
875 self.size,
876 self.nb_files)
877 out._real_path = self._real_path
878 return out
879
880 class Process(object):
881 """A process that was traced.
882
883 Contains references to the files accessed by this process and its children.
884 """
885 def __init__(self, pid, files, executable, command, initial_cwd, children):
886 logging.debug('Process(%s, %d, ...)' % (pid, len(files)))
887 self.pid = pid
888 self.files = sorted(files, key=lambda x: x.path)
889 self.children = children
890 self.executable = executable
891 self.command = command
892 self.initial_cwd = initial_cwd
893
894 # Check internal consistency.
895 assert len(set(f.path for f in self.files)) == len(self.files), sorted(
896 f.path for f in self.files)
897 assert isinstance(self.children, list)
898 assert isinstance(self.files, list)
899
900 @property
901 def all(self):
902 for child in self.children:
903 for i in child.all:
904 yield i
905 yield self
906
907 def flatten(self):
908 return {
909 'children': [c.flatten() for c in self.children],
910 'command': self.command,
911 'executable': self.executable,
912 'files': [f.flatten() for f in self.files],
913 'initial_cwd': self.initial_cwd,
914 'pid': self.pid,
915 }
916
917 def strip_root(self, root):
918 assert isabs(root) and root.endswith(os.path.sep), root
919 # Loads the files after since they are constructed as objects.
920 out = self.__class__(
921 self.pid,
922 filter(None, (f.strip_root(root) for f in self.files)),
923 self.executable,
924 self.command,
925 self.initial_cwd,
926 [c.strip_root(root) for c in self.children])
927 logging.debug(
928 'strip_root(%s) %d -> %d' % (root, len(self.files), len(out.files)))
929 return out
930
931 def __init__(self, process):
932 self.process = process
933 # Cache.
934 self._files = None
935
936 def flatten(self):
937 return {
938 'root': self.process.flatten(),
939 }
940
941 @property
942 def files(self):
943 if self._files is None:
944 self._files = sorted(
945 sum((p.files for p in self.process.all), []),
946 key=lambda x: x.path)
947 return self._files
948
949 @property
950 def existent(self):
951 return [f for f in self.files if f.existent]
952
953 @property
954 def non_existent(self):
955 return [f for f in self.files if not f.existent]
956
957 def strip_root(self, root):
958 """Returns a clone with all the files outside the directory |root| removed
959 and converts all the path to be relative paths.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000960
961 It keeps files accessible through the |root| directory or that have been
962 accessed through any symlink which points to the same directory.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000963 """
964 # Resolve any symlink
965 root = os.path.realpath(root)
966 root = get_native_path_case(root).rstrip(os.path.sep) + os.path.sep
967 logging.debug('strip_root(%s)' % root)
968 return Results(self.process.strip_root(root))
969
970
971class ApiBase(object):
972 """OS-agnostic API to trace a process and its children."""
973 class Context(object):
974 """Processes one log line at a time and keeps the list of traced processes.
975
976 The parsing is complicated by the fact that logs are traced out of order for
977 strace but in-order for dtrace and logman. In addition, on Windows it is
978 very frequent that processids are reused so a flat list cannot be used. But
979 at the same time, it is impossible to faithfully construct a graph when the
980 logs are processed out of order. So both a tree and a flat mapping are used,
981 the tree is the real process tree, while the flat mapping stores the last
982 valid process for the corresponding processid. For the strace case, the
983 tree's head is guessed at the last moment.
984 """
985 class Process(object):
986 """Keeps context for one traced child process.
987
988 Logs all the files this process touched. Ignores directories.
989 """
990 def __init__(self, blacklist, pid, initial_cwd):
991 # Check internal consistency.
992 assert isinstance(pid, int), repr(pid)
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000993 assert_is_renderable(initial_cwd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000994 self.pid = pid
995 # children are Process instances.
996 self.children = []
997 self.initial_cwd = initial_cwd
998 self.cwd = None
maruel@chromium.org538141b2013-06-03 20:57:17 +0000999 self.files = {}
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001000 self.executable = None
1001 self.command = None
1002 self._blacklist = blacklist
1003
1004 def to_results_process(self):
1005 """Resolves file case sensitivity and or late-bound strings."""
1006 # When resolving files, it's normal to get dupe because a file could be
1007 # opened multiple times with different case. Resolve the deduplication
1008 # here.
1009 def fix_path(x):
1010 """Returns the native file path case.
1011
1012 Converts late-bound strings.
1013 """
1014 if not x:
1015 # Do not convert None instance to 'None'.
1016 return x
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001017 x = render(x)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001018 if os.path.isabs(x):
1019 # If the path is not absolute, which tends to happen occasionally on
1020 # Windows, it is not possible to get the native path case so ignore
1021 # that trace. It mostly happens for 'executable' value.
1022 x = get_native_path_case(x)
1023 return x
1024
maruel@chromium.org538141b2013-06-03 20:57:17 +00001025 def fix_and_blacklist_path(x, m):
1026 """Receives a tuple (filepath, mode) and processes filepath."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001027 x = fix_path(x)
1028 if not x:
1029 return
1030 # The blacklist needs to be reapplied, since path casing could
1031 # influence blacklisting.
1032 if self._blacklist(x):
1033 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00001034 # Filters out directories. Some may have passed through.
1035 if os.path.isdir(x):
1036 return
1037 return x, m
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001038
maruel@chromium.org538141b2013-06-03 20:57:17 +00001039 # Renders all the files as strings, as some could be RelativePath
1040 # instances. It is important to do it first since there could still be
1041 # multiple entries with the same path but different modes.
1042 rendered = (
1043 fix_and_blacklist_path(f, m) for f, m in self.files.iteritems())
1044 files = sorted(
1045 (f for f in rendered if f),
1046 key=lambda x: (x[0], Results.File.ACCEPTABLE_MODES.index(x[1])))
1047 # Then converting into a dict will automatically clean up lesser
1048 # important values.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001049 files = [
maruel@chromium.org538141b2013-06-03 20:57:17 +00001050 Results.File(None, f, False, None, m)
1051 for f, m in dict(files).iteritems()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001052 ]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001053 return Results.Process(
1054 self.pid,
1055 files,
1056 fix_path(self.executable),
1057 self.command,
1058 fix_path(self.initial_cwd),
1059 [c.to_results_process() for c in self.children])
1060
maruel@chromium.org538141b2013-06-03 20:57:17 +00001061 def add_file(self, filepath, mode):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001062 """Adds a file if it passes the blacklist."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001063 if self._blacklist(render(filepath)):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001064 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00001065 logging.debug('add_file(%d, %s, %s)', self.pid, filepath, mode)
1066 # Note that filepath and not render(filepath) is added. It is because
1067 # filepath could be something else than a string, like a RelativePath
1068 # instance for dtrace logs.
1069 modes = Results.File.ACCEPTABLE_MODES
1070 old_mode = self.files.setdefault(filepath, mode)
1071 if old_mode != mode and modes.index(old_mode) < modes.index(mode):
1072 # Take the highest value.
1073 self.files[filepath] = mode
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001074
1075 def __init__(self, blacklist):
1076 self.blacklist = blacklist
1077 # Initial process.
1078 self.root_process = None
1079 # dict to accelerate process lookup, to not have to lookup the whole graph
1080 # each time.
1081 self._process_lookup = {}
1082
1083 class Tracer(object):
1084 """During it's lifetime, the tracing subsystem is enabled."""
1085 def __init__(self, logname):
1086 self._logname = logname
maruel@chromium.org808e3f22013-05-16 16:42:54 +00001087 self._lock = threading.RLock()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001088 self._traces = []
1089 self._initialized = True
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00001090 self._scripts_to_cleanup = []
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001091
1092 def trace(self, cmd, cwd, tracename, output):
1093 """Runs the OS-specific trace program on an executable.
1094
1095 Arguments:
1096 - cmd: The command (a list) to run.
1097 - cwd: Current directory to start the child process in.
1098 - tracename: Name of the trace in the logname file.
1099 - output: If False, redirects output to PIPEs.
1100
1101 Returns a tuple (resultcode, output) and updates the internal trace
1102 entries.
1103 """
1104 # The implementation adds an item to self._traces.
1105 raise NotImplementedError(self.__class__.__name__)
1106
1107 def close(self, _timeout=None):
1108 """Saves the meta-data in the logname file.
1109
1110 For kernel-based tracing, stops the tracing subsystem.
1111
1112 Must not be used manually when using 'with' construct.
1113 """
1114 with self._lock:
maruel@chromium.org808e3f22013-05-16 16:42:54 +00001115 if not self._initialized:
1116 raise TracingFailure(
1117 'Called %s.close() on an unitialized object' %
1118 self.__class__.__name__,
1119 None, None, None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001120 try:
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00001121 while self._scripts_to_cleanup:
1122 try:
1123 os.remove(self._scripts_to_cleanup.pop())
1124 except OSError as e:
1125 logging.error('Failed to delete a temporary script: %s', e)
maruel@chromium.org808e3f22013-05-16 16:42:54 +00001126 write_json(self._logname, self._gen_logdata(), False)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001127 finally:
1128 self._initialized = False
1129
1130 def post_process_log(self):
1131 """Post-processes the log so it becomes faster to load afterward.
1132
1133 Must not be used manually when using 'with' construct.
1134 """
1135 assert not self._initialized, 'Must stop tracing first.'
1136
maruel@chromium.org808e3f22013-05-16 16:42:54 +00001137 def _gen_logdata(self):
1138 """Returns the data to be saved in the trace file."""
1139 return {
1140 'traces': self._traces,
1141 }
1142
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001143 def __enter__(self):
1144 """Enables 'with' statement."""
1145 return self
1146
1147 def __exit__(self, exc_type, exc_value, traceback):
1148 """Enables 'with' statement."""
1149 self.close()
1150 # If an exception was thrown, do not process logs.
1151 if not exc_type:
1152 self.post_process_log()
1153
1154 def get_tracer(self, logname):
1155 """Returns an ApiBase.Tracer instance.
1156
1157 Initializes the tracing subsystem, which is a requirement for kernel-based
1158 tracers. Only one tracer instance should be live at a time!
1159
1160 logname is the filepath to the json file that will contain the meta-data
1161 about the logs.
1162 """
1163 return self.Tracer(logname)
1164
1165 @staticmethod
1166 def clean_trace(logname):
1167 """Deletes an old log."""
1168 raise NotImplementedError()
1169
1170 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001171 def parse_log(cls, logname, blacklist, trace_name):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001172 """Processes trace logs and returns the files opened and the files that do
1173 not exist.
1174
1175 It does not track directories.
1176
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001177 Arguments:
1178 - logname: must be an absolute path.
1179 - blacklist: must be a lambda.
1180 - trace_name: optional trace to read, defaults to reading all traces.
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00001181
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001182 Most of the time, files that do not exist are temporary test files that
1183 should be put in /tmp instead. See http://crbug.com/116251.
1184
1185 Returns a list of dict with keys:
1186 - results: A Results instance.
1187 - trace: The corresponding tracename parameter provided to
1188 get_tracer().trace().
1189 - output: Output gathered during execution, if get_tracer().trace(...,
1190 output=False) was used.
1191 """
1192 raise NotImplementedError(cls.__class__.__name__)
1193
1194
1195class Strace(ApiBase):
1196 """strace implies linux."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001197 @staticmethod
1198 def load_filename(filename):
1199 """Parses a filename in a log."""
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001200 # TODO(maruel): Be compatible with strace -x.
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001201 assert isinstance(filename, str)
1202 out = ''
1203 i = 0
1204 while i < len(filename):
1205 c = filename[i]
1206 if c == '\\':
1207 out += chr(int(filename[i+1:i+4], 8))
1208 i += 4
1209 else:
1210 out += c
1211 i += 1
1212 # TODO(maruel): That's not necessarily true that the current code page is
1213 # utf-8.
1214 return out.decode('utf-8')
1215
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001216 class Context(ApiBase.Context):
1217 """Processes a strace log line and keeps the list of existent and non
1218 existent files accessed.
1219
1220 Ignores directories.
1221
1222 Uses late-binding to processes the cwd of each process. The problem is that
1223 strace generates one log file per process it traced but doesn't give any
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001224 information about which process was started when and by who. So process the
1225 logs out of order and use late binding with RelativePath to be able to
1226 deduce the initial directory of each process once all the logs are parsed.
1227
1228 TODO(maruel): Use the script even in the non-sudo case, so log parsing can
1229 be done in two phase: first find the root process, then process the child
1230 processes in order. With that, it should be possible to not use RelativePath
1231 anymore. This would significantly simplify the code!
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001232 """
1233 class Process(ApiBase.Context.Process):
1234 """Represents the state of a process.
1235
1236 Contains all the information retrieved from the pid-specific log.
1237 """
1238 # Function names are using ([a-z_0-9]+)
1239 # This is the most common format. function(args) = result
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001240 RE_HEADER = re.compile(r'^([a-z_0-9]+)\((.*?)\)\s+= (.+)$')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001241 # An interrupted function call, only grab the minimal header.
1242 RE_UNFINISHED = re.compile(r'^([^\(]+)(.*) \<unfinished \.\.\.\>$')
1243 # A resumed function call.
1244 RE_RESUMED = re.compile(r'^<\.\.\. ([^ ]+) resumed> (.+)$')
1245 # A process received a signal.
1246 RE_SIGNAL = re.compile(r'^--- SIG[A-Z]+ .+ ---')
1247 # A process didn't handle a signal. Ignore any junk appearing before,
1248 # because the process was forcibly killed so it won't open any new file.
1249 RE_KILLED = re.compile(
1250 r'^.*\+\+\+ killed by ([A-Z]+)( \(core dumped\))? \+\+\+$')
1251 # The process has exited.
1252 RE_PROCESS_EXITED = re.compile(r'^\+\+\+ exited with (\d+) \+\+\+')
1253 # A call was canceled. Ignore any prefix.
1254 RE_UNAVAILABLE = re.compile(r'^.*\)\s*= \? <unavailable>$')
1255 # Happens when strace fails to even get the function name.
1256 UNNAMED_FUNCTION = '????'
1257
1258 # Corner-case in python, a class member function decorator must not be
1259 # @staticmethod.
1260 def parse_args(regexp, expect_zero): # pylint: disable=E0213
1261 """Automatically convert the str 'args' into a list of processed
1262 arguments.
1263
1264 Arguments:
1265 - regexp is used to parse args.
1266 - expect_zero: one of True, False or None.
1267 - True: will check for result.startswith('0') first and will ignore
1268 the trace line completely otherwise. This is important because for
1269 many functions, the regexp will not process if the call failed.
1270 - False: will check for not result.startswith(('?', '-1')) for the
1271 same reason than with True.
1272 - None: ignore result.
1273 """
1274 def meta_hook(function):
1275 assert function.__name__.startswith('handle_')
1276 def hook(self, args, result):
1277 if expect_zero is True and not result.startswith('0'):
1278 return
1279 if expect_zero is False and result.startswith(('?', '-1')):
1280 return
1281 match = re.match(regexp, args)
1282 if not match:
1283 raise TracingFailure(
1284 'Failed to parse %s(%s) = %s' %
1285 (function.__name__[len('handle_'):], args, result),
1286 None, None, None)
1287 return function(self, match.groups(), result)
1288 return hook
1289 return meta_hook
1290
1291 class RelativePath(object):
1292 """A late-bound relative path."""
1293 def __init__(self, parent, value):
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001294 assert_is_renderable(parent)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001295 self.parent = parent
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001296 assert (
1297 value is None or
1298 (isinstance(value, unicode) and not os.path.isabs(value)))
1299 self.value = value
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001300 if self.value:
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001301 # TODO(maruel): On POSIX, '\\' is a valid character so remove this
1302 # assert.
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001303 assert '\\' not in self.value, value
1304 assert '\\' not in self.value, (repr(value), repr(self.value))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001305
1306 def render(self):
1307 """Returns the current directory this instance is representing.
1308
1309 This function is used to return the late-bound value.
1310 """
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001311 assert self.parent is not None
1312 parent = render(self.parent)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001313 if self.value:
1314 return os.path.normpath(os.path.join(parent, self.value))
1315 return parent
1316
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001317 def __init__(self, root, pid):
1318 """Keeps enough information to be able to guess the original process
1319 root.
1320
1321 strace doesn't store which process was the initial process. So more
1322 information needs to be kept so the graph can be reconstructed from the
1323 flat map.
1324 """
1325 logging.info('%s(%d)' % (self.__class__.__name__, pid))
1326 super(Strace.Context.Process, self).__init__(root.blacklist, pid, None)
1327 assert isinstance(root, ApiBase.Context)
1328 self._root = weakref.ref(root)
1329 # The dict key is the function name of the pending call, like 'open'
1330 # or 'execve'.
1331 self._pending_calls = {}
1332 self._line_number = 0
1333 # Current directory when the process started.
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001334 if isinstance(self._root(), unicode):
1335 self.initial_cwd = self._root()
1336 else:
1337 self.initial_cwd = self.RelativePath(self._root(), None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001338 self.parentid = None
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001339 self._done = False
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001340
1341 def get_cwd(self):
1342 """Returns the best known value of cwd."""
1343 return self.cwd or self.initial_cwd
1344
1345 def render(self):
1346 """Returns the string value of the RelativePath() object.
1347
1348 Used by RelativePath. Returns the initial directory and not the
1349 current one since the current directory 'cwd' validity is time-limited.
1350
1351 The validity is only guaranteed once all the logs are processed.
1352 """
1353 return self.initial_cwd.render()
1354
1355 def on_line(self, line):
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001356 assert isinstance(line, str)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001357 self._line_number += 1
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001358 try:
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001359 if self._done:
1360 raise TracingFailure(
1361 'Found a trace for a terminated process or corrupted log',
1362 None, None, None)
1363
1364 if self.RE_SIGNAL.match(line):
1365 # Ignore signals.
1366 return
1367
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001368 match = self.RE_KILLED.match(line)
1369 if match:
1370 # Converts a '+++ killed by Foo +++' trace into an exit_group().
1371 self.handle_exit_group(match.group(1), None)
1372 return
1373
1374 match = self.RE_PROCESS_EXITED.match(line)
1375 if match:
1376 # Converts a '+++ exited with 1 +++' trace into an exit_group()
1377 self.handle_exit_group(match.group(1), None)
1378 return
1379
1380 match = self.RE_UNFINISHED.match(line)
1381 if match:
1382 if match.group(1) in self._pending_calls:
1383 raise TracingFailure(
1384 'Found two unfinished calls for the same function',
1385 None, None, None,
1386 self._pending_calls)
1387 self._pending_calls[match.group(1)] = (
1388 match.group(1) + match.group(2))
1389 return
1390
1391 match = self.RE_UNAVAILABLE.match(line)
1392 if match:
1393 # This usually means a process was killed and a pending call was
1394 # canceled.
1395 # TODO(maruel): Look up the last exit_group() trace just above and
1396 # make sure any self._pending_calls[anything] is properly flushed.
1397 return
1398
1399 match = self.RE_RESUMED.match(line)
1400 if match:
1401 if match.group(1) not in self._pending_calls:
1402 raise TracingFailure(
1403 'Found a resumed call that was not logged as unfinished',
1404 None, None, None,
1405 self._pending_calls)
1406 pending = self._pending_calls.pop(match.group(1))
1407 # Reconstruct the line.
1408 line = pending + match.group(2)
1409
1410 match = self.RE_HEADER.match(line)
1411 if not match:
maruel@chromium.orgbcfc9e12013-03-19 18:47:17 +00001412 # The line is corrupted. It happens occasionally when a process is
1413 # killed forcibly with activity going on. Assume the process died.
1414 # No other line can be processed afterward.
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001415 logging.debug('%d is done: %s', self.pid, line)
maruel@chromium.orgbcfc9e12013-03-19 18:47:17 +00001416 self._done = True
1417 return
1418
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001419 if match.group(1) == self.UNNAMED_FUNCTION:
1420 return
1421
1422 # It's a valid line, handle it.
1423 handler = getattr(self, 'handle_%s' % match.group(1), None)
1424 if not handler:
1425 self._handle_unknown(match.group(1), match.group(2), match.group(3))
1426 return handler(match.group(2), match.group(3))
1427 except TracingFailure, e:
1428 # Hack in the values since the handler could be a static function.
1429 e.pid = self.pid
1430 e.line = line
1431 e.line_number = self._line_number
1432 # Re-raise the modified exception.
1433 raise
1434 except (KeyError, NotImplementedError, ValueError), e:
1435 raise TracingFailure(
1436 'Trace generated a %s exception: %s' % (
1437 e.__class__.__name__, str(e)),
1438 self.pid,
1439 self._line_number,
1440 line,
1441 e)
1442
1443 @parse_args(r'^\"(.+?)\", [FKORWX_|]+$', True)
1444 def handle_access(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001445 self._handle_file(args[0], Results.File.TOUCHED)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001446
1447 @parse_args(r'^\"(.+?)\"$', True)
1448 def handle_chdir(self, args, _result):
1449 """Updates cwd."""
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001450 self.cwd = self._mangle(args[0])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001451 logging.debug('handle_chdir(%d, %s)' % (self.pid, self.cwd))
1452
maruel@chromium.org538141b2013-06-03 20:57:17 +00001453 @parse_args(r'^\"(.+?)\", (\d+), (\d+)$', False)
1454 def handle_chown(self, args, _result):
1455 # TODO(maruel): Look at result?
1456 self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org55550722013-05-28 14:43:14 +00001457
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001458 def handle_clone(self, _args, result):
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001459 self._handling_forking('clone', result)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001460
1461 def handle_close(self, _args, _result):
1462 pass
1463
maruel@chromium.org538141b2013-06-03 20:57:17 +00001464 @parse_args(r'^\"(.+?)\", (\d+)$', False)
1465 def handle_chmod(self, args, _result):
1466 self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001467
maruel@chromium.org538141b2013-06-03 20:57:17 +00001468 @parse_args(r'^\"(.+?)\", (\d+)$', False)
1469 def handle_creat(self, args, _result):
1470 self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001471
1472 @parse_args(r'^\"(.+?)\", \[(.+)\], \[\/\* \d+ vars? \*\/\]$', True)
1473 def handle_execve(self, args, _result):
1474 # Even if in practice execve() doesn't returns when it succeeds, strace
1475 # still prints '0' as the result.
1476 filepath = args[0]
maruel@chromium.org538141b2013-06-03 20:57:17 +00001477 self._handle_file(filepath, Results.File.READ)
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001478 self.executable = self._mangle(filepath)
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001479 try:
1480 self.command = strace_process_quoted_arguments(args[1])
1481 except ValueError as e:
1482 raise TracingFailure(
1483 'Failed to process command line argument:\n%s' % e.args[0],
1484 None, None, None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001485
1486 def handle_exit_group(self, _args, _result):
1487 """Removes cwd."""
1488 self.cwd = None
1489
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001490 @parse_args(r'^(\d+|AT_FDCWD), \"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', True)
1491 def handle_faccessat(self, args, _results):
1492 if args[0] == 'AT_FDCWD':
maruel@chromium.org538141b2013-06-03 20:57:17 +00001493 self._handle_file(args[1], Results.File.TOUCHED)
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001494 else:
1495 raise Exception('Relative faccess not implemented.')
1496
maruel@chromium.org0781f322013-05-28 19:45:49 +00001497 def handle_fallocate(self, _args, result):
1498 pass
1499
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001500 def handle_fork(self, args, result):
1501 self._handle_unknown('fork', args, result)
1502
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001503 def handle_futex(self, _args, _result):
1504 pass
1505
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001506 @parse_args(r'^\"(.+?)\", (\d+)$', False)
1507 def handle_getcwd(self, args, _result):
1508 if os.path.isabs(args[0]):
1509 logging.debug('handle_chdir(%d, %s)' % (self.pid, self.cwd))
1510 if not isinstance(self.cwd, unicode):
1511 # Take the occasion to reset the path.
1512 self.cwd = self._mangle(args[0])
1513 else:
1514 # It should always match.
1515 assert self.cwd == Strace.load_filename(args[0]), (
1516 self.cwd, args[0])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001517
1518 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1519 def handle_link(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001520 self._handle_file(args[0], Results.File.READ)
1521 self._handle_file(args[1], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001522
1523 @parse_args(r'\"(.+?)\", \{.+?, \.\.\.\}', True)
1524 def handle_lstat(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001525 self._handle_file(args[0], Results.File.TOUCHED)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001526
1527 def handle_mkdir(self, _args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001528 # We track content, not directories.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001529 pass
1530
1531 @parse_args(r'^\"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', False)
1532 def handle_open(self, args, _result):
1533 if 'O_DIRECTORY' in args[1]:
1534 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00001535 self._handle_file(
1536 args[0],
1537 Results.File.READ if 'O_RDONLY' in args[1] else Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001538
1539 @parse_args(r'^(\d+|AT_FDCWD), \"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', False)
1540 def handle_openat(self, args, _result):
1541 if 'O_DIRECTORY' in args[2]:
1542 return
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001543 if args[0] == 'AT_FDCWD':
maruel@chromium.org538141b2013-06-03 20:57:17 +00001544 self._handle_file(
1545 args[1],
1546 Results.File.READ if 'O_RDONLY' in args[2]
1547 else Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001548 else:
1549 # TODO(maruel): Implement relative open if necessary instead of the
1550 # AT_FDCWD flag, let's hope not since this means tracking all active
1551 # directory handles.
maruel@chromium.org538141b2013-06-03 20:57:17 +00001552 raise NotImplementedError('Relative open via openat not implemented.')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001553
1554 @parse_args(r'^\"(.+?)\", \".+?\"(\.\.\.)?, \d+$', False)
1555 def handle_readlink(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001556 self._handle_file(args[0], Results.File.READ)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001557
1558 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1559 def handle_rename(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001560 self._handle_file(args[0], Results.File.READ)
1561 self._handle_file(args[1], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001562
1563 def handle_rmdir(self, _args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001564 # We track content, not directories.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001565 pass
1566
1567 def handle_setxattr(self, _args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001568 # TODO(maruel): self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001569 pass
1570
1571 @parse_args(r'\"(.+?)\", \{.+?, \.\.\.\}', True)
1572 def handle_stat(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001573 self._handle_file(args[0], Results.File.TOUCHED)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001574
maruel@chromium.org538141b2013-06-03 20:57:17 +00001575 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1576 def handle_symlink(self, args, _result):
1577 self._handle_file(args[0], Results.File.TOUCHED)
1578 self._handle_file(args[1], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001579
csharp@chromium.orgfa672922012-11-02 17:43:31 +00001580 @parse_args(r'^\"(.+?)\", \d+', True)
1581 def handle_truncate(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001582 self._handle_file(args[0], Results.File.WRITE)
csharp@chromium.orgfa672922012-11-02 17:43:31 +00001583
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001584 def handle_unlink(self, _args, _result):
1585 # In theory, the file had to be created anyway.
1586 pass
1587
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001588 def handle_unlinkat(self, _args, _result):
1589 # In theory, the file had to be created anyway.
1590 pass
1591
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001592 def handle_statfs(self, _args, _result):
1593 pass
1594
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001595 def handle_utimensat(self, _args, _result):
1596 pass
1597
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001598 def handle_vfork(self, _args, result):
1599 self._handling_forking('vfork', result)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001600
1601 @staticmethod
1602 def _handle_unknown(function, args, result):
1603 raise TracingFailure(
1604 'Unexpected/unimplemented trace %s(%s)= %s' %
1605 (function, args, result),
1606 None, None, None)
1607
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001608 def _handling_forking(self, name, result):
1609 """Transfers cwd."""
1610 if result.startswith(('?', '-1')):
1611 # The call failed.
1612 return
1613 # Update the other process right away.
1614 childpid = int(result)
1615 child = self._root().get_or_set_proc(childpid)
1616 if child.parentid is not None or childpid in self.children:
1617 raise TracingFailure(
1618 'Found internal inconsitency in process lifetime detection '
1619 'during a %s() call' % name,
1620 None, None, None)
1621
1622 # Copy the cwd object.
1623 child.initial_cwd = self.get_cwd()
1624 child.parentid = self.pid
1625 # It is necessary because the logs are processed out of order.
1626 self.children.append(child)
1627
maruel@chromium.org538141b2013-06-03 20:57:17 +00001628 def _handle_file(self, filepath, mode):
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001629 filepath = self._mangle(filepath)
maruel@chromium.org538141b2013-06-03 20:57:17 +00001630 self.add_file(filepath, mode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001631
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001632 def _mangle(self, filepath):
1633 """Decodes a filepath found in the log and convert it to a late-bound
1634 path if necessary.
1635
1636 |filepath| is an strace 'encoded' string and the returned value is
1637 either an unicode string if the path was absolute or a late bound path
1638 otherwise.
1639 """
1640 filepath = Strace.load_filename(filepath)
1641 if os.path.isabs(filepath):
1642 return filepath
1643 else:
1644 if isinstance(self.get_cwd(), unicode):
1645 return os.path.normpath(os.path.join(self.get_cwd(), filepath))
1646 return self.RelativePath(self.get_cwd(), filepath)
1647
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001648 def __init__(self, blacklist, root_pid, initial_cwd):
1649 """|root_pid| may be None when the root process is not known.
1650
1651 In that case, a search is done after reading all the logs to figure out
1652 the root process.
1653 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001654 super(Strace.Context, self).__init__(blacklist)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001655 assert_is_renderable(initial_cwd)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001656 self.root_pid = root_pid
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001657 self.initial_cwd = initial_cwd
1658
1659 def render(self):
1660 """Returns the string value of the initial cwd of the root process.
1661
1662 Used by RelativePath.
1663 """
1664 return self.initial_cwd
1665
1666 def on_line(self, pid, line):
1667 """Transfers control into the Process.on_line() function."""
1668 self.get_or_set_proc(pid).on_line(line.strip())
1669
1670 def to_results(self):
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001671 """If necessary, finds back the root process and verify consistency."""
1672 if not self.root_pid:
1673 # The non-sudo case. The traced process was started by strace itself,
1674 # so the pid of the traced process is not known.
1675 root = [p for p in self._process_lookup.itervalues() if not p.parentid]
1676 if len(root) == 1:
1677 self.root_process = root[0]
1678 # Save it for later.
1679 self.root_pid = self.root_process.pid
1680 else:
1681 # The sudo case. The traced process was started manually so its pid is
1682 # known.
1683 self.root_process = self._process_lookup.get(self.root_pid)
1684 if not self.root_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001685 raise TracingFailure(
1686 'Found internal inconsitency in process lifetime detection '
1687 'while finding the root process',
1688 None,
1689 None,
1690 None,
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001691 self.root_pid,
1692 sorted(self._process_lookup))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001693 process = self.root_process.to_results_process()
1694 if sorted(self._process_lookup) != sorted(p.pid for p in process.all):
1695 raise TracingFailure(
1696 'Found internal inconsitency in process lifetime detection '
1697 'while looking for len(tree) == len(list)',
1698 None,
1699 None,
1700 None,
1701 sorted(self._process_lookup),
1702 sorted(p.pid for p in process.all))
1703 return Results(process)
1704
1705 def get_or_set_proc(self, pid):
1706 """Returns the Context.Process instance for this pid or creates a new one.
1707 """
1708 if not pid or not isinstance(pid, int):
1709 raise TracingFailure(
1710 'Unpexpected value for pid: %r' % pid,
1711 pid,
1712 None,
1713 None,
1714 pid)
1715 if pid not in self._process_lookup:
1716 self._process_lookup[pid] = self.Process(self, pid)
1717 return self._process_lookup[pid]
1718
1719 @classmethod
1720 def traces(cls):
1721 """Returns the list of all handled traces to pass this as an argument to
1722 strace.
1723 """
1724 prefix = 'handle_'
1725 return [i[len(prefix):] for i in dir(cls.Process) if i.startswith(prefix)]
1726
1727 class Tracer(ApiBase.Tracer):
1728 MAX_LEN = 256
1729
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001730 def __init__(self, logname, use_sudo):
1731 super(Strace.Tracer, self).__init__(logname)
1732 self.use_sudo = use_sudo
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001733 if use_sudo:
1734 # TODO(maruel): Use the jump script systematically to make it easy to
1735 # figure out the root process, so RelativePath is not necessary anymore.
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00001736 self._child_script = create_exec_thunk()
1737 self._scripts_to_cleanup.append(self._child_script)
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001738
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001739 def trace(self, cmd, cwd, tracename, output):
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001740 """Runs strace on an executable.
1741
1742 When use_sudo=True, it is a 3-phases process: start the thunk, start
1743 sudo strace with the pid of the thunk and then have the thunk os.execve()
1744 the process to trace.
1745 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001746 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
1747 assert os.path.isabs(cmd[0]), cmd[0]
1748 assert os.path.isabs(cwd), cwd
1749 assert os.path.normpath(cwd) == cwd, cwd
1750 with self._lock:
1751 if not self._initialized:
1752 raise TracingFailure(
1753 'Called Tracer.trace() on an unitialized object',
1754 None, None, None, tracename)
1755 assert tracename not in (i['trace'] for i in self._traces)
1756 stdout = stderr = None
1757 if output:
1758 stdout = subprocess.PIPE
1759 stderr = subprocess.STDOUT
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001760
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001761 # Ensure all file related APIs are hooked.
1762 traces = ','.join(Strace.Context.traces() + ['file'])
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001763 flags = [
maruel@chromium.orga5cfdc32013-05-28 14:46:15 +00001764 # Each child process has its own trace file. It is necessary because
1765 # strace may generate corrupted log file if multiple processes are
1766 # heavily doing syscalls simultaneously.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001767 '-ff',
maruel@chromium.orga5cfdc32013-05-28 14:46:15 +00001768 # Reduce whitespace usage.
1769 '-a1',
1770 # hex encode non-ascii strings.
1771 # TODO(maruel): '-x',
1772 # TODO(maruel): '-ttt',
1773 # Signals are unnecessary noise here. Note the parser can cope with them
1774 # but reduce the unnecessary output.
1775 '-esignal=none',
1776 # Print as much data as wanted.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001777 '-s', '%d' % self.MAX_LEN,
1778 '-e', 'trace=%s' % traces,
1779 '-o', self._logname + '.' + tracename,
1780 ]
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001781
1782 if self.use_sudo:
1783 pipe_r, pipe_w = os.pipe()
1784 # Start the child process paused.
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00001785 target_cmd = [sys.executable, self._child_script, str(pipe_r)] + cmd
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001786 logging.debug(' '.join(target_cmd))
1787 child_proc = subprocess.Popen(
1788 target_cmd,
1789 stdin=subprocess.PIPE,
1790 stdout=stdout,
1791 stderr=stderr,
1792 cwd=cwd)
1793
1794 # TODO(maruel): both processes must use the same UID for it to work
1795 # without sudo. Look why -p is failing at the moment without sudo.
1796 trace_cmd = [
1797 'sudo',
1798 'strace',
1799 '-p', str(child_proc.pid),
1800 ] + flags
1801 logging.debug(' '.join(trace_cmd))
1802 strace_proc = subprocess.Popen(
1803 trace_cmd,
1804 cwd=cwd,
1805 stdin=subprocess.PIPE,
1806 stdout=subprocess.PIPE,
1807 stderr=subprocess.PIPE)
1808
1809 line = strace_proc.stderr.readline()
1810 if not re.match(r'^Process \d+ attached \- interrupt to quit$', line):
1811 # TODO(maruel): Raise an exception.
1812 assert False, line
1813
1814 # Now fire the child process.
1815 os.write(pipe_w, 'x')
1816
1817 out = child_proc.communicate()[0]
1818 strace_out = strace_proc.communicate()[0]
1819
1820 # TODO(maruel): if strace_proc.returncode: Add an exception.
1821 saved_out = strace_out if strace_proc.returncode else out
1822 root_pid = child_proc.pid
1823 else:
1824 # Non-sudo case.
1825 trace_cmd = [
1826 'strace',
1827 ] + flags + cmd
1828 logging.debug(' '.join(trace_cmd))
1829 child_proc = subprocess.Popen(
1830 trace_cmd,
1831 cwd=cwd,
1832 stdin=subprocess.PIPE,
1833 stdout=stdout,
1834 stderr=stderr)
1835 out = child_proc.communicate()[0]
1836 # TODO(maruel): Walk the logs and figure out the root process would
1837 # simplify parsing the logs a *lot*.
1838 saved_out = out
1839 # The trace reader will have to figure out.
1840 root_pid = None
1841
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001842 with self._lock:
1843 assert tracename not in (i['trace'] for i in self._traces)
1844 self._traces.append(
1845 {
1846 'cmd': cmd,
1847 'cwd': cwd,
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001848 'output': saved_out,
1849 'pid': root_pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001850 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001851 })
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001852 return child_proc.returncode, out
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001853
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001854 def __init__(self, use_sudo=None):
1855 super(Strace, self).__init__()
1856 self.use_sudo = use_sudo
1857
1858 def get_tracer(self, logname):
1859 return self.Tracer(logname, self.use_sudo)
1860
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001861 @staticmethod
1862 def clean_trace(logname):
1863 if os.path.isfile(logname):
1864 os.remove(logname)
1865 # Also delete any pid specific file from previous traces.
1866 for i in glob.iglob(logname + '.*'):
1867 if i.rsplit('.', 1)[1].isdigit():
1868 os.remove(i)
1869
1870 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001871 def parse_log(cls, logname, blacklist, trace_name):
1872 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00001873 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001874 data = read_json(logname)
1875 out = []
1876 for item in data['traces']:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001877 if trace_name and item['trace'] != trace_name:
1878 continue
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001879 result = {
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001880 'output': item['output'],
maruel@chromium.org964ddde2012-10-29 18:13:46 +00001881 'trace': item['trace'],
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001882 }
1883 try:
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001884 context = cls.Context(blacklist, item['pid'], item['cwd'])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001885 for pidfile in glob.iglob('%s.%s.*' % (logname, item['trace'])):
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001886 logging.debug('Reading %s', pidfile)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001887 pid = pidfile.rsplit('.', 1)[1]
1888 if pid.isdigit():
1889 pid = int(pid)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001890 found_line = False
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001891 for line in open(pidfile, 'rb'):
1892 context.on_line(pid, line)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001893 found_line = True
1894 if not found_line:
1895 # Ensures that a completely empty trace still creates the
1896 # corresponding Process instance by logging a dummy line.
1897 context.on_line(pid, '')
1898 else:
1899 logging.warning('Found unexpected file %s', pidfile)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001900 result['results'] = context.to_results()
maruel@chromium.org424f5692012-10-15 15:42:38 +00001901 except TracingFailure:
1902 result['exception'] = sys.exc_info()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001903 out.append(result)
1904 return out
1905
1906
1907class Dtrace(ApiBase):
1908 """Uses DTrace framework through dtrace. Requires root access.
1909
1910 Implies Mac OSX.
1911
1912 dtruss can't be used because it has compatibility issues with python.
1913
1914 Also, the pid->cwd handling needs to be done manually since OSX has no way to
1915 get the absolute path of the 'cwd' dtrace variable from the probe.
1916
1917 Also, OSX doesn't populate curpsinfo->pr_psargs properly, see
1918 https://discussions.apple.com/thread/1980539. So resort to handling execve()
1919 manually.
1920
1921 errno is not printed in the log since this implementation currently only cares
1922 about files that were successfully opened.
1923 """
1924 class Context(ApiBase.Context):
1925 # Format: index pid function(args)
1926 RE_HEADER = re.compile(r'^\d+ (\d+) ([a-zA-Z_\-]+)\((.*?)\)$')
1927
1928 # Arguments parsing.
1929 RE_DTRACE_BEGIN = re.compile(r'^\"(.+?)\"$')
1930 RE_CHDIR = re.compile(r'^\"(.+?)\"$')
1931 RE_EXECVE = re.compile(r'^\"(.+?)\", \[(\d+), (.+)\]$')
1932 RE_OPEN = re.compile(r'^\"(.+?)\", (0x[0-9a-z]+), (0x[0-9a-z]+)$')
1933 RE_PROC_START = re.compile(r'^(\d+), \"(.+?)\", (\d+)$')
1934 RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$')
1935
maruel@chromium.org538141b2013-06-03 20:57:17 +00001936 O_DIRECTORY = os.O_DIRECTORY
1937 O_RDWR = os.O_RDWR
1938 O_WRONLY = os.O_WRONLY
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001939
1940 class Process(ApiBase.Context.Process):
1941 def __init__(self, *args):
1942 super(Dtrace.Context.Process, self).__init__(*args)
1943 self.cwd = self.initial_cwd
1944
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001945 def __init__(self, blacklist, thunk_pid, initial_cwd):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001946 logging.info(
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001947 '%s(%d, %s)' % (self.__class__.__name__, thunk_pid, initial_cwd))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001948 super(Dtrace.Context, self).__init__(blacklist)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001949 assert isinstance(initial_cwd, unicode), initial_cwd
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001950 # Process ID of the temporary script created by create_subprocess_thunk().
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001951 self._thunk_pid = thunk_pid
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001952 self._initial_cwd = initial_cwd
1953 self._line_number = 0
1954
1955 def on_line(self, line):
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001956 assert isinstance(line, unicode), line
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001957 self._line_number += 1
1958 match = self.RE_HEADER.match(line)
1959 if not match:
1960 raise TracingFailure(
1961 'Found malformed line: %s' % line,
1962 None,
1963 self._line_number,
1964 line)
1965 fn = getattr(
1966 self,
1967 'handle_%s' % match.group(2).replace('-', '_'),
1968 self._handle_ignored)
1969 # It is guaranteed to succeed because of the regexp. Or at least I thought
1970 # it would.
1971 pid = int(match.group(1))
1972 try:
1973 return fn(pid, match.group(3))
1974 except TracingFailure, e:
1975 # Hack in the values since the handler could be a static function.
1976 e.pid = pid
1977 e.line = line
1978 e.line_number = self._line_number
1979 # Re-raise the modified exception.
1980 raise
1981 except (KeyError, NotImplementedError, ValueError), e:
1982 raise TracingFailure(
1983 'Trace generated a %s exception: %s' % (
1984 e.__class__.__name__, str(e)),
1985 pid,
1986 self._line_number,
1987 line,
1988 e)
1989
1990 def to_results(self):
1991 process = self.root_process.to_results_process()
1992 # Internal concistency check.
1993 if sorted(self._process_lookup) != sorted(p.pid for p in process.all):
1994 raise TracingFailure(
1995 'Found internal inconsitency in process lifetime detection '
1996 'while looking for len(tree) == len(list)',
1997 None,
1998 None,
1999 None,
2000 sorted(self._process_lookup),
2001 sorted(p.pid for p in process.all))
2002 return Results(process)
2003
2004 def handle_dtrace_BEGIN(self, _pid, args):
2005 if not self.RE_DTRACE_BEGIN.match(args):
2006 raise TracingFailure(
2007 'Found internal inconsitency in dtrace_BEGIN log line',
2008 None, None, None)
2009
2010 def handle_proc_start(self, pid, args):
2011 """Transfers cwd.
2012
2013 The dtrace script already takes care of only tracing the processes that
2014 are child of the traced processes so there is no need to verify the
2015 process hierarchy.
2016 """
2017 if pid in self._process_lookup:
2018 raise TracingFailure(
2019 'Found internal inconsitency in proc_start: %d started two times' %
2020 pid,
2021 None, None, None)
2022 match = self.RE_PROC_START.match(args)
2023 if not match:
2024 raise TracingFailure(
2025 'Failed to parse arguments: %s' % args,
2026 None, None, None)
2027 ppid = int(match.group(1))
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002028 if ppid == self._thunk_pid and not self.root_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002029 proc = self.root_process = self.Process(
2030 self.blacklist, pid, self._initial_cwd)
2031 elif ppid in self._process_lookup:
2032 proc = self.Process(self.blacklist, pid, self._process_lookup[ppid].cwd)
2033 self._process_lookup[ppid].children.append(proc)
2034 else:
2035 # Another process tree, ignore.
2036 return
2037 self._process_lookup[pid] = proc
2038 logging.debug(
2039 'New child: %s -> %d cwd:%s' %
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002040 (ppid, pid, render(proc.initial_cwd)))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002041
2042 def handle_proc_exit(self, pid, _args):
2043 """Removes cwd."""
2044 if pid in self._process_lookup:
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002045 # self._thunk_pid is not traced itself and other traces run neither.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002046 self._process_lookup[pid].cwd = None
2047
2048 def handle_execve(self, pid, args):
2049 """Sets the process' executable.
2050
2051 TODO(maruel): Read command line arguments. See
2052 https://discussions.apple.com/thread/1980539 for an example.
2053 https://gist.github.com/1242279
2054
2055 Will have to put the answer at http://stackoverflow.com/questions/7556249.
2056 :)
2057 """
2058 if not pid in self._process_lookup:
2059 # Another process tree, ignore.
2060 return
2061 match = self.RE_EXECVE.match(args)
2062 if not match:
2063 raise TracingFailure(
2064 'Failed to parse arguments: %r' % args,
2065 None, None, None)
2066 proc = self._process_lookup[pid]
2067 proc.executable = match.group(1)
maruel@chromium.org538141b2013-06-03 20:57:17 +00002068 self._handle_file(pid, proc.executable, Results.File.READ)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002069 proc.command = self.process_escaped_arguments(match.group(3))
2070 if int(match.group(2)) != len(proc.command):
2071 raise TracingFailure(
2072 'Failed to parse execve() arguments: %s' % args,
2073 None, None, None)
2074
2075 def handle_chdir(self, pid, args):
2076 """Updates cwd."""
2077 if pid not in self._process_lookup:
2078 # Another process tree, ignore.
2079 return
2080 cwd = self.RE_CHDIR.match(args).group(1)
2081 if not cwd.startswith('/'):
2082 cwd2 = os.path.join(self._process_lookup[pid].cwd, cwd)
2083 logging.debug('handle_chdir(%d, %s) -> %s' % (pid, cwd, cwd2))
2084 else:
2085 logging.debug('handle_chdir(%d, %s)' % (pid, cwd))
2086 cwd2 = cwd
2087 self._process_lookup[pid].cwd = cwd2
2088
2089 def handle_open_nocancel(self, pid, args):
2090 """Redirects to handle_open()."""
2091 return self.handle_open(pid, args)
2092
2093 def handle_open(self, pid, args):
2094 if pid not in self._process_lookup:
2095 # Another process tree, ignore.
2096 return
2097 match = self.RE_OPEN.match(args)
2098 if not match:
2099 raise TracingFailure(
2100 'Failed to parse arguments: %s' % args,
2101 None, None, None)
2102 flag = int(match.group(2), 16)
2103 if self.O_DIRECTORY & flag == self.O_DIRECTORY:
2104 # Ignore directories.
2105 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00002106 self._handle_file(
2107 pid,
2108 match.group(1),
2109 Results.File.READ if not ((self.O_RDWR | self.O_WRONLY) & flag)
2110 else Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002111
2112 def handle_rename(self, pid, args):
2113 if pid not in self._process_lookup:
2114 # Another process tree, ignore.
2115 return
2116 match = self.RE_RENAME.match(args)
2117 if not match:
2118 raise TracingFailure(
2119 'Failed to parse arguments: %s' % args,
2120 None, None, None)
maruel@chromium.org538141b2013-06-03 20:57:17 +00002121 self._handle_file(pid, match.group(1), Results.File.READ)
2122 self._handle_file(pid, match.group(2), Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002123
maruel@chromium.org538141b2013-06-03 20:57:17 +00002124 def _handle_file(self, pid, filepath, mode):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002125 if not filepath.startswith('/'):
2126 filepath = os.path.join(self._process_lookup[pid].cwd, filepath)
2127 # We can get '..' in the path.
2128 filepath = os.path.normpath(filepath)
2129 # Sadly, still need to filter out directories here;
2130 # saw open_nocancel(".", 0, 0) = 0 lines.
2131 if os.path.isdir(filepath):
2132 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00002133 self._process_lookup[pid].add_file(filepath, mode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002134
2135 def handle_ftruncate(self, pid, args):
2136 """Just used as a signal to kill dtrace, ignoring."""
2137 pass
2138
2139 @staticmethod
2140 def _handle_ignored(pid, args):
2141 """Is called for all the event traces that are not handled."""
2142 raise NotImplementedError('Please implement me')
2143
2144 @staticmethod
2145 def process_escaped_arguments(text):
2146 """Extracts escaped arguments on a string and return the arguments as a
2147 list.
2148
2149 Implemented as an automaton.
2150
2151 Example:
2152 With text = '\\001python2.7\\001-c\\001print(\\"hi\\")\\0', the
2153 function will return ['python2.7', '-c', 'print("hi")]
2154 """
2155 if not text.endswith('\\0'):
2156 raise ValueError('String is not null terminated: %r' % text, text)
2157 text = text[:-2]
2158
2159 def unescape(x):
2160 """Replaces '\\' with '\' and '\?' (where ? is anything) with ?."""
2161 out = []
2162 escaped = False
2163 for i in x:
2164 if i == '\\' and not escaped:
2165 escaped = True
2166 continue
2167 escaped = False
2168 out.append(i)
2169 return ''.join(out)
2170
2171 return [unescape(i) for i in text.split('\\001')]
2172
2173 class Tracer(ApiBase.Tracer):
2174 # pylint: disable=C0301
2175 #
2176 # To understand the following code, you'll want to take a look at:
2177 # http://developers.sun.com/solaris/articles/dtrace_quickref/dtrace_quickref.html
2178 # https://wikis.oracle.com/display/DTrace/Variables
2179 # http://docs.oracle.com/cd/E19205-01/820-4221/
2180 #
2181 # 0. Dump all the valid probes into a text file. It is important, you
2182 # want to redirect into a file and you don't want to constantly 'sudo'.
2183 # $ sudo dtrace -l > probes.txt
2184 #
2185 # 1. Count the number of probes:
2186 # $ wc -l probes.txt
2187 # 81823 # On OSX 10.7, including 1 header line.
2188 #
2189 # 2. List providers, intentionally skipping all the 'syspolicy10925' and the
2190 # likes and skipping the header with NR>1:
2191 # $ awk 'NR>1 { print $2 }' probes.txt | sort | uniq | grep -v '[[:digit:]]'
2192 # dtrace
2193 # fbt
2194 # io
2195 # ip
2196 # lockstat
2197 # mach_trap
2198 # proc
2199 # profile
2200 # sched
2201 # syscall
2202 # tcp
2203 # vminfo
2204 #
2205 # 3. List of valid probes:
2206 # $ grep syscall probes.txt | less
2207 # or use dtrace directly:
2208 # $ sudo dtrace -l -P syscall | less
2209 #
2210 # trackedpid is an associative array where its value can be 0, 1 or 2.
2211 # 0 is for untracked processes and is the default value for items not
2212 # in the associative array.
2213 # 1 is for tracked processes.
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002214 # 2 is for the script created by create_subprocess_thunk() only. It is not
2215 # tracked itself but all its decendants are.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002216 #
2217 # The script will kill itself only once waiting_to_die == 1 and
2218 # current_processes == 0, so that both getlogin() was called and that
2219 # all traced processes exited.
2220 #
2221 # TODO(maruel): Use cacheable predicates. See
2222 # https://wikis.oracle.com/display/DTrace/Performance+Considerations
2223 D_CODE = """
2224 dtrace:::BEGIN {
2225 waiting_to_die = 0;
2226 current_processes = 0;
2227 logindex = 0;
2228 printf("%d %d %s_%s(\\"%s\\")\\n",
2229 logindex, PID, probeprov, probename, SCRIPT);
2230 logindex++;
2231 }
2232
2233 proc:::start /trackedpid[ppid]/ {
2234 trackedpid[pid] = 1;
2235 current_processes += 1;
2236 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
2237 logindex, pid, probeprov, probename,
2238 ppid,
2239 execname,
2240 current_processes);
2241 logindex++;
2242 }
2243 /* Should use SCRIPT but there is no access to this variable at that
2244 * point. */
2245 proc:::start /ppid == PID && execname == "Python"/ {
2246 trackedpid[pid] = 2;
2247 current_processes += 1;
2248 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
2249 logindex, pid, probeprov, probename,
2250 ppid,
2251 execname,
2252 current_processes);
2253 logindex++;
2254 }
2255 proc:::exit /trackedpid[pid] &&
2256 current_processes == 1 &&
2257 waiting_to_die == 1/ {
2258 trackedpid[pid] = 0;
2259 current_processes -= 1;
2260 printf("%d %d %s_%s(%d)\\n",
2261 logindex, pid, probeprov, probename,
2262 current_processes);
2263 logindex++;
2264 exit(0);
2265 }
2266 proc:::exit /trackedpid[pid]/ {
2267 trackedpid[pid] = 0;
2268 current_processes -= 1;
2269 printf("%d %d %s_%s(%d)\\n",
2270 logindex, pid, probeprov, probename,
2271 current_processes);
2272 logindex++;
2273 }
2274
2275 /* Use an arcane function to detect when we need to die */
2276 syscall::ftruncate:entry /pid == PID && arg0 == FILE_ID/ {
2277 waiting_to_die = 1;
2278 printf("%d %d %s()\\n", logindex, pid, probefunc);
2279 logindex++;
2280 }
2281 syscall::ftruncate:entry /
2282 pid == PID && arg0 == FILE_ID && current_processes == 0/ {
2283 exit(0);
2284 }
2285
2286 syscall::open*:entry /trackedpid[pid] == 1/ {
2287 self->open_arg0 = arg0;
2288 self->open_arg1 = arg1;
2289 self->open_arg2 = arg2;
2290 }
2291 syscall::open*:return /trackedpid[pid] == 1 && errno == 0/ {
2292 this->open_arg0 = copyinstr(self->open_arg0);
2293 printf("%d %d %s(\\"%s\\", 0x%x, 0x%x)\\n",
2294 logindex, pid, probefunc,
2295 this->open_arg0,
2296 self->open_arg1,
2297 self->open_arg2);
2298 logindex++;
2299 this->open_arg0 = 0;
2300 }
2301 syscall::open*:return /trackedpid[pid] == 1/ {
2302 self->open_arg0 = 0;
2303 self->open_arg1 = 0;
2304 self->open_arg2 = 0;
2305 }
2306
2307 syscall::rename:entry /trackedpid[pid] == 1/ {
2308 self->rename_arg0 = arg0;
2309 self->rename_arg1 = arg1;
2310 }
2311 syscall::rename:return /trackedpid[pid] == 1 && errno == 0/ {
2312 this->rename_arg0 = copyinstr(self->rename_arg0);
2313 this->rename_arg1 = copyinstr(self->rename_arg1);
2314 printf("%d %d %s(\\"%s\\", \\"%s\\")\\n",
2315 logindex, pid, probefunc,
2316 this->rename_arg0,
2317 this->rename_arg1);
2318 logindex++;
2319 this->rename_arg0 = 0;
2320 this->rename_arg1 = 0;
2321 }
2322 syscall::rename:return /trackedpid[pid] == 1/ {
2323 self->rename_arg0 = 0;
2324 self->rename_arg1 = 0;
2325 }
2326
2327 /* Track chdir, it's painful because it is only receiving relative path.
2328 */
2329 syscall::chdir:entry /trackedpid[pid] == 1/ {
2330 self->chdir_arg0 = arg0;
2331 }
2332 syscall::chdir:return /trackedpid[pid] == 1 && errno == 0/ {
2333 this->chdir_arg0 = copyinstr(self->chdir_arg0);
2334 printf("%d %d %s(\\"%s\\")\\n",
2335 logindex, pid, probefunc,
2336 this->chdir_arg0);
2337 logindex++;
2338 this->chdir_arg0 = 0;
2339 }
2340 syscall::chdir:return /trackedpid[pid] == 1/ {
2341 self->chdir_arg0 = 0;
2342 }
2343 """
2344
2345 # execve-specific code, tends to throw a lot of exceptions.
2346 D_CODE_EXECVE = """
2347 /* Finally what we care about! */
2348 syscall::exec*:entry /trackedpid[pid]/ {
2349 self->exec_arg0 = copyinstr(arg0);
2350 /* Incrementally probe for a NULL in the argv parameter of execve() to
2351 * figure out argc. */
2352 /* TODO(maruel): Skip the remaining copyin() when a NULL pointer was
2353 * found. */
2354 self->exec_argc = 0;
2355 /* Probe for argc==1 */
2356 this->exec_argv = (user_addr_t*)copyin(
2357 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2358 self->exec_argc = this->exec_argv[self->exec_argc] ?
2359 (self->exec_argc + 1) : self->exec_argc;
2360
2361 /* Probe for argc==2 */
2362 this->exec_argv = (user_addr_t*)copyin(
2363 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2364 self->exec_argc = this->exec_argv[self->exec_argc] ?
2365 (self->exec_argc + 1) : self->exec_argc;
2366
2367 /* Probe for argc==3 */
2368 this->exec_argv = (user_addr_t*)copyin(
2369 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2370 self->exec_argc = this->exec_argv[self->exec_argc] ?
2371 (self->exec_argc + 1) : self->exec_argc;
2372
2373 /* Probe for argc==4 */
2374 this->exec_argv = (user_addr_t*)copyin(
2375 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2376 self->exec_argc = this->exec_argv[self->exec_argc] ?
2377 (self->exec_argc + 1) : self->exec_argc;
2378
2379 /* Copy the inputs strings since there is no guarantee they'll be
2380 * present after the call completed. */
2381 self->exec_argv0 = (self->exec_argc > 0) ?
2382 copyinstr(this->exec_argv[0]) : "";
2383 self->exec_argv1 = (self->exec_argc > 1) ?
2384 copyinstr(this->exec_argv[1]) : "";
2385 self->exec_argv2 = (self->exec_argc > 2) ?
2386 copyinstr(this->exec_argv[2]) : "";
2387 self->exec_argv3 = (self->exec_argc > 3) ?
2388 copyinstr(this->exec_argv[3]) : "";
2389 this->exec_argv = 0;
2390 }
2391 syscall::exec*:return /trackedpid[pid] && errno == 0/ {
2392 /* We need to join strings here, as using multiple printf() would
2393 * cause tearing when multiple threads/processes are traced.
2394 * Since it is impossible to escape a string and join it to another one,
2395 * like sprintf("%s%S", previous, more), use hackery.
2396 * Each of the elements are split with a \\1. \\0 cannot be used because
2397 * it is simply ignored. This will conflict with any program putting a
2398 * \\1 in their execve() string but this should be "rare enough" */
2399 this->args = "";
2400 /* Process exec_argv[0] */
2401 this->args = strjoin(
2402 this->args, (self->exec_argc > 0) ? self->exec_argv0 : "");
2403
2404 /* Process exec_argv[1] */
2405 this->args = strjoin(
2406 this->args, (self->exec_argc > 1) ? "\\1" : "");
2407 this->args = strjoin(
2408 this->args, (self->exec_argc > 1) ? self->exec_argv1 : "");
2409
2410 /* Process exec_argv[2] */
2411 this->args = strjoin(
2412 this->args, (self->exec_argc > 2) ? "\\1" : "");
2413 this->args = strjoin(
2414 this->args, (self->exec_argc > 2) ? self->exec_argv2 : "");
2415
2416 /* Process exec_argv[3] */
2417 this->args = strjoin(
2418 this->args, (self->exec_argc > 3) ? "\\1" : "");
2419 this->args = strjoin(
2420 this->args, (self->exec_argc > 3) ? self->exec_argv3 : "");
2421
2422 /* Prints self->exec_argc to permits verifying the internal
2423 * consistency since this code is quite fishy. */
2424 printf("%d %d %s(\\"%s\\", [%d, %S])\\n",
2425 logindex, pid, probefunc,
2426 self->exec_arg0,
2427 self->exec_argc,
2428 this->args);
2429 logindex++;
2430 this->args = 0;
2431 }
2432 syscall::exec*:return /trackedpid[pid]/ {
2433 self->exec_arg0 = 0;
2434 self->exec_argc = 0;
2435 self->exec_argv0 = 0;
2436 self->exec_argv1 = 0;
2437 self->exec_argv2 = 0;
2438 self->exec_argv3 = 0;
2439 }
2440 """
2441
2442 # Code currently not used.
2443 D_EXTRANEOUS = """
2444 /* This is a good learning experience, since it traces a lot of things
2445 * related to the process and child processes.
2446 * Warning: it generates a gigantic log. For example, tracing
2447 * "data/trace_inputs/child1.py --child" generates a 2mb log and takes
2448 * several minutes to execute.
2449 */
2450 /*
2451 mach_trap::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2452 printf("%d %d %s_%s() = %d\\n",
2453 logindex, pid, probeprov, probefunc, errno);
2454 logindex++;
2455 }
2456 proc::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2457 printf("%d %d %s_%s() = %d\\n",
2458 logindex, pid, probeprov, probefunc, errno);
2459 logindex++;
2460 }
2461 sched::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2462 printf("%d %d %s_%s() = %d\\n",
2463 logindex, pid, probeprov, probefunc, errno);
2464 logindex++;
2465 }
2466 syscall::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2467 printf("%d %d %s_%s() = %d\\n",
2468 logindex, pid, probeprov, probefunc, errno);
2469 logindex++;
2470 }
2471 vminfo::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2472 printf("%d %d %s_%s() = %d\\n",
2473 logindex, pid, probeprov, probefunc, errno);
2474 logindex++;
2475 }
2476 */
2477 /* TODO(maruel): *stat* functions and friends
2478 syscall::access:return,
2479 syscall::chdir:return,
2480 syscall::chflags:return,
2481 syscall::chown:return,
2482 syscall::chroot:return,
2483 syscall::getattrlist:return,
2484 syscall::getxattr:return,
2485 syscall::lchown:return,
2486 syscall::lstat64:return,
2487 syscall::lstat:return,
2488 syscall::mkdir:return,
2489 syscall::pathconf:return,
2490 syscall::readlink:return,
2491 syscall::removexattr:return,
2492 syscall::setxattr:return,
2493 syscall::stat64:return,
2494 syscall::stat:return,
2495 syscall::truncate:return,
2496 syscall::unlink:return,
2497 syscall::utimes:return,
2498 */
2499 """
2500
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002501 def __init__(self, logname, use_sudo):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002502 """Starts the log collection with dtrace.
2503
2504 Requires root access or chmod 4555 on dtrace. dtrace is asynchronous so
2505 this needs to wait for dtrace to be "warmed up".
2506 """
2507 super(Dtrace.Tracer, self).__init__(logname)
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002508 # This script is used as a signal to figure out the root process.
2509 self._signal_script = create_subprocess_thunk()
2510 self._scripts_to_cleanup.append(self._signal_script)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002511 # This unique dummy temp file is used to signal the dtrace script that it
2512 # should stop as soon as all the child processes are done. A bit hackish
2513 # but works fine enough.
2514 self._dummy_file_id, self._dummy_file_name = tempfile.mkstemp(
2515 prefix='trace_signal_file')
2516
maruel@chromium.org9b2112a2013-04-05 18:01:02 +00002517 dtrace_path = '/usr/sbin/dtrace'
2518 if not os.path.isfile(dtrace_path):
2519 dtrace_path = 'dtrace'
2520 elif use_sudo is None and (os.stat(dtrace_path).st_mode & stat.S_ISUID):
2521 # No need to sudo. For those following at home, don't do that.
2522 use_sudo = False
2523
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002524 # Note: do not use the -p flag. It's useless if the initial process quits
2525 # too fast, resulting in missing traces from the grand-children. The D
2526 # code manages the dtrace lifetime itself.
2527 trace_cmd = [
maruel@chromium.org9b2112a2013-04-05 18:01:02 +00002528 dtrace_path,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002529 # Use a larger buffer if getting 'out of scratch space' errors.
2530 # Ref: https://wikis.oracle.com/display/DTrace/Options+and+Tunables
2531 '-b', '10m',
2532 '-x', 'dynvarsize=10m',
2533 #'-x', 'dtrace_global_maxsize=1m',
2534 '-x', 'evaltime=exec',
2535 '-o', '/dev/stderr',
2536 '-q',
2537 '-n', self._get_dtrace_code(),
2538 ]
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002539 if use_sudo is not False:
2540 trace_cmd.insert(0, 'sudo')
2541
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002542 with open(self._logname + '.log', 'wb') as logfile:
2543 self._dtrace = subprocess.Popen(
2544 trace_cmd, stdout=logfile, stderr=subprocess.STDOUT)
2545 logging.debug('Started dtrace pid: %d' % self._dtrace.pid)
2546
2547 # Reads until one line is printed, which signifies dtrace is up and ready.
2548 with open(self._logname + '.log', 'rb') as logfile:
2549 while 'dtrace_BEGIN' not in logfile.readline():
2550 if self._dtrace.poll() is not None:
2551 # Do a busy wait. :/
2552 break
2553 logging.debug('dtrace started')
2554
2555 def _get_dtrace_code(self):
2556 """Setups the D code to implement child process tracking.
2557
2558 Injects the cookie in the script so it knows when to stop.
2559
2560 The script will detect any instance of the script created with
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002561 create_subprocess_thunk() and will start tracing it.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002562 """
maruel@chromium.orgbe75f282012-11-02 19:34:37 +00002563 out = (
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002564 'inline int PID = %d;\n'
2565 'inline string SCRIPT = "%s";\n'
2566 'inline int FILE_ID = %d;\n'
2567 '\n'
2568 '%s') % (
2569 os.getpid(),
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002570 self._signal_script,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002571 self._dummy_file_id,
maruel@chromium.orgbe75f282012-11-02 19:34:37 +00002572 self.D_CODE)
2573 if os.environ.get('TRACE_INPUTS_DTRACE_ENABLE_EXECVE') == '1':
2574 # Do not enable by default since it tends to spew dtrace: error lines
2575 # because the execve() parameters are not in valid memory at the time of
2576 # logging.
2577 # TODO(maruel): Find a way to make this reliable since it's useful but
2578 # only works in limited/trivial uses cases for now.
2579 out += self.D_CODE_EXECVE
2580 return out
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002581
2582 def trace(self, cmd, cwd, tracename, output):
2583 """Runs dtrace on an executable.
2584
2585 This dtruss is broken when it starts the process itself or when tracing
2586 child processes, this code starts a wrapper process
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002587 generated with create_subprocess_thunk() which starts the executable to
2588 trace.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002589 """
2590 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
2591 assert os.path.isabs(cmd[0]), cmd[0]
2592 assert os.path.isabs(cwd), cwd
2593 assert os.path.normpath(cwd) == cwd, cwd
2594 with self._lock:
2595 if not self._initialized:
2596 raise TracingFailure(
2597 'Called Tracer.trace() on an unitialized object',
2598 None, None, None, tracename)
2599 assert tracename not in (i['trace'] for i in self._traces)
2600
2601 # Starts the script wrapper to start the child process. This signals the
2602 # dtrace script that this process is to be traced.
2603 stdout = stderr = None
2604 if output:
2605 stdout = subprocess.PIPE
2606 stderr = subprocess.STDOUT
2607 child_cmd = [
2608 sys.executable,
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002609 self._signal_script,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002610 tracename,
2611 ]
2612 # Call a dummy function so that dtrace knows I'm about to launch a process
2613 # that needs to be traced.
2614 # Yummy.
2615 child = subprocess.Popen(
2616 child_cmd + fix_python_path(cmd),
2617 stdin=subprocess.PIPE,
2618 stdout=stdout,
2619 stderr=stderr,
2620 cwd=cwd)
2621 logging.debug('Started child pid: %d' % child.pid)
2622
2623 out = child.communicate()[0]
2624 # This doesn't mean tracing is done, one of the grand-child process may
2625 # still be alive. It will be tracked with the dtrace script.
2626
2627 with self._lock:
2628 assert tracename not in (i['trace'] for i in self._traces)
2629 self._traces.append(
2630 {
2631 'cmd': cmd,
2632 'cwd': cwd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002633 'pid': child.pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002634 'output': out,
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002635 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002636 })
2637 return child.returncode, out
2638
2639 def close(self, timeout=None):
2640 """Terminates dtrace."""
2641 logging.debug('close(%s)' % timeout)
2642 try:
2643 try:
2644 super(Dtrace.Tracer, self).close(timeout)
2645 # Signal dtrace that it should stop now.
maruel@chromium.orgf43e68b2012-10-15 20:23:10 +00002646 # ftruncate doesn't exist on Windows.
2647 os.ftruncate(self._dummy_file_id, 0) # pylint: disable=E1101
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002648 if timeout:
2649 start = time.time()
2650 # Use polling. :/
2651 while (self._dtrace.poll() is None and
2652 (time.time() - start) < timeout):
2653 time.sleep(0.1)
2654 self._dtrace.kill()
2655 self._dtrace.wait()
2656 finally:
2657 # Make sure to kill it in any case.
2658 if self._dtrace.poll() is None:
2659 try:
2660 self._dtrace.kill()
2661 self._dtrace.wait()
2662 except OSError:
2663 pass
2664
2665 if self._dtrace.returncode != 0:
2666 # Warn about any dtrace failure but basically ignore it.
2667 print 'dtrace failure: %s' % self._dtrace.returncode
2668 finally:
2669 os.close(self._dummy_file_id)
2670 os.remove(self._dummy_file_name)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002671
2672 def post_process_log(self):
2673 """Sorts the log back in order when each call occured.
2674
2675 dtrace doesn't save the buffer in strict order since it keeps one buffer
2676 per CPU.
2677 """
2678 super(Dtrace.Tracer, self).post_process_log()
2679 logname = self._logname + '.log'
2680 with open(logname, 'rb') as logfile:
2681 lines = [l for l in logfile if l.strip()]
2682 errors = [l for l in lines if l.startswith('dtrace:')]
2683 if errors:
2684 raise TracingFailure(
2685 'Found errors in the trace: %s' % '\n'.join(errors),
2686 None, None, None, logname)
2687 try:
2688 lines = sorted(lines, key=lambda l: int(l.split(' ', 1)[0]))
2689 except ValueError:
2690 raise TracingFailure(
2691 'Found errors in the trace: %s' % '\n'.join(
2692 l for l in lines if l.split(' ', 1)[0].isdigit()),
2693 None, None, None, logname)
2694 with open(logname, 'wb') as logfile:
2695 logfile.write(''.join(lines))
2696
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002697 def __init__(self, use_sudo=None):
2698 super(Dtrace, self).__init__()
2699 self.use_sudo = use_sudo
2700
2701 def get_tracer(self, logname):
2702 return self.Tracer(logname, self.use_sudo)
2703
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002704 @staticmethod
2705 def clean_trace(logname):
2706 for ext in ('', '.log'):
2707 if os.path.isfile(logname + ext):
2708 os.remove(logname + ext)
2709
2710 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00002711 def parse_log(cls, logname, blacklist, trace_name):
2712 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00002713 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002714
2715 def blacklist_more(filepath):
2716 # All the HFS metadata is in the form /.vol/...
2717 return blacklist(filepath) or re.match(r'^\/\.vol\/.+$', filepath)
2718
2719 data = read_json(logname)
2720 out = []
2721 for item in data['traces']:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00002722 if trace_name and item['trace'] != trace_name:
2723 continue
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002724 result = {
2725 'output': item['output'],
2726 'trace': item['trace'],
2727 }
2728 try:
2729 context = cls.Context(blacklist_more, item['pid'], item['cwd'])
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002730 # It's fine to assume the file as UTF-8: OSX enforces the file names to
2731 # be valid UTF-8 and we control the log output.
2732 for line in codecs.open(logname + '.log', 'rb', encoding='utf-8'):
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002733 context.on_line(line)
2734 result['results'] = context.to_results()
2735 except TracingFailure:
2736 result['exception'] = sys.exc_info()
2737 out.append(result)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002738 return out
2739
2740
2741class LogmanTrace(ApiBase):
2742 """Uses the native Windows ETW based tracing functionality to trace a child
2743 process.
2744
2745 Caveat: this implementations doesn't track cwd or initial_cwd. It is because
2746 the Windows Kernel doesn't have a concept of 'current working directory' at
2747 all. A Win32 process has a map of current directories, one per drive letter
2748 and it is managed by the user mode kernel32.dll. In kernel, a file is always
2749 opened relative to another file_object or as an absolute path. All the current
2750 working directory logic is done in user mode.
2751 """
2752 class Context(ApiBase.Context):
2753 """Processes a ETW log line and keeps the list of existent and non
2754 existent files accessed.
2755
2756 Ignores directories.
2757 """
2758 # These indexes are for the stripped version in json.
2759 EVENT_NAME = 0
2760 TYPE = 1
2761 PID = 2
2762 TID = 3
2763 PROCESSOR_ID = 4
2764 TIMESTAMP = 5
2765 USER_DATA = 6
2766
2767 class Process(ApiBase.Context.Process):
2768 def __init__(self, *args):
2769 super(LogmanTrace.Context.Process, self).__init__(*args)
2770 # Handle file objects that succeeded.
2771 self.file_objects = {}
2772
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002773 def __init__(self, blacklist, thunk_pid, trace_name, thunk_cmd):
2774 logging.info(
2775 '%s(%d, %s, %s)', self.__class__.__name__, thunk_pid, trace_name,
2776 thunk_cmd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002777 super(LogmanTrace.Context, self).__init__(blacklist)
2778 self._drive_map = DosDriveMap()
2779 # Threads mapping to the corresponding process id.
2780 self._threads_active = {}
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002781 # Process ID of the tracer, e.g. the temporary script created by
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002782 # create_subprocess_thunk(). This is tricky because the process id may
2783 # have been reused.
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002784 self._thunk_pid = thunk_pid
2785 self._thunk_cmd = thunk_cmd
2786 self._trace_name = trace_name
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002787 self._line_number = 0
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002788 self._thunk_process = None
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002789
2790 def on_line(self, line):
2791 """Processes a json Event line."""
2792 self._line_number += 1
2793 try:
2794 # By Opcode
2795 handler = getattr(
2796 self,
2797 'handle_%s_%s' % (line[self.EVENT_NAME], line[self.TYPE]),
2798 None)
2799 if not handler:
2800 raise TracingFailure(
2801 'Unexpected event %s_%s' % (
2802 line[self.EVENT_NAME], line[self.TYPE]),
2803 None, None, None)
2804 handler(line)
2805 except TracingFailure, e:
2806 # Hack in the values since the handler could be a static function.
2807 e.pid = line[self.PID]
2808 e.line = line
2809 e.line_number = self._line_number
2810 # Re-raise the modified exception.
2811 raise
2812 except (KeyError, NotImplementedError, ValueError), e:
2813 raise TracingFailure(
2814 'Trace generated a %s exception: %s' % (
2815 e.__class__.__name__, str(e)),
2816 line[self.PID],
2817 self._line_number,
2818 line,
2819 e)
2820
2821 def to_results(self):
2822 if not self.root_process:
2823 raise TracingFailure(
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002824 'Failed to detect the initial process %d' % self._thunk_pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002825 None, None, None)
2826 process = self.root_process.to_results_process()
2827 return Results(process)
2828
2829 def _thread_to_process(self, tid):
2830 """Finds the process from the thread id."""
2831 tid = int(tid, 16)
2832 pid = self._threads_active.get(tid)
2833 if not pid or not self._process_lookup.get(pid):
2834 return
2835 return self._process_lookup[pid]
2836
2837 @classmethod
2838 def handle_EventTrace_Header(cls, line):
2839 """Verifies no event was dropped, e.g. no buffer overrun occured."""
2840 BUFFER_SIZE = cls.USER_DATA
2841 #VERSION = cls.USER_DATA + 1
2842 #PROVIDER_VERSION = cls.USER_DATA + 2
2843 #NUMBER_OF_PROCESSORS = cls.USER_DATA + 3
2844 #END_TIME = cls.USER_DATA + 4
2845 #TIMER_RESOLUTION = cls.USER_DATA + 5
2846 #MAX_FILE_SIZE = cls.USER_DATA + 6
2847 #LOG_FILE_MODE = cls.USER_DATA + 7
2848 #BUFFERS_WRITTEN = cls.USER_DATA + 8
2849 #START_BUFFERS = cls.USER_DATA + 9
2850 #POINTER_SIZE = cls.USER_DATA + 10
2851 EVENTS_LOST = cls.USER_DATA + 11
2852 #CPU_SPEED = cls.USER_DATA + 12
2853 #LOGGER_NAME = cls.USER_DATA + 13
2854 #LOG_FILE_NAME = cls.USER_DATA + 14
2855 #BOOT_TIME = cls.USER_DATA + 15
2856 #PERF_FREQ = cls.USER_DATA + 16
2857 #START_TIME = cls.USER_DATA + 17
2858 #RESERVED_FLAGS = cls.USER_DATA + 18
2859 #BUFFERS_LOST = cls.USER_DATA + 19
2860 #SESSION_NAME_STRING = cls.USER_DATA + 20
2861 #LOG_FILE_NAME_STRING = cls.USER_DATA + 21
2862 if line[EVENTS_LOST] != '0':
2863 raise TracingFailure(
2864 ( '%s events were lost during trace, please increase the buffer '
2865 'size from %s') % (line[EVENTS_LOST], line[BUFFER_SIZE]),
2866 None, None, None)
2867
2868 def handle_FileIo_Cleanup(self, line):
2869 """General wisdom: if a file is closed, it's because it was opened.
2870
2871 Note that FileIo_Close is not used since if a file was opened properly but
2872 not closed before the process exits, only Cleanup will be logged.
2873 """
2874 #IRP = self.USER_DATA
2875 TTID = self.USER_DATA + 1 # Thread ID, that's what we want.
2876 FILE_OBJECT = self.USER_DATA + 2
2877 #FILE_KEY = self.USER_DATA + 3
2878 proc = self._thread_to_process(line[TTID])
2879 if not proc:
2880 # Not a process we care about.
2881 return
2882 file_object = line[FILE_OBJECT]
2883 if file_object in proc.file_objects:
maruel@chromium.org538141b2013-06-03 20:57:17 +00002884 filepath, access_type = proc.file_objects.pop(file_object)
2885 proc.add_file(filepath, access_type)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002886
2887 def handle_FileIo_Create(self, line):
2888 """Handles a file open.
2889
2890 All FileIo events are described at
2891 http://msdn.microsoft.com/library/windows/desktop/aa363884.aspx
2892 for some value of 'description'.
2893
2894 " (..) process and thread id values of the IO events (..) are not valid "
2895 http://msdn.microsoft.com/magazine/ee358703.aspx
2896
2897 The FileIo.Create event doesn't return if the CreateFile() call
2898 succeeded, so keep track of the file_object and check that it is
2899 eventually closed with FileIo_Cleanup.
2900 """
2901 #IRP = self.USER_DATA
2902 TTID = self.USER_DATA + 1 # Thread ID, that's what we want.
2903 FILE_OBJECT = self.USER_DATA + 2
2904 #CREATE_OPTIONS = self.USER_DATA + 3
2905 #FILE_ATTRIBUTES = self.USER_DATA + 4
maruel@chromium.org538141b2013-06-03 20:57:17 +00002906 #SHARE_ACCESS = self.USER_DATA + 5
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002907 OPEN_PATH = self.USER_DATA + 6
2908
2909 proc = self._thread_to_process(line[TTID])
2910 if not proc:
2911 # Not a process we care about.
2912 return
2913
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002914 raw_path = line[OPEN_PATH]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002915 # Ignore directories and bare drive right away.
2916 if raw_path.endswith(os.path.sep):
2917 return
2918 filepath = self._drive_map.to_win32(raw_path)
2919 # Ignore bare drive right away. Some may still fall through with format
2920 # like '\\?\X:'
2921 if len(filepath) == 2:
2922 return
2923 file_object = line[FILE_OBJECT]
2924 if os.path.isdir(filepath):
2925 # There is no O_DIRECTORY equivalent on Windows. The closed is
2926 # FILE_FLAG_BACKUP_SEMANTICS but it's not exactly right either. So
2927 # simply discard directories are they are found.
2928 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00002929 # Override any stale file object.
2930 # TODO(maruel): Figure out a way to detect if the file was opened for
2931 # reading or writting. Sadly CREATE_OPTIONS doesn't seem to be of any help
2932 # here. For now mark as None to make it clear we have no idea what it is
2933 # about.
2934 proc.file_objects[file_object] = (filepath, None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002935
2936 def handle_FileIo_Rename(self, line):
2937 # TODO(maruel): Handle?
2938 pass
2939
2940 def handle_Process_End(self, line):
2941 pid = line[self.PID]
2942 if self._process_lookup.get(pid):
2943 logging.info('Terminated: %d' % pid)
2944 self._process_lookup[pid] = None
2945 else:
2946 logging.debug('Terminated: %d' % pid)
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002947 if self._thunk_process and self._thunk_process.pid == pid:
2948 self._thunk_process = None
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002949
2950 def handle_Process_Start(self, line):
2951 """Handles a new child process started by PID."""
2952 #UNIQUE_PROCESS_KEY = self.USER_DATA
2953 PROCESS_ID = self.USER_DATA + 1
2954 #PARENT_PID = self.USER_DATA + 2
2955 #SESSION_ID = self.USER_DATA + 3
2956 #EXIT_STATUS = self.USER_DATA + 4
2957 #DIRECTORY_TABLE_BASE = self.USER_DATA + 5
2958 #USER_SID = self.USER_DATA + 6
2959 IMAGE_FILE_NAME = self.USER_DATA + 7
2960 COMMAND_LINE = self.USER_DATA + 8
2961
2962 ppid = line[self.PID]
2963 pid = int(line[PROCESS_ID], 16)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002964 command_line = CommandLineToArgvW(line[COMMAND_LINE])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002965 logging.debug(
2966 'New process %d->%d (%s) %s' %
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002967 (ppid, pid, line[IMAGE_FILE_NAME], command_line))
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002968
2969 if pid == self._thunk_pid:
2970 # Need to ignore processes we don't know about because the log is
2971 # system-wide. self._thunk_pid shall start only one process.
2972 # This is tricky though because Windows *loves* to reuse process id and
2973 # it happens often that the process ID of the thunk script created by
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002974 # create_subprocess_thunk() is reused. So just detecting the pid here is
2975 # not sufficient, we must confirm the command line.
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002976 if command_line[:len(self._thunk_cmd)] != self._thunk_cmd:
2977 logging.info(
2978 'Ignoring duplicate pid %d for %s: %s while searching for %s',
2979 pid, self._trace_name, command_line, self._thunk_cmd)
2980 return
2981
2982 # TODO(maruel): The check is quite weak. Add the thunk path.
2983 if self._thunk_process:
2984 raise TracingFailure(
2985 ( 'Parent process is _thunk_pid(%d) but thunk_process(%d) is '
2986 'already set') % (self._thunk_pid, self._thunk_process.pid),
2987 None, None, None)
2988 proc = self.Process(self.blacklist, pid, None)
2989 self._thunk_process = proc
2990 return
2991 elif ppid == self._thunk_pid and self._thunk_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002992 proc = self.Process(self.blacklist, pid, None)
2993 self.root_process = proc
2994 ppid = None
2995 elif self._process_lookup.get(ppid):
2996 proc = self.Process(self.blacklist, pid, None)
2997 self._process_lookup[ppid].children.append(proc)
2998 else:
2999 # Ignore
3000 return
3001 self._process_lookup[pid] = proc
3002
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003003 proc.command = command_line
3004 proc.executable = line[IMAGE_FILE_NAME]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003005 # proc.command[0] may be the absolute path of 'executable' but it may be
3006 # anything else too. If it happens that command[0] ends with executable,
3007 # use it, otherwise defaults to the base name.
3008 cmd0 = proc.command[0].lower()
3009 if not cmd0.endswith('.exe'):
3010 # TODO(maruel): That's not strictly true either.
3011 cmd0 += '.exe'
3012 if cmd0.endswith(proc.executable) and os.path.isfile(cmd0):
3013 # Fix the path.
3014 cmd0 = cmd0.replace('/', os.path.sep)
3015 cmd0 = os.path.normpath(cmd0)
3016 proc.executable = get_native_path_case(cmd0)
3017 logging.info(
3018 'New child: %s -> %d %s' % (ppid, pid, proc.executable))
3019
3020 def handle_Thread_End(self, line):
3021 """Has the same parameters as Thread_Start."""
3022 tid = int(line[self.TID], 16)
3023 self._threads_active.pop(tid, None)
3024
3025 def handle_Thread_Start(self, line):
3026 """Handles a new thread created.
3027
3028 Do not use self.PID here since a process' initial thread is created by
3029 the parent process.
3030 """
3031 PROCESS_ID = self.USER_DATA
3032 TTHREAD_ID = self.USER_DATA + 1
3033 #STACK_BASE = self.USER_DATA + 2
3034 #STACK_LIMIT = self.USER_DATA + 3
3035 #USER_STACK_BASE = self.USER_DATA + 4
3036 #USER_STACK_LIMIT = self.USER_DATA + 5
3037 #AFFINITY = self.USER_DATA + 6
3038 #WIN32_START_ADDR = self.USER_DATA + 7
3039 #TEB_BASE = self.USER_DATA + 8
3040 #SUB_PROCESS_TAG = self.USER_DATA + 9
3041 #BASE_PRIORITY = self.USER_DATA + 10
3042 #PAGE_PRIORITY = self.USER_DATA + 11
3043 #IO_PRIORITY = self.USER_DATA + 12
3044 #THREAD_FLAGS = self.USER_DATA + 13
3045 # Do not use self.PID here since a process' initial thread is created by
3046 # the parent process.
3047 pid = int(line[PROCESS_ID], 16)
3048 tid = int(line[TTHREAD_ID], 16)
3049 logging.debug('New thread pid:%d, tid:%d' % (pid, tid))
3050 self._threads_active[tid] = pid
3051
3052 @classmethod
3053 def supported_events(cls):
3054 """Returns all the procesed events."""
3055 out = []
3056 for member in dir(cls):
3057 match = re.match(r'^handle_([A-Za-z]+)_([A-Za-z]+)$', member)
3058 if match:
3059 out.append(match.groups())
3060 return out
3061
3062 class Tracer(ApiBase.Tracer):
3063 # The basic headers.
3064 EXPECTED_HEADER = [
3065 u'Event Name',
3066 u'Type',
3067 u'Event ID',
3068 u'Version',
3069 u'Channel',
3070 u'Level', # 5
3071 u'Opcode',
3072 u'Task',
3073 u'Keyword',
3074 u'PID',
3075 u'TID', # 10
3076 u'Processor Number',
3077 u'Instance ID',
3078 u'Parent Instance ID',
3079 u'Activity ID',
3080 u'Related Activity ID', # 15
3081 u'Clock-Time',
3082 u'Kernel(ms)', # Both have a resolution of ~15ms which makes them
3083 u'User(ms)', # pretty much useless.
3084 u'User Data', # Extra arguments that are event-specific.
3085 ]
3086 # Only the useful headers common to all entries are listed there. Any column
3087 # at 19 or higher is dependent on the specific event.
3088 EVENT_NAME = 0
3089 TYPE = 1
3090 PID = 9
3091 TID = 10
3092 PROCESSOR_ID = 11
3093 TIMESTAMP = 16
3094 NULL_GUID = '{00000000-0000-0000-0000-000000000000}'
3095 USER_DATA = 19
3096
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003097 class CsvReader(object):
3098 """CSV reader that reads files generated by tracerpt.exe.
3099
3100 csv.reader() fails to read them properly, it mangles file names quoted
3101 with "" with a comma in it.
3102 """
3103 # 0. Had a ',' or one of the following ' ' after a comma, next should
3104 # be ' ', '"' or string or ',' for an empty field.
3105 ( HAD_DELIMITER,
3106 # 1. Processing an unquoted field up to ','.
3107 IN_STR,
3108 # 2. Processing a new field starting with '"'.
3109 STARTING_STR_QUOTED,
3110 # 3. Second quote in a row at the start of a field. It could be either
3111 # '""foo""' or '""'. Who the hell thought it was a great idea to use
3112 # the same character for delimiting and escaping?
3113 STARTING_SECOND_QUOTE,
3114 # 4. A quote inside a quoted string where the previous character was
3115 # not a quote, so the string is not empty. Can be either: end of a
3116 # quoted string (a delimiter) or a quote escape. The next char must be
3117 # either '"' or ','.
3118 HAD_QUOTE_IN_QUOTED,
3119 # 5. Second quote inside a quoted string.
3120 HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED,
3121 # 6. Processing a field that started with '"'.
3122 IN_STR_QUOTED) = range(7)
3123
3124 def __init__(self, f):
3125 self.f = f
3126
3127 def __iter__(self):
3128 return self
3129
3130 def next(self):
3131 """Splits the line in fields."""
3132 line = self.f.readline()
3133 if not line:
3134 raise StopIteration()
3135 line = line.strip()
3136 fields = []
3137 state = self.HAD_DELIMITER
3138 for i, c in enumerate(line):
3139 if state == self.HAD_DELIMITER:
3140 if c == ',':
3141 # Empty field.
3142 fields.append('')
3143 elif c == ' ':
3144 # Ignore initial whitespaces
3145 pass
3146 elif c == '"':
3147 state = self.STARTING_STR_QUOTED
3148 fields.append('')
3149 else:
3150 # Start of a new field.
3151 state = self.IN_STR
3152 fields.append(c)
3153
3154 elif state == self.IN_STR:
3155 # Do not accept quote inside unquoted field.
3156 assert c != '"', (i, c, line, fields)
3157 if c == ',':
3158 fields[-1] = fields[-1].strip()
3159 state = self.HAD_DELIMITER
3160 else:
3161 fields[-1] = fields[-1] + c
3162
3163 elif state == self.STARTING_STR_QUOTED:
3164 if c == '"':
3165 # Do not store the character yet.
3166 state = self.STARTING_SECOND_QUOTE
3167 else:
3168 state = self.IN_STR_QUOTED
3169 fields[-1] = fields[-1] + c
3170
3171 elif state == self.STARTING_SECOND_QUOTE:
3172 if c == ',':
3173 # It was an empty field. '""' == ''.
3174 state = self.HAD_DELIMITER
3175 else:
3176 fields[-1] = fields[-1] + '"' + c
3177 state = self.IN_STR_QUOTED
3178
3179 elif state == self.HAD_QUOTE_IN_QUOTED:
3180 if c == ',':
3181 # End of the string.
3182 state = self.HAD_DELIMITER
3183 elif c == '"':
3184 state = self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED
3185 else:
3186 # The previous double-quote was just an unescaped quote.
3187 fields[-1] = fields[-1] + '"' + c
3188 state = self.IN_STR_QUOTED
3189
3190 elif state == self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED:
3191 if c == ',':
3192 # End of the string.
3193 state = self.HAD_DELIMITER
3194 fields[-1] = fields[-1] + '"'
3195 else:
maruel@chromium.org261d1352013-02-25 19:23:20 +00003196 # That's just how the logger rolls. Revert back to appending the
3197 # char and "guess" it was a quote in a double-quoted string.
3198 state = self.IN_STR_QUOTED
3199 fields[-1] = fields[-1] + '"' + c
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003200
3201 elif state == self.IN_STR_QUOTED:
3202 if c == '"':
3203 # Could be a delimiter or an escape.
3204 state = self.HAD_QUOTE_IN_QUOTED
3205 else:
3206 fields[-1] = fields[-1] + c
3207
3208 if state == self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED:
3209 fields[-1] = fields[-1] + '"'
3210 else:
3211 assert state in (
3212 # Terminated with a normal field.
3213 self.IN_STR,
3214 # Terminated with an empty field.
3215 self.STARTING_SECOND_QUOTE,
3216 # Terminated with a normal quoted field.
3217 self.HAD_QUOTE_IN_QUOTED), (
3218 line, state, fields)
3219 return fields
3220
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003221 def __init__(self, logname):
3222 """Starts the log collection.
3223
3224 Requires administrative access. logman.exe is synchronous so no need for a
3225 "warmup" call. 'Windows Kernel Trace' is *localized* so use its GUID
3226 instead. The GUID constant name is SystemTraceControlGuid. Lovely.
3227
3228 One can get the list of potentially interesting providers with:
3229 "logman query providers | findstr /i file"
3230 """
3231 super(LogmanTrace.Tracer, self).__init__(logname)
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00003232 self._signal_script = create_subprocess_thunk()
3233 self._scripts_to_cleanup.append(self._signal_script)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003234 cmd_start = [
3235 'logman.exe',
3236 'start',
3237 'NT Kernel Logger',
3238 '-p', '{9e814aad-3204-11d2-9a82-006008a86939}',
3239 # splitio,fileiocompletion,syscall,file,cswitch,img
3240 '(process,fileio,thread)',
3241 '-o', self._logname + '.etl',
3242 '-ets', # Send directly to kernel
3243 # Values extracted out of thin air.
3244 # Event Trace Session buffer size in kb.
3245 '-bs', '10240',
3246 # Number of Event Trace Session buffers.
3247 '-nb', '16', '256',
3248 ]
3249 logging.debug('Running: %s' % cmd_start)
3250 try:
3251 subprocess.check_call(
3252 cmd_start,
3253 stdin=subprocess.PIPE,
3254 stdout=subprocess.PIPE,
3255 stderr=subprocess.STDOUT)
3256 except subprocess.CalledProcessError, e:
3257 if e.returncode == -2147024891:
3258 print >> sys.stderr, 'Please restart with an elevated admin prompt'
3259 elif e.returncode == -2144337737:
3260 print >> sys.stderr, (
3261 'A kernel trace was already running, stop it and try again')
3262 raise
3263
3264 def trace(self, cmd, cwd, tracename, output):
3265 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
3266 assert os.path.isabs(cmd[0]), cmd[0]
3267 assert os.path.isabs(cwd), cwd
3268 assert os.path.normpath(cwd) == cwd, cwd
3269 with self._lock:
3270 if not self._initialized:
3271 raise TracingFailure(
3272 'Called Tracer.trace() on an unitialized object',
3273 None, None, None, tracename)
3274 assert tracename not in (i['trace'] for i in self._traces)
3275
3276 # Use "logman -?" for help.
3277
3278 stdout = stderr = None
3279 if output:
3280 stdout = subprocess.PIPE
3281 stderr = subprocess.STDOUT
3282
3283 # Run the child process.
3284 logging.debug('Running: %s' % cmd)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00003285 # Use the temporary script generated with create_subprocess_thunk() so we
3286 # have a clear pid owner. Since trace_inputs.py can be used as a library
3287 # and could trace multiple processes simultaneously, it makes it more
3288 # complex if the executable to be traced is executed directly here. It
3289 # also solves issues related to logman.exe that needs to be executed to
3290 # control the kernel trace.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003291 child_cmd = [
3292 sys.executable,
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00003293 self._signal_script,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003294 tracename,
3295 ]
3296 child = subprocess.Popen(
3297 child_cmd + fix_python_path(cmd),
3298 cwd=cwd,
3299 stdin=subprocess.PIPE,
3300 stdout=stdout,
3301 stderr=stderr)
3302 logging.debug('Started child pid: %d' % child.pid)
3303 out = child.communicate()[0]
3304 # This doesn't mean all the grand-children are done. Sadly, we don't have
3305 # a good way to determine that.
3306
3307 with self._lock:
3308 assert tracename not in (i['trace'] for i in self._traces)
3309 self._traces.append({
maruel@chromium.orgf43e68b2012-10-15 20:23:10 +00003310 'cmd': cmd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003311 'cwd': cwd,
maruel@chromium.org964ddde2012-10-29 18:13:46 +00003312 'output': out,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003313 'pid': child.pid,
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00003314 # Used to figure out the real process when process ids are reused.
3315 'thunk_cmd': child_cmd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003316 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003317 })
3318
3319 return child.returncode, out
3320
3321 def close(self, _timeout=None):
3322 """Stops the kernel log collection and converts the traces to text
3323 representation.
3324 """
3325 with self._lock:
maruel@chromium.org808e3f22013-05-16 16:42:54 +00003326 try:
3327 super(LogmanTrace.Tracer, self).close()
3328 finally:
3329 cmd_stop = [
3330 'logman.exe',
3331 'stop',
3332 'NT Kernel Logger',
3333 '-ets', # Sends the command directly to the kernel.
3334 ]
3335 logging.debug('Running: %s' % cmd_stop)
3336 subprocess.check_call(
3337 cmd_stop,
3338 stdin=subprocess.PIPE,
3339 stdout=subprocess.PIPE,
3340 stderr=subprocess.STDOUT)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003341
3342 def post_process_log(self):
3343 """Converts the .etl file into .csv then into .json."""
3344 super(LogmanTrace.Tracer, self).post_process_log()
3345 logformat = 'csv'
3346 self._convert_log(logformat)
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003347 self._trim_log(logformat)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003348
maruel@chromium.org808e3f22013-05-16 16:42:54 +00003349 def _gen_logdata(self):
3350 return {
3351 'format': 'csv',
3352 'traces': self._traces,
3353 }
3354
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003355 def _trim_log(self, logformat):
3356 """Reduces the amount of data in original log by generating a 'reduced'
3357 log.
3358 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003359 if logformat == 'csv_utf16':
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003360 file_handle = codecs.open(
3361 self._logname + '.' + logformat, 'r', encoding='utf-16')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003362
3363 elif logformat == 'csv':
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003364 assert sys.getfilesystemencoding() == 'mbcs'
3365 file_handle = codecs.open(
3366 self._logname + '.' + logformat, 'r',
3367 encoding=get_current_encoding())
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003368
3369 supported_events = LogmanTrace.Context.supported_events()
3370
3371 def trim(generator):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003372 """Loads items from the generator and returns the interesting data.
3373
3374 It filters out any uninteresting line and reduce the amount of data in
3375 the trace.
3376 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003377 for index, line in enumerate(generator):
3378 if not index:
3379 if line != self.EXPECTED_HEADER:
3380 raise TracingFailure(
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003381 'Found malformed header: %s' % line,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003382 None, None, None)
3383 continue
3384 # As you can see, the CSV is full of useful non-redundant information:
3385 if (line[2] != '0' or # Event ID
3386 line[3] not in ('2', '3') or # Version
3387 line[4] != '0' or # Channel
3388 line[5] != '0' or # Level
3389 line[7] != '0' or # Task
3390 line[8] != '0x0000000000000000' or # Keyword
3391 line[12] != '' or # Instance ID
3392 line[13] != '' or # Parent Instance ID
3393 line[14] != self.NULL_GUID or # Activity ID
3394 line[15] != ''): # Related Activity ID
3395 raise TracingFailure(
3396 'Found unexpected values in line: %s' % ' '.join(line),
3397 None, None, None)
3398
3399 if (line[self.EVENT_NAME], line[self.TYPE]) not in supported_events:
3400 continue
3401
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003402 yield [
3403 line[self.EVENT_NAME],
3404 line[self.TYPE],
3405 line[self.PID],
3406 line[self.TID],
3407 line[self.PROCESSOR_ID],
3408 line[self.TIMESTAMP],
3409 ] + line[self.USER_DATA:]
3410
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003411 # must not convert the trim() call into a list, since it will use too much
3412 # memory for large trace. use a csv file as a workaround since the json
3413 # parser requires a complete in-memory file.
3414 with open('%s.preprocessed' % self._logname, 'wb') as f:
3415 # $ and * can't be used in file name on windows, reducing the likelihood
3416 # of having to escape a string.
3417 out = csv.writer(
3418 f, delimiter='$', quotechar='*', quoting=csv.QUOTE_MINIMAL)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003419 for line in trim(self.CsvReader(file_handle)):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003420 out.writerow([s.encode('utf-8') for s in line])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003421
3422 def _convert_log(self, logformat):
3423 """Converts the ETL trace to text representation.
3424
3425 Normally, 'csv' is sufficient. If complex scripts are used (like eastern
3426 languages), use 'csv_utf16'. If localization gets in the way, use 'xml'.
3427
3428 Arguments:
3429 - logformat: Text format to be generated, csv, csv_utf16 or xml.
3430
3431 Use "tracerpt -?" for help.
3432 """
3433 LOCALE_INVARIANT = 0x7F
3434 windll.kernel32.SetThreadLocale(LOCALE_INVARIANT)
3435 cmd_convert = [
3436 'tracerpt.exe',
3437 '-l', self._logname + '.etl',
3438 '-o', self._logname + '.' + logformat,
3439 '-gmt', # Use UTC
3440 '-y', # No prompt
3441 # Use -of XML to get the header of each items after column 19, e.g. all
3442 # the actual headers of 'User Data'.
3443 ]
3444
3445 if logformat == 'csv':
3446 # tracerpt localizes the 'Type' column, for major brainfuck
3447 # entertainment. I can't imagine any sane reason to do that.
3448 cmd_convert.extend(['-of', 'CSV'])
3449 elif logformat == 'csv_utf16':
3450 # This causes it to use UTF-16, which doubles the log size but ensures
3451 # the log is readable for non-ASCII characters.
3452 cmd_convert.extend(['-of', 'CSV', '-en', 'Unicode'])
3453 elif logformat == 'xml':
3454 cmd_convert.extend(['-of', 'XML'])
3455 else:
3456 raise ValueError('Unexpected log format \'%s\'' % logformat)
3457 logging.debug('Running: %s' % cmd_convert)
3458 # This can takes tens of minutes for large logs.
3459 # Redirects all output to stderr.
3460 subprocess.check_call(
3461 cmd_convert,
3462 stdin=subprocess.PIPE,
3463 stdout=sys.stderr,
3464 stderr=sys.stderr)
3465
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003466 def __init__(self, use_sudo=False): # pylint: disable=W0613
3467 super(LogmanTrace, self).__init__()
3468 # Ignore use_sudo. It's irrelevant on Windows but kept to simplify the API.
3469
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003470 @staticmethod
3471 def clean_trace(logname):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003472 for ext in ('', '.csv', '.etl', '.json', '.xml', '.preprocessed'):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003473 if os.path.isfile(logname + ext):
3474 os.remove(logname + ext)
3475
3476 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003477 def parse_log(cls, logname, blacklist, trace_name):
3478 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00003479 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003480
3481 def blacklist_more(filepath):
3482 # All the NTFS metadata is in the form x:\$EXTEND or stuff like that.
3483 return blacklist(filepath) or re.match(r'[A-Z]\:\\\$EXTEND', filepath)
3484
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003485 # Create a list of (Context, result_dict) tuples. This is necessary because
3486 # the csv file may be larger than the amount of available memory.
3487 contexes = [
3488 (
3489 cls.Context(
3490 blacklist_more, item['pid'], item['trace'], item['thunk_cmd']),
3491 {
3492 'output': item['output'],
3493 'trace': item['trace'],
3494 },
3495 )
3496 for item in read_json(logname)['traces']
3497 if not trace_name or item['trace'] == trace_name
3498 ]
3499
3500 # The log may be too large to fit in memory and it is not efficient to read
3501 # it multiple times, so multiplex the contexes instead, which is slightly
3502 # more awkward.
3503 with open('%s.preprocessed' % logname, 'rb') as f:
3504 lines = csv.reader(
3505 f, delimiter='$', quotechar='*', quoting=csv.QUOTE_MINIMAL)
3506 for encoded in lines:
3507 line = [s.decode('utf-8') for s in encoded]
3508 # Convert the PID in-place from hex.
3509 line[cls.Context.PID] = int(line[cls.Context.PID], 16)
3510 for context in contexes:
3511 if 'exception' in context[1]:
3512 continue
3513 try:
3514 context[0].on_line(line)
3515 except TracingFailure:
3516 context[1]['exception'] = sys.exc_info()
3517
3518 for context in contexes:
3519 if 'exception' in context[1]:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003520 continue
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003521 context[1]['results'] = context[0].to_results()
3522
3523 return [context[1] for context in contexes]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003524
3525
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003526def get_api(**kwargs):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003527 """Returns the correct implementation for the current OS."""
3528 if sys.platform == 'cygwin':
3529 raise NotImplementedError(
3530 'Not implemented for cygwin, start the script from Win32 python')
3531 flavors = {
3532 'win32': LogmanTrace,
3533 'darwin': Dtrace,
3534 'sunos5': Dtrace,
3535 'freebsd7': Dtrace,
3536 'freebsd8': Dtrace,
3537 }
3538 # Defaults to strace.
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003539 return flavors.get(sys.platform, Strace)(**kwargs)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003540
3541
3542def extract_directories(root_dir, files, blacklist):
3543 """Detects if all the files in a directory are in |files| and if so, replace
3544 the individual files by a Results.Directory instance.
3545
3546 Takes a list of Results.File instances and returns a shorter list of
3547 Results.File and Results.Directory instances.
3548
3549 Arguments:
3550 - root_dir: Optional base directory that shouldn't be search further.
3551 - files: list of Results.File instances.
3552 - blacklist: regexp of files to ignore, for example r'.+\.pyc'.
3553 """
3554 logging.info(
3555 'extract_directories(%s, %d files, ...)' % (root_dir, len(files)))
3556 assert not (root_dir or '').endswith(os.path.sep), root_dir
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003557 # It is important for root_dir to not be a symlinked path, make sure to call
3558 # os.path.realpath() as needed.
3559 assert not root_dir or (
3560 os.path.realpath(get_native_path_case(root_dir)) == root_dir)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003561 assert not any(isinstance(f, Results.Directory) for f in files)
3562 # Remove non existent files.
3563 files = [f for f in files if f.existent]
3564 if not files:
3565 return files
3566 # All files must share the same root, which can be None.
3567 assert len(set(f.root for f in files)) == 1, set(f.root for f in files)
3568
3569 # Creates a {directory: {filename: File}} mapping, up to root.
3570 buckets = {}
3571 if root_dir:
3572 buckets[root_dir] = {}
3573 for fileobj in files:
3574 path = fileobj.full_path
3575 directory = os.path.dirname(path)
3576 assert directory
3577 # Do not use os.path.basename() so trailing os.path.sep is kept.
3578 basename = path[len(directory)+1:]
3579 files_in_directory = buckets.setdefault(directory, {})
3580 files_in_directory[basename] = fileobj
3581 # Add all the directories recursively up to root.
3582 while True:
3583 old_d = directory
3584 directory = os.path.dirname(directory)
3585 if directory + os.path.sep == root_dir or directory == old_d:
3586 break
3587 buckets.setdefault(directory, {})
3588
3589 root_prefix = len(root_dir) + 1 if root_dir else 0
3590 for directory in sorted(buckets, reverse=True):
maruel@chromium.orgb1678e62013-03-19 15:55:17 +00003591 if not os.path.isdir(directory):
3592 logging.debug(
3593 '%s was a directory but doesn\'t exist anymore; ignoring', directory)
3594 continue
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003595 actual = set(f for f in os.listdir(directory) if not blacklist(f))
3596 expected = set(buckets[directory])
3597 if not (actual - expected):
3598 parent = os.path.dirname(directory)
3599 buckets[parent][os.path.basename(directory)] = Results.Directory(
3600 root_dir,
3601 directory[root_prefix:],
3602 False,
3603 sum(f.size for f in buckets[directory].itervalues()),
3604 sum(f.nb_files for f in buckets[directory].itervalues()))
3605 # Remove the whole bucket.
3606 del buckets[directory]
3607
3608 # Reverse the mapping with what remains. The original instances are returned,
3609 # so the cached meta data is kept.
3610 files = sum((x.values() for x in buckets.itervalues()), [])
3611 return sorted(files, key=lambda x: x.path)
3612
3613
3614def trace(logfile, cmd, cwd, api, output):
3615 """Traces an executable. Returns (returncode, output) from api.
3616
3617 Arguments:
3618 - logfile: file to write to.
3619 - cmd: command to run.
3620 - cwd: current directory to start the process in.
3621 - api: a tracing api instance.
3622 - output: if True, returns output, otherwise prints it at the console.
3623 """
3624 cmd = fix_python_path(cmd)
3625 api.clean_trace(logfile)
3626 with api.get_tracer(logfile) as tracer:
3627 return tracer.trace(cmd, cwd, 'default', output)
3628
3629
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003630def CMDclean(args):
3631 """Cleans up traces."""
3632 parser = OptionParserTraceInputs(command='clean')
3633 options, args = parser.parse_args(args)
3634 api = get_api()
3635 api.clean_trace(options.log)
3636 return 0
3637
3638
3639def CMDtrace(args):
3640 """Traces an executable."""
3641 parser = OptionParserTraceInputs(command='trace')
3642 parser.allow_interspersed_args = False
3643 parser.add_option(
3644 '-q', '--quiet', action='store_true',
3645 help='Redirects traced executable output to /dev/null')
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003646 parser.add_option(
3647 '-s', '--sudo', action='store_true',
3648 help='Use sudo when shelling out the tracer tool (ignored on Windows)')
3649 parser.add_option(
3650 '-n', '--no-sudo', action='store_false',
3651 help='Don\'t use sudo')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003652 options, args = parser.parse_args(args)
3653
3654 if not args:
3655 parser.error('Please provide a command to run')
3656
3657 if not os.path.isabs(args[0]) and os.access(args[0], os.X_OK):
3658 args[0] = os.path.abspath(args[0])
3659
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003660 # options.sudo default value is None, which is to do whatever tracer defaults
3661 # do.
3662 api = get_api(use_sudo=options.sudo)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003663 return trace(options.log, args, os.getcwd(), api, options.quiet)[0]
3664
3665
3666def CMDread(args):
3667 """Reads the logs and prints the result."""
3668 parser = OptionParserTraceInputs(command='read')
3669 parser.add_option(
3670 '-V', '--variable',
3671 nargs=2,
3672 action='append',
3673 dest='variables',
3674 metavar='VAR_NAME directory',
3675 default=[],
3676 help=('Variables to replace relative directories against. Example: '
3677 '"-v \'$HOME\' \'/home/%s\'" will replace all occurence of your '
3678 'home dir with $HOME') % getpass.getuser())
3679 parser.add_option(
3680 '--root-dir',
3681 help='Root directory to base everything off it. Anything outside of this '
3682 'this directory will not be reported')
3683 parser.add_option(
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003684 '--trace-name',
3685 help='Only reads one of the trace. Defaults to reading all traces')
3686 parser.add_option(
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003687 '-j', '--json', action='store_true',
3688 help='Outputs raw result data as json')
3689 parser.add_option(
3690 '-b', '--blacklist', action='append', default=[],
3691 help='List of regexp to use as blacklist filter')
3692 options, args = parser.parse_args(args)
3693
3694 if options.root_dir:
maruel@chromium.org7dc17b32013-04-02 22:27:05 +00003695 options.root_dir = get_native_path_case(
3696 unicode(os.path.abspath(options.root_dir)))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003697
3698 variables = dict(options.variables)
3699 api = get_api()
3700 def blacklist(f):
3701 return any(re.match(b, f) for b in options.blacklist)
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003702 data = api.parse_log(options.log, blacklist, options.trace_name)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003703 # Process each trace.
3704 output_as_json = []
maruel@chromium.org9e56b592013-05-21 16:56:05 +00003705 try:
3706 for item in data:
3707 if 'exception' in item:
3708 # Do not abort the other traces.
3709 print >> sys.stderr, (
3710 'Trace %s: Got an exception: %s' % (
3711 item['trace'], item['exception'][1]))
3712 continue
3713 results = item['results']
3714 if options.root_dir:
3715 results = results.strip_root(options.root_dir)
3716
3717 if options.json:
3718 output_as_json.append(results.flatten())
3719 else:
3720 simplified = extract_directories(
3721 options.root_dir, results.files, blacklist)
3722 simplified = [f.replace_variables(variables) for f in simplified]
3723 if len(data) > 1:
3724 print('Trace: %s' % item['trace'])
3725 print('Total: %d' % len(results.files))
3726 print('Non existent: %d' % len(results.non_existent))
3727 for f in results.non_existent:
3728 print(' %s' % f.path)
3729 print(
3730 'Interesting: %d reduced to %d' % (
3731 len(results.existent), len(simplified)))
3732 for f in simplified:
3733 print(' %s' % f.path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003734
3735 if options.json:
maruel@chromium.org9e56b592013-05-21 16:56:05 +00003736 write_json(sys.stdout, output_as_json, False)
3737 except KeyboardInterrupt:
3738 return 1
3739 except IOError as e:
3740 if e.errno == errno.EPIPE:
3741 # Do not print a stack trace when the output is piped to less and the user
3742 # quits before the whole output was written.
3743 return 1
3744 raise
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003745 return 0
3746
3747
3748class OptionParserWithLogging(optparse.OptionParser):
3749 """Adds --verbose option."""
3750 def __init__(self, verbose=0, **kwargs):
3751 optparse.OptionParser.__init__(self, **kwargs)
3752 self.add_option(
3753 '-v', '--verbose',
3754 action='count',
3755 default=verbose,
3756 help='Use multiple times to increase verbosity')
3757
3758 def parse_args(self, *args, **kwargs):
3759 options, args = optparse.OptionParser.parse_args(self, *args, **kwargs)
3760 levels = [logging.ERROR, logging.INFO, logging.DEBUG]
3761 logging.basicConfig(
3762 level=levels[min(len(levels)-1, options.verbose)],
3763 format='%(levelname)5s %(module)15s(%(lineno)3d): %(message)s')
3764 return options, args
3765
3766
3767class OptionParserWithNiceDescription(OptionParserWithLogging):
3768 """Generates the description with the command's docstring."""
3769 def __init__(self, **kwargs):
3770 """Sets 'description' and 'usage' if not already specified."""
3771 command = kwargs.pop('command', 'help')
3772 kwargs.setdefault(
3773 'description',
3774 re.sub('[\r\n ]{2,}', ' ', get_command_handler(command).__doc__))
3775 kwargs.setdefault('usage', '%%prog %s [options]' % command)
3776 OptionParserWithLogging.__init__(self, **kwargs)
3777
3778
3779class OptionParserTraceInputs(OptionParserWithNiceDescription):
3780 """Adds automatic --log handling."""
3781 def __init__(self, **kwargs):
3782 OptionParserWithNiceDescription.__init__(self, **kwargs)
3783 self.add_option(
3784 '-l', '--log', help='Log file to generate or read, required')
3785
3786 def parse_args(self, *args, **kwargs):
3787 """Makes sure the paths make sense.
3788
3789 On Windows, / and \ are often mixed together in a path.
3790 """
3791 options, args = OptionParserWithNiceDescription.parse_args(
3792 self, *args, **kwargs)
3793 if not options.log:
3794 self.error('Must supply a log file with -l')
3795 options.log = os.path.abspath(options.log)
3796 return options, args
3797
3798
3799def extract_documentation():
3800 """Returns a dict {command: description} for each of documented command."""
3801 commands = (
3802 fn[3:]
3803 for fn in dir(sys.modules['__main__'])
3804 if fn.startswith('CMD') and get_command_handler(fn[3:]).__doc__)
3805 return dict((fn, get_command_handler(fn).__doc__) for fn in commands)
3806
3807
3808def CMDhelp(args):
3809 """Prints list of commands or help for a specific command."""
3810 doc = extract_documentation()
3811 # Calculates the optimal offset.
3812 offset = max(len(cmd) for cmd in doc)
3813 format_str = ' %-' + str(offset + 2) + 's %s'
3814 # Generate a one-liner documentation of each commands.
3815 commands_description = '\n'.join(
3816 format_str % (cmd, doc[cmd].split('\n')[0]) for cmd in sorted(doc))
3817
3818 parser = OptionParserWithNiceDescription(
3819 usage='%prog <command> [options]',
3820 description='Commands are:\n%s\n' % commands_description)
3821 parser.format_description = lambda _: parser.description
3822
3823 # Strip out any -h or --help argument.
3824 _, args = parser.parse_args([i for i in args if not i in ('-h', '--help')])
3825 if len(args) == 1:
3826 if not get_command_handler(args[0]):
3827 parser.error('Unknown command %s' % args[0])
3828 # The command was "%prog help command", replaces ourself with
3829 # "%prog command --help" so help is correctly printed out.
3830 return main(args + ['--help'])
3831 elif args:
3832 parser.error('Unknown argument "%s"' % ' '.join(args))
3833 parser.print_help()
3834 return 0
3835
3836
3837def get_command_handler(name):
3838 """Returns the command handler or CMDhelp if it doesn't exist."""
3839 return getattr(sys.modules['__main__'], 'CMD%s' % name, None)
3840
3841
3842def main_impl(argv):
3843 command = get_command_handler(argv[0] if argv else 'help')
3844 if not command:
3845 return CMDhelp(argv)
3846 return command(argv[1:])
3847
3848def main(argv):
maruel@chromium.orgac36fb72013-05-21 14:50:53 +00003849 disable_buffering()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003850 try:
3851 main_impl(argv)
3852 except TracingFailure, e:
3853 sys.stderr.write('\nError: ')
3854 sys.stderr.write(str(e))
3855 sys.stderr.write('\n')
3856 return 1
3857
3858
3859if __name__ == '__main__':
3860 sys.exit(main(sys.argv[1:]))