blob: 124002598a9a3f1d3d7df6ec767b38d5a4f70ba9 [file] [log] [blame]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001#!/usr/bin/env python
2# coding=utf-8
3# Copyright (c) 2012 The Chromium Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""Traces an executable and its child processes and extract the files accessed
8by them.
9
10The implementation uses OS-specific API. The native Kernel logger and the ETL
11interface is used on Windows. Dtrace is used on OSX. Strace is used otherwise.
12The OS-specific implementation is hidden in an 'API' interface.
13
14The results are embedded in a Results instance. The tracing is done in two
15phases, the first is to do the actual trace and generate an
16implementation-specific log file. Then the log file is parsed to extract the
17information, including the individual child processes and the files accessed
18from the log.
19"""
20
21import codecs
22import csv
maruel@chromium.org9e56b592013-05-21 16:56:05 +000023import errno
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000024import getpass
25import glob
26import json
27import logging
28import optparse
29import os
30import re
maruel@chromium.org9b2112a2013-04-05 18:01:02 +000031import stat
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000032import subprocess
33import sys
34import tempfile
35import threading
36import time
maruel@chromium.org9b2112a2013-04-05 18:01:02 +000037import unicodedata
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000038import weakref
39
maruel@chromium.orge5322512013-08-19 20:17:57 +000040from third_party import colorama
41from third_party.depot_tools import fix_encoding
42from third_party.depot_tools import subcommand
43
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000044## OS-specific imports
45
46if sys.platform == 'win32':
47 from ctypes.wintypes import byref, create_unicode_buffer, c_int, c_wchar_p
48 from ctypes.wintypes import windll, FormatError # pylint: disable=E0611
49 from ctypes.wintypes import GetLastError # pylint: disable=E0611
50elif sys.platform == 'darwin':
51 import Carbon.File # pylint: disable=F0401
52 import MacOS # pylint: disable=F0401
53
54
maruel@chromium.org3d671992013-08-20 00:38:27 +000055__version__ = '0.1'
56
57
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000058BASE_DIR = os.path.dirname(os.path.abspath(__file__))
59ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR))
60
61
62class TracingFailure(Exception):
63 """An exception occured during tracing."""
64 def __init__(self, description, pid, line_number, line, *args):
65 super(TracingFailure, self).__init__(
66 description, pid, line_number, line, *args)
67 self.description = description
68 self.pid = pid
69 self.line_number = line_number
70 self.line = line
71 self.extra = args
72
73 def __str__(self):
74 out = self.description
75 if self.pid:
76 out += '\npid: %d' % self.pid
77 if self.line_number:
78 out += '\nline: %d' % self.line_number
79 if self.line:
80 out += '\n%s' % self.line
81 if self.extra:
82 out += '\n' + ', '.join(map(str, filter(None, self.extra)))
83 return out
84
85
86## OS-specific functions
87
88if sys.platform == 'win32':
89 def QueryDosDevice(drive_letter):
90 """Returns the Windows 'native' path for a DOS drive letter."""
91 assert re.match(r'^[a-zA-Z]:$', drive_letter), drive_letter
maruel@chromium.org306e0e72012-11-02 18:22:03 +000092 assert isinstance(drive_letter, unicode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000093 # Guesswork. QueryDosDeviceW never returns the required number of bytes.
94 chars = 1024
maruel@chromium.org306e0e72012-11-02 18:22:03 +000095 drive_letter = drive_letter
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000096 p = create_unicode_buffer(chars)
97 if 0 == windll.kernel32.QueryDosDeviceW(drive_letter, p, chars):
98 err = GetLastError()
99 if err:
100 # pylint: disable=E0602
maruel@chromium.orgf37c2472012-11-02 20:06:13 +0000101 msg = u'QueryDosDevice(%s): %s (%d)' % (
102 drive_letter, FormatError(err), err)
103 raise WindowsError(err, msg.encode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000104 return p.value
105
106
107 def GetShortPathName(long_path):
108 """Returns the Windows short path equivalent for a 'long' path."""
maruel@chromium.org68bb1482013-02-15 22:36:24 +0000109 assert isinstance(long_path, unicode), repr(long_path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000110 # Adds '\\\\?\\' when given an absolute path so the MAX_PATH (260) limit is
111 # not enforced.
112 if os.path.isabs(long_path) and not long_path.startswith('\\\\?\\'):
113 long_path = '\\\\?\\' + long_path
114 chars = windll.kernel32.GetShortPathNameW(long_path, None, 0)
115 if chars:
116 p = create_unicode_buffer(chars)
117 if windll.kernel32.GetShortPathNameW(long_path, p, chars):
118 return p.value
119
120 err = GetLastError()
121 if err:
122 # pylint: disable=E0602
maruel@chromium.orgf37c2472012-11-02 20:06:13 +0000123 msg = u'GetShortPathName(%s): %s (%d)' % (
124 long_path, FormatError(err), err)
125 raise WindowsError(err, msg.encode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000126
127
128 def GetLongPathName(short_path):
129 """Returns the Windows long path equivalent for a 'short' path."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000130 assert isinstance(short_path, unicode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000131 # Adds '\\\\?\\' when given an absolute path so the MAX_PATH (260) limit is
132 # not enforced.
133 if os.path.isabs(short_path) and not short_path.startswith('\\\\?\\'):
134 short_path = '\\\\?\\' + short_path
135 chars = windll.kernel32.GetLongPathNameW(short_path, None, 0)
136 if chars:
137 p = create_unicode_buffer(chars)
138 if windll.kernel32.GetLongPathNameW(short_path, p, chars):
139 return p.value
140
141 err = GetLastError()
142 if err:
143 # pylint: disable=E0602
maruel@chromium.orgf37c2472012-11-02 20:06:13 +0000144 msg = u'GetLongPathName(%s): %s (%d)' % (
145 short_path, FormatError(err), err)
146 raise WindowsError(err, msg.encode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000147
148
149 def get_current_encoding():
150 """Returns the 'ANSI' code page associated to the process."""
151 return 'cp%d' % int(windll.kernel32.GetACP())
152
153
154 class DosDriveMap(object):
155 """Maps \Device\HarddiskVolumeN to N: on Windows."""
156 # Keep one global cache.
157 _MAPPING = {}
158
159 def __init__(self):
160 """Lazy loads the cache."""
161 if not self._MAPPING:
162 # This is related to UNC resolver on windows. Ignore that.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000163 self._MAPPING[u'\\Device\\Mup'] = None
164 self._MAPPING[u'\\SystemRoot'] = os.environ[u'SystemRoot']
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000165
166 for letter in (chr(l) for l in xrange(ord('C'), ord('Z')+1)):
167 try:
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000168 letter = u'%s:' % letter
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000169 mapped = QueryDosDevice(letter)
170 if mapped in self._MAPPING:
171 logging.warn(
172 ('Two drives: \'%s\' and \'%s\', are mapped to the same disk'
173 '. Drive letters are a user-mode concept and the kernel '
174 'traces only have NT path, so all accesses will be '
175 'associated with the first drive letter, independent of the '
176 'actual letter used by the code') % (
177 self._MAPPING[mapped], letter))
178 else:
179 self._MAPPING[mapped] = letter
180 except WindowsError: # pylint: disable=E0602
181 pass
182
183 def to_win32(self, path):
184 """Converts a native NT path to Win32/DOS compatible path."""
185 match = re.match(r'(^\\Device\\[a-zA-Z0-9]+)(\\.*)?$', path)
186 if not match:
187 raise ValueError(
188 'Can\'t convert %s into a Win32 compatible path' % path,
189 path)
190 if not match.group(1) in self._MAPPING:
191 # Unmapped partitions may be accessed by windows for the
192 # fun of it while the test is running. Discard these.
193 return None
194 drive = self._MAPPING[match.group(1)]
195 if not drive or not match.group(2):
196 return drive
197 return drive + match.group(2)
198
199
200 def isabs(path):
201 """Accepts X: as an absolute path, unlike python's os.path.isabs()."""
202 return os.path.isabs(path) or len(path) == 2 and path[1] == ':'
203
204
csharp@chromium.orgf2eacff2013-04-04 14:20:20 +0000205 def find_item_native_case(root, item):
206 """Gets the native path case of a single item based at root_path."""
207 if item == '..':
208 return item
209
210 root = get_native_path_case(root)
211 return os.path.basename(get_native_path_case(os.path.join(root, item)))
212
213
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000214 def get_native_path_case(p):
215 """Returns the native path case for an existing file.
216
217 On Windows, removes any leading '\\?\'.
218 """
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000219 assert isinstance(p, unicode), repr(p)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000220 if not isabs(p):
221 raise ValueError(
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +0000222 'get_native_path_case(%r): Require an absolute path' % p, p)
223
maruel@chromium.org037758d2012-12-10 17:59:46 +0000224 # Make sure it is normalized to os.path.sep. Do not do it here to keep the
225 # function fast
226 assert '/' not in p, p
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +0000227 suffix = ''
228 count = p.count(':')
229 if count > 1:
230 # This means it has an alternate-data stream. There could be 3 ':', since
231 # it could be the $DATA datastream of an ADS. Split the whole ADS suffix
232 # off and add it back afterward. There is no way to know the native path
233 # case of an alternate data stream.
234 items = p.split(':')
235 p = ':'.join(items[0:2])
236 suffix = ''.join(':' + i for i in items[2:])
237
maruel@chromium.org42b091d2013-04-02 22:27:02 +0000238 # TODO(maruel): Use os.path.normpath?
239 if p.endswith('.\\'):
240 p = p[:-2]
241
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000242 # Windows used to have an option to turn on case sensitivity on non Win32
243 # subsystem but that's out of scope here and isn't supported anymore.
244 # Go figure why GetShortPathName() is needed.
245 try:
246 out = GetLongPathName(GetShortPathName(p))
247 except OSError, e:
248 if e.args[0] in (2, 3, 5):
249 # The path does not exist. Try to recurse and reconstruct the path.
250 base = os.path.dirname(p)
251 rest = os.path.basename(p)
252 return os.path.join(get_native_path_case(base), rest)
253 raise
254 if out.startswith('\\\\?\\'):
255 out = out[4:]
256 # Always upper case the first letter since GetLongPathName() will return the
257 # drive letter in the case it was given.
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +0000258 return out[0].upper() + out[1:] + suffix
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000259
260
261 def CommandLineToArgvW(command_line):
262 """Splits a commandline into argv using CommandLineToArgvW()."""
263 # http://msdn.microsoft.com/library/windows/desktop/bb776391.aspx
264 size = c_int()
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000265 assert isinstance(command_line, unicode)
266 ptr = windll.shell32.CommandLineToArgvW(command_line, byref(size))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000267 try:
268 return [arg for arg in (c_wchar_p * size.value).from_address(ptr)]
269 finally:
270 windll.kernel32.LocalFree(ptr)
271
272
273elif sys.platform == 'darwin':
274
275
276 # On non-windows, keep the stdlib behavior.
277 isabs = os.path.isabs
278
279
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000280 def _native_case(p):
281 """Gets the native path case. Warning: this function resolves symlinks."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000282 try:
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000283 rel_ref, _ = Carbon.File.FSPathMakeRef(p.encode('utf-8'))
284 # The OSX underlying code uses NFD but python strings are in NFC. This
285 # will cause issues with os.listdir() for example. Since the dtrace log
286 # *is* in NFC, normalize it here.
csharp@chromium.org5a2c3732013-03-07 14:44:56 +0000287 out = unicodedata.normalize(
288 'NFC', rel_ref.FSRefMakePath().decode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000289 if p.endswith(os.path.sep) and not out.endswith(os.path.sep):
290 return out + os.path.sep
291 return out
292 except MacOS.Error, e:
293 if e.args[0] in (-43, -120):
294 # The path does not exist. Try to recurse and reconstruct the path.
295 # -43 means file not found.
296 # -120 means directory not found.
297 base = os.path.dirname(p)
298 rest = os.path.basename(p)
csharp@chromium.org5a2c3732013-03-07 14:44:56 +0000299 return os.path.join(_native_case(base), rest)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000300 raise OSError(
301 e.args[0], 'Failed to get native path for %s' % p, p, e.args[1])
302
303
304 def _split_at_symlink_native(base_path, rest):
305 """Returns the native path for a symlink."""
306 base, symlink, rest = split_at_symlink(base_path, rest)
307 if symlink:
308 if not base_path:
309 base_path = base
310 else:
311 base_path = safe_join(base_path, base)
csharp@chromium.orgf2eacff2013-04-04 14:20:20 +0000312 symlink = find_item_native_case(base_path, symlink)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000313 return base, symlink, rest
314
315
csharp@chromium.orgf2eacff2013-04-04 14:20:20 +0000316 def find_item_native_case(root_path, item):
317 """Gets the native path case of a single item based at root_path.
318
319 There is no API to get the native path case of symlinks on OSX. So it
320 needs to be done the slow way.
321 """
322 if item == '..':
323 return item
324
325 item = item.lower()
326 for element in os.listdir(root_path):
327 if element.lower() == item:
328 return element
329
330
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000331 def get_native_path_case(path):
332 """Returns the native path case for an existing file.
333
334 Technically, it's only HFS+ on OSX that is case preserving and
335 insensitive. It's the default setting on HFS+ but can be changed.
336 """
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000337 assert isinstance(path, unicode), repr(path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000338 if not isabs(path):
339 raise ValueError(
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000340 'get_native_path_case(%r): Require an absolute path' % path, path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000341 if path.startswith('/dev'):
342 # /dev is not visible from Carbon, causing an exception.
343 return path
344
345 # Starts assuming there is no symlink along the path.
346 resolved = _native_case(path)
csharp@chromium.orgf2eacff2013-04-04 14:20:20 +0000347 if path.lower() in (resolved.lower(), resolved.lower() + './'):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000348 # This code path is incredibly faster.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000349 logging.debug('get_native_path_case(%s) = %s' % (path, resolved))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000350 return resolved
351
352 # There was a symlink, process it.
353 base, symlink, rest = _split_at_symlink_native(None, path)
354 assert symlink, (path, base, symlink, rest, resolved)
355 prev = base
356 base = safe_join(_native_case(base), symlink)
357 assert len(base) > len(prev)
358 while rest:
359 prev = base
360 relbase, symlink, rest = _split_at_symlink_native(base, rest)
361 base = safe_join(base, relbase)
362 assert len(base) > len(prev), (prev, base, symlink)
363 if symlink:
364 base = safe_join(base, symlink)
365 assert len(base) > len(prev), (prev, base, symlink)
366 # Make sure no symlink was resolved.
367 assert base.lower() == path.lower(), (base, path)
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000368 logging.debug('get_native_path_case(%s) = %s' % (path, base))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000369 return base
370
371
372else: # OSes other than Windows and OSX.
373
374
375 # On non-windows, keep the stdlib behavior.
376 isabs = os.path.isabs
377
378
csharp@chromium.orgf2eacff2013-04-04 14:20:20 +0000379 def find_item_native_case(root, item):
380 """Gets the native path case of a single item based at root_path."""
381 if item == '..':
382 return item
383
384 root = get_native_path_case(root)
385 return os.path.basename(get_native_path_case(os.path.join(root, item)))
386
387
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000388 def get_native_path_case(path):
389 """Returns the native path case for an existing file.
390
391 On OSes other than OSX and Windows, assume the file system is
392 case-sensitive.
393
394 TODO(maruel): This is not strictly true. Implement if necessary.
395 """
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000396 assert isinstance(path, unicode), repr(path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000397 if not isabs(path):
398 raise ValueError(
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000399 'get_native_path_case(%r): Require an absolute path' % path, path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000400 # Give up on cygwin, as GetLongPathName() can't be called.
401 # Linux traces tends to not be normalized so use this occasion to normalize
402 # it. This function implementation already normalizes the path on the other
403 # OS so this needs to be done here to be coherent between OSes.
404 out = os.path.normpath(path)
405 if path.endswith(os.path.sep) and not out.endswith(os.path.sep):
406 return out + os.path.sep
407 return out
408
409
410if sys.platform != 'win32': # All non-Windows OSes.
411
412
413 def safe_join(*args):
414 """Joins path elements like os.path.join() but doesn't abort on absolute
415 path.
416
417 os.path.join('foo', '/bar') == '/bar'
418 but safe_join('foo', '/bar') == 'foo/bar'.
419 """
420 out = ''
421 for element in args:
422 if element.startswith(os.path.sep):
423 if out.endswith(os.path.sep):
424 out += element[1:]
425 else:
426 out += element
427 else:
428 if out.endswith(os.path.sep):
429 out += element
430 else:
431 out += os.path.sep + element
432 return out
433
434
435 def split_at_symlink(base_dir, relfile):
436 """Scans each component of relfile and cut the string at the symlink if
437 there is any.
438
439 Returns a tuple (base_path, symlink, rest), with symlink == rest == None if
440 not symlink was found.
441 """
442 if base_dir:
443 assert relfile
444 assert os.path.isabs(base_dir)
445 index = 0
446 else:
447 assert os.path.isabs(relfile)
448 index = 1
449
450 def at_root(rest):
451 if base_dir:
452 return safe_join(base_dir, rest)
453 return rest
454
455 while True:
456 try:
457 index = relfile.index(os.path.sep, index)
458 except ValueError:
459 index = len(relfile)
460 full = at_root(relfile[:index])
461 if os.path.islink(full):
462 # A symlink!
463 base = os.path.dirname(relfile[:index])
464 symlink = os.path.basename(relfile[:index])
465 rest = relfile[index:]
466 logging.debug(
467 'split_at_symlink(%s, %s) -> (%s, %s, %s)' %
468 (base_dir, relfile, base, symlink, rest))
469 return base, symlink, rest
470 if index == len(relfile):
471 break
472 index += 1
473 return relfile, None, None
474
475
maruel@chromium.orgac36fb72013-05-21 14:50:53 +0000476class Unbuffered(object):
477 """Disable buffering on a file object."""
478 def __init__(self, stream):
479 self.stream = stream
480
481 def write(self, data):
482 self.stream.write(data)
483 if '\n' in data:
484 self.stream.flush()
485
486 def __getattr__(self, attr):
487 return getattr(self.stream, attr)
488
489
490def disable_buffering():
491 """Makes this process and child processes stdout unbuffered."""
492 if not os.environ.get('PYTHONUNBUFFERED'):
493 # Since sys.stdout is a C++ object, it's impossible to do
494 # sys.stdout.write = lambda...
495 sys.stdout = Unbuffered(sys.stdout)
496 os.environ['PYTHONUNBUFFERED'] = 'x'
497
498
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000499def fix_python_path(cmd):
500 """Returns the fixed command line to call the right python executable."""
501 out = cmd[:]
502 if out[0] == 'python':
503 out[0] = sys.executable
504 elif out[0].endswith('.py'):
505 out.insert(0, sys.executable)
506 return out
507
508
maruel@chromium.org3683afe2013-07-27 00:09:27 +0000509def gen_blacklist(regexes):
510 """Returns a lambda to be used as a blacklist."""
511 compiled = [re.compile(i) for i in regexes]
512 def match(f):
513 return any(j.match(f) for j in compiled)
514 return match
515
516
maruel@chromium.org2909b7c2013-05-29 18:52:59 +0000517def create_subprocess_thunk():
518 """Creates a small temporary script to start the child process.
519
520 This thunk doesn't block, its unique name is used to identify it as the
521 parent.
522 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000523 handle, name = tempfile.mkstemp(prefix='trace_inputs_thunk', suffix='.py')
maruel@chromium.org2909b7c2013-05-29 18:52:59 +0000524 try:
525 os.write(
526 handle,
527 (
528 'import subprocess, sys\n'
529 'sys.exit(subprocess.call(sys.argv[2:]))\n'
530 ))
531 finally:
532 os.close(handle)
533 return name
534
535
536def create_exec_thunk():
537 """Creates a small temporary script to start the child executable.
538
539 Reads from the file handle provided as the fisrt argument to block, then
540 execv() the command to be traced.
541 """
542 handle, name = tempfile.mkstemp(prefix='trace_inputs_thunk', suffix='.py')
543 try:
544 os.write(
545 handle,
546 (
547 'import os, sys\n'
548 'fd = int(sys.argv[1])\n'
549 # This will block until the controlling process writes a byte on the
550 # pipe. It will do so once the tracing tool, e.g. strace, is ready to
551 # trace.
552 'os.read(fd, 1)\n'
553 'os.close(fd)\n'
554 'os.execv(sys.argv[2], sys.argv[2:])\n'
555 ))
556 finally:
557 os.close(handle)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000558 return name
559
560
561def strace_process_quoted_arguments(text):
562 """Extracts quoted arguments on a string and return the arguments as a list.
563
564 Implemented as an automaton. Supports incomplete strings in the form
565 '"foo"...'.
566
567 Example:
568 With text = '"foo", "bar"', the function will return ['foo', 'bar']
569
570 TODO(maruel): Implement escaping.
571 """
572 # All the possible states of the DFA.
573 ( NEED_QUOTE, # Begining of a new arguments.
574 INSIDE_STRING, # Inside an argument.
575 ESCAPED, # Found a '\' inside a quote. Treat the next char as-is.
576 NEED_COMMA_OR_DOT, # Right after the closing quote of an argument. Could be
577 # a serie of 3 dots or a comma.
578 NEED_SPACE, # Right after a comma
579 NEED_DOT_2, # Found a dot, need a second one.
580 NEED_DOT_3, # Found second dot, need a third one.
581 NEED_COMMA, # Found third dot, need a comma.
582 ) = range(8)
583
584 state = NEED_QUOTE
585 out = []
586 for index, char in enumerate(text):
587 if char == '"':
588 if state == NEED_QUOTE:
589 state = INSIDE_STRING
590 # A new argument was found.
591 out.append('')
592 elif state == INSIDE_STRING:
593 # The argument is now closed.
594 state = NEED_COMMA_OR_DOT
595 elif state == ESCAPED:
596 out[-1] += char
597 state = INSIDE_STRING
598 else:
599 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000600 'Can\'t process char \'%s\' at column %d for: %r' % (
601 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000602 index,
603 text)
604 elif char == ',':
605 if state in (NEED_COMMA_OR_DOT, NEED_COMMA):
606 state = NEED_SPACE
607 elif state == INSIDE_STRING:
608 out[-1] += char
609 elif state == ESCAPED:
610 out[-1] += char
611 state = INSIDE_STRING
612 else:
613 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000614 'Can\'t process char \'%s\' at column %d for: %r' % (
615 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000616 index,
617 text)
618 elif char == ' ':
619 if state == NEED_SPACE:
620 state = NEED_QUOTE
621 elif state == INSIDE_STRING:
622 out[-1] += char
623 elif state == ESCAPED:
624 out[-1] += char
625 state = INSIDE_STRING
626 else:
627 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000628 'Can\'t process char \'%s\' at column %d for: %r' % (
629 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000630 index,
631 text)
632 elif char == '.':
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000633 if state in (NEED_QUOTE, NEED_COMMA_OR_DOT):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000634 # The string is incomplete, this mean the strace -s flag should be
635 # increased.
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000636 # For NEED_QUOTE, the input string would look like '"foo", ...'.
637 # For NEED_COMMA_OR_DOT, the input string would look like '"foo"...'
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000638 state = NEED_DOT_2
639 elif state == NEED_DOT_2:
640 state = NEED_DOT_3
641 elif state == NEED_DOT_3:
642 state = NEED_COMMA
643 elif state == INSIDE_STRING:
644 out[-1] += char
645 elif state == ESCAPED:
646 out[-1] += char
647 state = INSIDE_STRING
648 else:
649 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000650 'Can\'t process char \'%s\' at column %d for: %r' % (
651 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000652 index,
653 text)
654 elif char == '\\':
655 if state == ESCAPED:
656 out[-1] += char
657 state = INSIDE_STRING
658 elif state == INSIDE_STRING:
659 state = ESCAPED
660 else:
661 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000662 'Can\'t process char \'%s\' at column %d for: %r' % (
663 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000664 index,
665 text)
666 else:
667 if state == INSIDE_STRING:
668 out[-1] += char
669 else:
670 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000671 'Can\'t process char \'%s\' at column %d for: %r' % (
672 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000673 index,
674 text)
675 if state not in (NEED_COMMA, NEED_COMMA_OR_DOT):
676 raise ValueError(
677 'String is incorrectly terminated: %r' % text,
678 text)
679 return out
680
681
682def read_json(filepath):
683 with open(filepath, 'r') as f:
684 return json.load(f)
685
686
687def write_json(filepath_or_handle, data, dense):
688 """Writes data into filepath or file handle encoded as json.
689
690 If dense is True, the json is packed. Otherwise, it is human readable.
691 """
692 if hasattr(filepath_or_handle, 'write'):
693 if dense:
694 filepath_or_handle.write(
695 json.dumps(data, sort_keys=True, separators=(',',':')))
696 else:
697 filepath_or_handle.write(json.dumps(data, sort_keys=True, indent=2))
698 else:
699 with open(filepath_or_handle, 'wb') as f:
700 if dense:
701 json.dump(data, f, sort_keys=True, separators=(',',':'))
702 else:
703 json.dump(data, f, sort_keys=True, indent=2)
704
705
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000706def assert_is_renderable(pseudo_string):
707 """Asserts the input is a valid object to be processed by render()."""
708 assert (
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000709 pseudo_string is None or
710 isinstance(pseudo_string, unicode) or
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000711 hasattr(pseudo_string, 'render')), repr(pseudo_string)
712
713
714def render(pseudo_string):
715 """Converts the pseudo-string to an unicode string."""
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000716 if pseudo_string is None or isinstance(pseudo_string, unicode):
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000717 return pseudo_string
718 return pseudo_string.render()
719
720
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000721class Results(object):
722 """Results of a trace session."""
723
724 class _TouchedObject(object):
725 """Something, a file or a directory, that was accessed."""
726 def __init__(self, root, path, tainted, size, nb_files):
727 logging.debug(
728 '%s(%s, %s, %s, %s, %s)' %
729 (self.__class__.__name__, root, path, tainted, size, nb_files))
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000730 assert_is_renderable(root)
731 assert_is_renderable(path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000732 self.root = root
733 self.path = path
734 self.tainted = tainted
735 self.nb_files = nb_files
maruel@chromium.org538141b2013-06-03 20:57:17 +0000736 # Can be used as a cache or a default value, depending on context. In
737 # particular, once self.tainted is True, because the path was replaced
738 # with a variable, it is not possible to look up the file size.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000739 self._size = size
740 # These are cache only.
741 self._real_path = None
742
743 # Check internal consistency.
744 assert path, path
745 assert tainted or bool(root) != bool(isabs(path)), (root, path)
746 assert tainted or (
747 not os.path.exists(self.full_path) or
748 (self.full_path == get_native_path_case(self.full_path))), (
749 tainted, self.full_path, get_native_path_case(self.full_path))
750
751 @property
752 def existent(self):
753 return self.size != -1
754
755 @property
756 def full_path(self):
757 if self.root:
758 return os.path.join(self.root, self.path)
759 return self.path
760
761 @property
762 def real_path(self):
763 """Returns the path with symlinks resolved."""
764 if not self._real_path:
765 self._real_path = os.path.realpath(self.full_path)
766 return self._real_path
767
768 @property
769 def size(self):
maruel@chromium.org538141b2013-06-03 20:57:17 +0000770 """File's size. -1 is not existent.
771
772 Once tainted, it is not possible the retrieve the file size anymore since
773 the path is composed of variables.
774 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000775 if self._size is None and not self.tainted:
776 try:
777 self._size = os.stat(self.full_path).st_size
778 except OSError:
779 self._size = -1
780 return self._size
781
782 def flatten(self):
783 """Returns a dict representing this object.
784
785 A 'size' of 0 means the file was only touched and not read.
786 """
787 return {
788 'path': self.path,
789 'size': self.size,
790 }
791
792 def replace_variables(self, variables):
793 """Replaces the root of this File with one of the variables if it matches.
794
795 If a variable replacement occurs, the cloned object becomes tainted.
796 """
797 for variable, root_path in variables.iteritems():
798 if self.path.startswith(root_path):
799 return self._clone(
800 self.root, variable + self.path[len(root_path):], True)
801 # No need to clone, returns ourself.
802 return self
803
804 def strip_root(self, root):
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000805 """Returns a clone of itself with 'root' stripped off.
806
807 Note that the file is kept if it is either accessible from a symlinked
808 path that was used to access the file or through the real path.
809 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000810 # Check internal consistency.
811 assert self.tainted or (isabs(root) and root.endswith(os.path.sep)), root
812 if not self.full_path.startswith(root):
813 # Now try to resolve the symlinks to see if it can be reached this way.
814 # Only try *after* trying without resolving symlink.
815 if not self.real_path.startswith(root):
816 return None
817 path = self.real_path
818 else:
819 path = self.full_path
820 return self._clone(root, path[len(root):], self.tainted)
821
822 def _clone(self, new_root, new_path, tainted):
823 raise NotImplementedError(self.__class__.__name__)
824
825 class File(_TouchedObject):
826 """A file that was accessed. May not be present anymore.
827
828 If tainted is true, it means it is not a real path anymore as a variable
829 replacement occured.
830
maruel@chromium.org538141b2013-06-03 20:57:17 +0000831 |mode| can be one of None, TOUCHED, READ or WRITE.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000832 """
maruel@chromium.org538141b2013-06-03 20:57:17 +0000833 # Was probed for existence, and it is existent, but was never _opened_.
834 TOUCHED = 't'
835 # Opened for read only and guaranteed to not have been written to.
836 READ = 'r'
837 # Opened for write.
838 WRITE = 'w'
839
840 # They are listed in order of priority. E.g. if a file is traced as TOUCHED
841 # then as WRITE, only keep WRITE. None means no idea, which is a problem on
842 # Windows.
843 ACCEPTABLE_MODES = (None, TOUCHED, READ, WRITE)
844
845 def __init__(self, root, path, tainted, size, mode):
846 assert mode in self.ACCEPTABLE_MODES
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000847 super(Results.File, self).__init__(root, path, tainted, size, 1)
maruel@chromium.org538141b2013-06-03 20:57:17 +0000848 self.mode = mode
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000849
850 def _clone(self, new_root, new_path, tainted):
851 """Clones itself keeping meta-data."""
852 # Keep the self.size and self._real_path caches for performance reason. It
853 # is also important when the file becomes tainted (with a variable instead
854 # of the real path) since self.path is not an on-disk path anymore so
855 # out._size cannot be updated.
maruel@chromium.org538141b2013-06-03 20:57:17 +0000856 out = self.__class__(new_root, new_path, tainted, self.size, self.mode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000857 out._real_path = self._real_path
858 return out
859
maruel@chromium.org538141b2013-06-03 20:57:17 +0000860 def flatten(self):
861 out = super(Results.File, self).flatten()
862 out['mode'] = self.mode
863 return out
864
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000865 class Directory(_TouchedObject):
maruel@chromium.org538141b2013-06-03 20:57:17 +0000866 """A directory of files. Must exist.
867
868 For a Directory instance, self.size is not a cache, it's an actual value
869 that is never modified and represents the total size of the files contained
870 in this directory. It is possible that the directory is empty so that
871 size==0; this happens if there's only an invalid symlink in it.
872 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000873 def __init__(self, root, path, tainted, size, nb_files):
874 """path='.' is a valid value and must be handled appropriately."""
875 assert not path.endswith(os.path.sep), path
876 super(Results.Directory, self).__init__(
877 root, path + os.path.sep, tainted, size, nb_files)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000878
879 def flatten(self):
880 out = super(Results.Directory, self).flatten()
881 out['nb_files'] = self.nb_files
882 return out
883
884 def _clone(self, new_root, new_path, tainted):
885 """Clones itself keeping meta-data."""
886 out = self.__class__(
887 new_root,
888 new_path.rstrip(os.path.sep),
889 tainted,
890 self.size,
891 self.nb_files)
892 out._real_path = self._real_path
893 return out
894
895 class Process(object):
896 """A process that was traced.
897
898 Contains references to the files accessed by this process and its children.
899 """
900 def __init__(self, pid, files, executable, command, initial_cwd, children):
901 logging.debug('Process(%s, %d, ...)' % (pid, len(files)))
902 self.pid = pid
903 self.files = sorted(files, key=lambda x: x.path)
904 self.children = children
905 self.executable = executable
906 self.command = command
907 self.initial_cwd = initial_cwd
908
909 # Check internal consistency.
910 assert len(set(f.path for f in self.files)) == len(self.files), sorted(
911 f.path for f in self.files)
912 assert isinstance(self.children, list)
913 assert isinstance(self.files, list)
914
915 @property
916 def all(self):
917 for child in self.children:
918 for i in child.all:
919 yield i
920 yield self
921
922 def flatten(self):
923 return {
924 'children': [c.flatten() for c in self.children],
925 'command': self.command,
926 'executable': self.executable,
927 'files': [f.flatten() for f in self.files],
928 'initial_cwd': self.initial_cwd,
929 'pid': self.pid,
930 }
931
932 def strip_root(self, root):
933 assert isabs(root) and root.endswith(os.path.sep), root
934 # Loads the files after since they are constructed as objects.
935 out = self.__class__(
936 self.pid,
937 filter(None, (f.strip_root(root) for f in self.files)),
938 self.executable,
939 self.command,
940 self.initial_cwd,
941 [c.strip_root(root) for c in self.children])
942 logging.debug(
943 'strip_root(%s) %d -> %d' % (root, len(self.files), len(out.files)))
944 return out
945
946 def __init__(self, process):
947 self.process = process
948 # Cache.
949 self._files = None
950
951 def flatten(self):
952 return {
953 'root': self.process.flatten(),
954 }
955
956 @property
957 def files(self):
958 if self._files is None:
959 self._files = sorted(
960 sum((p.files for p in self.process.all), []),
961 key=lambda x: x.path)
962 return self._files
963
964 @property
965 def existent(self):
966 return [f for f in self.files if f.existent]
967
968 @property
969 def non_existent(self):
970 return [f for f in self.files if not f.existent]
971
972 def strip_root(self, root):
973 """Returns a clone with all the files outside the directory |root| removed
974 and converts all the path to be relative paths.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000975
976 It keeps files accessible through the |root| directory or that have been
977 accessed through any symlink which points to the same directory.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000978 """
979 # Resolve any symlink
980 root = os.path.realpath(root)
981 root = get_native_path_case(root).rstrip(os.path.sep) + os.path.sep
982 logging.debug('strip_root(%s)' % root)
983 return Results(self.process.strip_root(root))
984
985
986class ApiBase(object):
987 """OS-agnostic API to trace a process and its children."""
988 class Context(object):
989 """Processes one log line at a time and keeps the list of traced processes.
990
991 The parsing is complicated by the fact that logs are traced out of order for
992 strace but in-order for dtrace and logman. In addition, on Windows it is
993 very frequent that processids are reused so a flat list cannot be used. But
994 at the same time, it is impossible to faithfully construct a graph when the
995 logs are processed out of order. So both a tree and a flat mapping are used,
996 the tree is the real process tree, while the flat mapping stores the last
997 valid process for the corresponding processid. For the strace case, the
998 tree's head is guessed at the last moment.
999 """
1000 class Process(object):
1001 """Keeps context for one traced child process.
1002
1003 Logs all the files this process touched. Ignores directories.
1004 """
1005 def __init__(self, blacklist, pid, initial_cwd):
1006 # Check internal consistency.
1007 assert isinstance(pid, int), repr(pid)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001008 assert_is_renderable(initial_cwd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001009 self.pid = pid
1010 # children are Process instances.
1011 self.children = []
1012 self.initial_cwd = initial_cwd
1013 self.cwd = None
maruel@chromium.org538141b2013-06-03 20:57:17 +00001014 self.files = {}
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001015 self.executable = None
1016 self.command = None
1017 self._blacklist = blacklist
1018
1019 def to_results_process(self):
1020 """Resolves file case sensitivity and or late-bound strings."""
1021 # When resolving files, it's normal to get dupe because a file could be
1022 # opened multiple times with different case. Resolve the deduplication
1023 # here.
1024 def fix_path(x):
1025 """Returns the native file path case.
1026
1027 Converts late-bound strings.
1028 """
1029 if not x:
1030 # Do not convert None instance to 'None'.
1031 return x
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001032 x = render(x)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001033 if os.path.isabs(x):
1034 # If the path is not absolute, which tends to happen occasionally on
1035 # Windows, it is not possible to get the native path case so ignore
1036 # that trace. It mostly happens for 'executable' value.
1037 x = get_native_path_case(x)
1038 return x
1039
maruel@chromium.org538141b2013-06-03 20:57:17 +00001040 def fix_and_blacklist_path(x, m):
1041 """Receives a tuple (filepath, mode) and processes filepath."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001042 x = fix_path(x)
1043 if not x:
1044 return
1045 # The blacklist needs to be reapplied, since path casing could
1046 # influence blacklisting.
1047 if self._blacklist(x):
1048 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00001049 # Filters out directories. Some may have passed through.
1050 if os.path.isdir(x):
1051 return
1052 return x, m
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001053
maruel@chromium.org538141b2013-06-03 20:57:17 +00001054 # Renders all the files as strings, as some could be RelativePath
1055 # instances. It is important to do it first since there could still be
1056 # multiple entries with the same path but different modes.
1057 rendered = (
1058 fix_and_blacklist_path(f, m) for f, m in self.files.iteritems())
1059 files = sorted(
1060 (f for f in rendered if f),
1061 key=lambda x: (x[0], Results.File.ACCEPTABLE_MODES.index(x[1])))
1062 # Then converting into a dict will automatically clean up lesser
1063 # important values.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001064 files = [
maruel@chromium.org538141b2013-06-03 20:57:17 +00001065 Results.File(None, f, False, None, m)
1066 for f, m in dict(files).iteritems()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001067 ]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001068 return Results.Process(
1069 self.pid,
1070 files,
1071 fix_path(self.executable),
1072 self.command,
1073 fix_path(self.initial_cwd),
1074 [c.to_results_process() for c in self.children])
1075
maruel@chromium.org538141b2013-06-03 20:57:17 +00001076 def add_file(self, filepath, mode):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001077 """Adds a file if it passes the blacklist."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001078 if self._blacklist(render(filepath)):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001079 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00001080 logging.debug('add_file(%d, %s, %s)', self.pid, filepath, mode)
1081 # Note that filepath and not render(filepath) is added. It is because
1082 # filepath could be something else than a string, like a RelativePath
1083 # instance for dtrace logs.
1084 modes = Results.File.ACCEPTABLE_MODES
1085 old_mode = self.files.setdefault(filepath, mode)
1086 if old_mode != mode and modes.index(old_mode) < modes.index(mode):
1087 # Take the highest value.
1088 self.files[filepath] = mode
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001089
1090 def __init__(self, blacklist):
1091 self.blacklist = blacklist
1092 # Initial process.
1093 self.root_process = None
1094 # dict to accelerate process lookup, to not have to lookup the whole graph
1095 # each time.
1096 self._process_lookup = {}
1097
1098 class Tracer(object):
1099 """During it's lifetime, the tracing subsystem is enabled."""
1100 def __init__(self, logname):
1101 self._logname = logname
maruel@chromium.org808e3f22013-05-16 16:42:54 +00001102 self._lock = threading.RLock()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001103 self._traces = []
1104 self._initialized = True
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00001105 self._scripts_to_cleanup = []
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001106
1107 def trace(self, cmd, cwd, tracename, output):
1108 """Runs the OS-specific trace program on an executable.
1109
1110 Arguments:
1111 - cmd: The command (a list) to run.
1112 - cwd: Current directory to start the child process in.
1113 - tracename: Name of the trace in the logname file.
1114 - output: If False, redirects output to PIPEs.
1115
1116 Returns a tuple (resultcode, output) and updates the internal trace
1117 entries.
1118 """
1119 # The implementation adds an item to self._traces.
1120 raise NotImplementedError(self.__class__.__name__)
1121
1122 def close(self, _timeout=None):
1123 """Saves the meta-data in the logname file.
1124
1125 For kernel-based tracing, stops the tracing subsystem.
1126
1127 Must not be used manually when using 'with' construct.
1128 """
1129 with self._lock:
maruel@chromium.org808e3f22013-05-16 16:42:54 +00001130 if not self._initialized:
1131 raise TracingFailure(
1132 'Called %s.close() on an unitialized object' %
1133 self.__class__.__name__,
1134 None, None, None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001135 try:
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00001136 while self._scripts_to_cleanup:
1137 try:
1138 os.remove(self._scripts_to_cleanup.pop())
1139 except OSError as e:
1140 logging.error('Failed to delete a temporary script: %s', e)
maruel@chromium.org808e3f22013-05-16 16:42:54 +00001141 write_json(self._logname, self._gen_logdata(), False)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001142 finally:
1143 self._initialized = False
1144
1145 def post_process_log(self):
1146 """Post-processes the log so it becomes faster to load afterward.
1147
1148 Must not be used manually when using 'with' construct.
1149 """
1150 assert not self._initialized, 'Must stop tracing first.'
1151
maruel@chromium.org808e3f22013-05-16 16:42:54 +00001152 def _gen_logdata(self):
1153 """Returns the data to be saved in the trace file."""
1154 return {
1155 'traces': self._traces,
1156 }
1157
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001158 def __enter__(self):
1159 """Enables 'with' statement."""
1160 return self
1161
1162 def __exit__(self, exc_type, exc_value, traceback):
1163 """Enables 'with' statement."""
1164 self.close()
1165 # If an exception was thrown, do not process logs.
1166 if not exc_type:
1167 self.post_process_log()
1168
1169 def get_tracer(self, logname):
1170 """Returns an ApiBase.Tracer instance.
1171
1172 Initializes the tracing subsystem, which is a requirement for kernel-based
1173 tracers. Only one tracer instance should be live at a time!
1174
1175 logname is the filepath to the json file that will contain the meta-data
1176 about the logs.
1177 """
1178 return self.Tracer(logname)
1179
1180 @staticmethod
1181 def clean_trace(logname):
1182 """Deletes an old log."""
1183 raise NotImplementedError()
1184
1185 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001186 def parse_log(cls, logname, blacklist, trace_name):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001187 """Processes trace logs and returns the files opened and the files that do
1188 not exist.
1189
1190 It does not track directories.
1191
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001192 Arguments:
1193 - logname: must be an absolute path.
1194 - blacklist: must be a lambda.
1195 - trace_name: optional trace to read, defaults to reading all traces.
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00001196
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001197 Most of the time, files that do not exist are temporary test files that
1198 should be put in /tmp instead. See http://crbug.com/116251.
1199
1200 Returns a list of dict with keys:
1201 - results: A Results instance.
1202 - trace: The corresponding tracename parameter provided to
1203 get_tracer().trace().
1204 - output: Output gathered during execution, if get_tracer().trace(...,
1205 output=False) was used.
1206 """
1207 raise NotImplementedError(cls.__class__.__name__)
1208
1209
1210class Strace(ApiBase):
1211 """strace implies linux."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001212 @staticmethod
1213 def load_filename(filename):
1214 """Parses a filename in a log."""
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001215 # TODO(maruel): Be compatible with strace -x.
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001216 assert isinstance(filename, str)
1217 out = ''
1218 i = 0
1219 while i < len(filename):
1220 c = filename[i]
1221 if c == '\\':
1222 out += chr(int(filename[i+1:i+4], 8))
1223 i += 4
1224 else:
1225 out += c
1226 i += 1
1227 # TODO(maruel): That's not necessarily true that the current code page is
1228 # utf-8.
1229 return out.decode('utf-8')
1230
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001231 class Context(ApiBase.Context):
1232 """Processes a strace log line and keeps the list of existent and non
1233 existent files accessed.
1234
1235 Ignores directories.
1236
1237 Uses late-binding to processes the cwd of each process. The problem is that
1238 strace generates one log file per process it traced but doesn't give any
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001239 information about which process was started when and by who. So process the
1240 logs out of order and use late binding with RelativePath to be able to
1241 deduce the initial directory of each process once all the logs are parsed.
1242
1243 TODO(maruel): Use the script even in the non-sudo case, so log parsing can
1244 be done in two phase: first find the root process, then process the child
1245 processes in order. With that, it should be possible to not use RelativePath
1246 anymore. This would significantly simplify the code!
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001247 """
1248 class Process(ApiBase.Context.Process):
1249 """Represents the state of a process.
1250
1251 Contains all the information retrieved from the pid-specific log.
1252 """
1253 # Function names are using ([a-z_0-9]+)
1254 # This is the most common format. function(args) = result
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001255 RE_HEADER = re.compile(r'^([a-z_0-9]+)\((.*?)\)\s+= (.+)$')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001256 # An interrupted function call, only grab the minimal header.
1257 RE_UNFINISHED = re.compile(r'^([^\(]+)(.*) \<unfinished \.\.\.\>$')
1258 # A resumed function call.
1259 RE_RESUMED = re.compile(r'^<\.\.\. ([^ ]+) resumed> (.+)$')
1260 # A process received a signal.
1261 RE_SIGNAL = re.compile(r'^--- SIG[A-Z]+ .+ ---')
1262 # A process didn't handle a signal. Ignore any junk appearing before,
1263 # because the process was forcibly killed so it won't open any new file.
1264 RE_KILLED = re.compile(
1265 r'^.*\+\+\+ killed by ([A-Z]+)( \(core dumped\))? \+\+\+$')
1266 # The process has exited.
1267 RE_PROCESS_EXITED = re.compile(r'^\+\+\+ exited with (\d+) \+\+\+')
1268 # A call was canceled. Ignore any prefix.
1269 RE_UNAVAILABLE = re.compile(r'^.*\)\s*= \? <unavailable>$')
1270 # Happens when strace fails to even get the function name.
1271 UNNAMED_FUNCTION = '????'
1272
1273 # Corner-case in python, a class member function decorator must not be
1274 # @staticmethod.
1275 def parse_args(regexp, expect_zero): # pylint: disable=E0213
1276 """Automatically convert the str 'args' into a list of processed
1277 arguments.
1278
1279 Arguments:
1280 - regexp is used to parse args.
1281 - expect_zero: one of True, False or None.
1282 - True: will check for result.startswith('0') first and will ignore
1283 the trace line completely otherwise. This is important because for
1284 many functions, the regexp will not process if the call failed.
1285 - False: will check for not result.startswith(('?', '-1')) for the
1286 same reason than with True.
1287 - None: ignore result.
1288 """
1289 def meta_hook(function):
1290 assert function.__name__.startswith('handle_')
1291 def hook(self, args, result):
1292 if expect_zero is True and not result.startswith('0'):
1293 return
1294 if expect_zero is False and result.startswith(('?', '-1')):
1295 return
1296 match = re.match(regexp, args)
1297 if not match:
1298 raise TracingFailure(
1299 'Failed to parse %s(%s) = %s' %
1300 (function.__name__[len('handle_'):], args, result),
1301 None, None, None)
1302 return function(self, match.groups(), result)
1303 return hook
1304 return meta_hook
1305
1306 class RelativePath(object):
1307 """A late-bound relative path."""
1308 def __init__(self, parent, value):
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001309 assert_is_renderable(parent)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001310 self.parent = parent
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001311 assert (
1312 value is None or
1313 (isinstance(value, unicode) and not os.path.isabs(value)))
1314 self.value = value
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001315 if self.value:
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001316 # TODO(maruel): On POSIX, '\\' is a valid character so remove this
1317 # assert.
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001318 assert '\\' not in self.value, value
1319 assert '\\' not in self.value, (repr(value), repr(self.value))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001320
1321 def render(self):
1322 """Returns the current directory this instance is representing.
1323
1324 This function is used to return the late-bound value.
1325 """
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001326 assert self.parent is not None
1327 parent = render(self.parent)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001328 if self.value:
1329 return os.path.normpath(os.path.join(parent, self.value))
1330 return parent
1331
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001332 def __init__(self, root, pid):
1333 """Keeps enough information to be able to guess the original process
1334 root.
1335
1336 strace doesn't store which process was the initial process. So more
1337 information needs to be kept so the graph can be reconstructed from the
1338 flat map.
1339 """
1340 logging.info('%s(%d)' % (self.__class__.__name__, pid))
1341 super(Strace.Context.Process, self).__init__(root.blacklist, pid, None)
1342 assert isinstance(root, ApiBase.Context)
1343 self._root = weakref.ref(root)
1344 # The dict key is the function name of the pending call, like 'open'
1345 # or 'execve'.
1346 self._pending_calls = {}
1347 self._line_number = 0
1348 # Current directory when the process started.
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001349 if isinstance(self._root(), unicode):
1350 self.initial_cwd = self._root()
1351 else:
1352 self.initial_cwd = self.RelativePath(self._root(), None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001353 self.parentid = None
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001354 self._done = False
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001355
1356 def get_cwd(self):
1357 """Returns the best known value of cwd."""
1358 return self.cwd or self.initial_cwd
1359
1360 def render(self):
1361 """Returns the string value of the RelativePath() object.
1362
1363 Used by RelativePath. Returns the initial directory and not the
1364 current one since the current directory 'cwd' validity is time-limited.
1365
1366 The validity is only guaranteed once all the logs are processed.
1367 """
1368 return self.initial_cwd.render()
1369
1370 def on_line(self, line):
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001371 assert isinstance(line, str)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001372 self._line_number += 1
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001373 try:
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001374 if self._done:
1375 raise TracingFailure(
1376 'Found a trace for a terminated process or corrupted log',
1377 None, None, None)
1378
1379 if self.RE_SIGNAL.match(line):
1380 # Ignore signals.
1381 return
1382
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001383 match = self.RE_KILLED.match(line)
1384 if match:
1385 # Converts a '+++ killed by Foo +++' trace into an exit_group().
1386 self.handle_exit_group(match.group(1), None)
1387 return
1388
1389 match = self.RE_PROCESS_EXITED.match(line)
1390 if match:
1391 # Converts a '+++ exited with 1 +++' trace into an exit_group()
1392 self.handle_exit_group(match.group(1), None)
1393 return
1394
1395 match = self.RE_UNFINISHED.match(line)
1396 if match:
1397 if match.group(1) in self._pending_calls:
1398 raise TracingFailure(
1399 'Found two unfinished calls for the same function',
1400 None, None, None,
1401 self._pending_calls)
1402 self._pending_calls[match.group(1)] = (
1403 match.group(1) + match.group(2))
1404 return
1405
1406 match = self.RE_UNAVAILABLE.match(line)
1407 if match:
1408 # This usually means a process was killed and a pending call was
1409 # canceled.
1410 # TODO(maruel): Look up the last exit_group() trace just above and
1411 # make sure any self._pending_calls[anything] is properly flushed.
1412 return
1413
1414 match = self.RE_RESUMED.match(line)
1415 if match:
1416 if match.group(1) not in self._pending_calls:
1417 raise TracingFailure(
1418 'Found a resumed call that was not logged as unfinished',
1419 None, None, None,
1420 self._pending_calls)
1421 pending = self._pending_calls.pop(match.group(1))
1422 # Reconstruct the line.
1423 line = pending + match.group(2)
1424
1425 match = self.RE_HEADER.match(line)
1426 if not match:
maruel@chromium.orgbcfc9e12013-03-19 18:47:17 +00001427 # The line is corrupted. It happens occasionally when a process is
1428 # killed forcibly with activity going on. Assume the process died.
1429 # No other line can be processed afterward.
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001430 logging.debug('%d is done: %s', self.pid, line)
maruel@chromium.orgbcfc9e12013-03-19 18:47:17 +00001431 self._done = True
1432 return
1433
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001434 if match.group(1) == self.UNNAMED_FUNCTION:
1435 return
1436
1437 # It's a valid line, handle it.
1438 handler = getattr(self, 'handle_%s' % match.group(1), None)
1439 if not handler:
1440 self._handle_unknown(match.group(1), match.group(2), match.group(3))
1441 return handler(match.group(2), match.group(3))
1442 except TracingFailure, e:
1443 # Hack in the values since the handler could be a static function.
1444 e.pid = self.pid
1445 e.line = line
1446 e.line_number = self._line_number
1447 # Re-raise the modified exception.
1448 raise
1449 except (KeyError, NotImplementedError, ValueError), e:
1450 raise TracingFailure(
1451 'Trace generated a %s exception: %s' % (
1452 e.__class__.__name__, str(e)),
1453 self.pid,
1454 self._line_number,
1455 line,
1456 e)
1457
1458 @parse_args(r'^\"(.+?)\", [FKORWX_|]+$', True)
1459 def handle_access(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001460 self._handle_file(args[0], Results.File.TOUCHED)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001461
1462 @parse_args(r'^\"(.+?)\"$', True)
1463 def handle_chdir(self, args, _result):
1464 """Updates cwd."""
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001465 self.cwd = self._mangle(args[0])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001466 logging.debug('handle_chdir(%d, %s)' % (self.pid, self.cwd))
1467
maruel@chromium.org538141b2013-06-03 20:57:17 +00001468 @parse_args(r'^\"(.+?)\", (\d+), (\d+)$', False)
1469 def handle_chown(self, args, _result):
1470 # TODO(maruel): Look at result?
1471 self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org55550722013-05-28 14:43:14 +00001472
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001473 def handle_clone(self, _args, result):
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001474 self._handling_forking('clone', result)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001475
1476 def handle_close(self, _args, _result):
1477 pass
1478
maruel@chromium.org538141b2013-06-03 20:57:17 +00001479 @parse_args(r'^\"(.+?)\", (\d+)$', False)
1480 def handle_chmod(self, args, _result):
1481 self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001482
maruel@chromium.org538141b2013-06-03 20:57:17 +00001483 @parse_args(r'^\"(.+?)\", (\d+)$', False)
1484 def handle_creat(self, args, _result):
1485 self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001486
1487 @parse_args(r'^\"(.+?)\", \[(.+)\], \[\/\* \d+ vars? \*\/\]$', True)
1488 def handle_execve(self, args, _result):
1489 # Even if in practice execve() doesn't returns when it succeeds, strace
1490 # still prints '0' as the result.
1491 filepath = args[0]
maruel@chromium.org538141b2013-06-03 20:57:17 +00001492 self._handle_file(filepath, Results.File.READ)
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001493 self.executable = self._mangle(filepath)
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001494 try:
1495 self.command = strace_process_quoted_arguments(args[1])
1496 except ValueError as e:
1497 raise TracingFailure(
1498 'Failed to process command line argument:\n%s' % e.args[0],
1499 None, None, None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001500
1501 def handle_exit_group(self, _args, _result):
1502 """Removes cwd."""
1503 self.cwd = None
1504
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001505 @parse_args(r'^(\d+|AT_FDCWD), \"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', True)
1506 def handle_faccessat(self, args, _results):
1507 if args[0] == 'AT_FDCWD':
maruel@chromium.org538141b2013-06-03 20:57:17 +00001508 self._handle_file(args[1], Results.File.TOUCHED)
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001509 else:
1510 raise Exception('Relative faccess not implemented.')
1511
maruel@chromium.org0781f322013-05-28 19:45:49 +00001512 def handle_fallocate(self, _args, result):
1513 pass
1514
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001515 def handle_fork(self, args, result):
1516 self._handle_unknown('fork', args, result)
1517
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001518 def handle_futex(self, _args, _result):
1519 pass
1520
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001521 @parse_args(r'^\"(.+?)\", (\d+)$', False)
1522 def handle_getcwd(self, args, _result):
1523 if os.path.isabs(args[0]):
1524 logging.debug('handle_chdir(%d, %s)' % (self.pid, self.cwd))
1525 if not isinstance(self.cwd, unicode):
1526 # Take the occasion to reset the path.
1527 self.cwd = self._mangle(args[0])
1528 else:
1529 # It should always match.
1530 assert self.cwd == Strace.load_filename(args[0]), (
1531 self.cwd, args[0])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001532
1533 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1534 def handle_link(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001535 self._handle_file(args[0], Results.File.READ)
1536 self._handle_file(args[1], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001537
1538 @parse_args(r'\"(.+?)\", \{.+?, \.\.\.\}', True)
1539 def handle_lstat(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001540 self._handle_file(args[0], Results.File.TOUCHED)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001541
1542 def handle_mkdir(self, _args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001543 # We track content, not directories.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001544 pass
1545
1546 @parse_args(r'^\"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', False)
1547 def handle_open(self, args, _result):
1548 if 'O_DIRECTORY' in args[1]:
1549 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00001550 self._handle_file(
1551 args[0],
1552 Results.File.READ if 'O_RDONLY' in args[1] else Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001553
1554 @parse_args(r'^(\d+|AT_FDCWD), \"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', False)
1555 def handle_openat(self, args, _result):
1556 if 'O_DIRECTORY' in args[2]:
1557 return
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001558 if args[0] == 'AT_FDCWD':
maruel@chromium.org538141b2013-06-03 20:57:17 +00001559 self._handle_file(
1560 args[1],
1561 Results.File.READ if 'O_RDONLY' in args[2]
1562 else Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001563 else:
1564 # TODO(maruel): Implement relative open if necessary instead of the
1565 # AT_FDCWD flag, let's hope not since this means tracking all active
1566 # directory handles.
maruel@chromium.org538141b2013-06-03 20:57:17 +00001567 raise NotImplementedError('Relative open via openat not implemented.')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001568
1569 @parse_args(r'^\"(.+?)\", \".+?\"(\.\.\.)?, \d+$', False)
1570 def handle_readlink(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001571 self._handle_file(args[0], Results.File.READ)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001572
1573 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1574 def handle_rename(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001575 self._handle_file(args[0], Results.File.READ)
1576 self._handle_file(args[1], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001577
1578 def handle_rmdir(self, _args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001579 # We track content, not directories.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001580 pass
1581
1582 def handle_setxattr(self, _args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001583 # TODO(maruel): self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001584 pass
1585
1586 @parse_args(r'\"(.+?)\", \{.+?, \.\.\.\}', True)
1587 def handle_stat(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001588 self._handle_file(args[0], Results.File.TOUCHED)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001589
maruel@chromium.org538141b2013-06-03 20:57:17 +00001590 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1591 def handle_symlink(self, args, _result):
1592 self._handle_file(args[0], Results.File.TOUCHED)
1593 self._handle_file(args[1], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001594
csharp@chromium.orgfa672922012-11-02 17:43:31 +00001595 @parse_args(r'^\"(.+?)\", \d+', True)
1596 def handle_truncate(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001597 self._handle_file(args[0], Results.File.WRITE)
csharp@chromium.orgfa672922012-11-02 17:43:31 +00001598
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001599 def handle_unlink(self, _args, _result):
1600 # In theory, the file had to be created anyway.
1601 pass
1602
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001603 def handle_unlinkat(self, _args, _result):
1604 # In theory, the file had to be created anyway.
1605 pass
1606
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001607 def handle_statfs(self, _args, _result):
1608 pass
1609
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001610 def handle_utimensat(self, _args, _result):
1611 pass
1612
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001613 def handle_vfork(self, _args, result):
1614 self._handling_forking('vfork', result)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001615
1616 @staticmethod
1617 def _handle_unknown(function, args, result):
1618 raise TracingFailure(
1619 'Unexpected/unimplemented trace %s(%s)= %s' %
1620 (function, args, result),
1621 None, None, None)
1622
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001623 def _handling_forking(self, name, result):
1624 """Transfers cwd."""
1625 if result.startswith(('?', '-1')):
1626 # The call failed.
1627 return
1628 # Update the other process right away.
1629 childpid = int(result)
1630 child = self._root().get_or_set_proc(childpid)
1631 if child.parentid is not None or childpid in self.children:
1632 raise TracingFailure(
1633 'Found internal inconsitency in process lifetime detection '
1634 'during a %s() call' % name,
1635 None, None, None)
1636
1637 # Copy the cwd object.
1638 child.initial_cwd = self.get_cwd()
1639 child.parentid = self.pid
1640 # It is necessary because the logs are processed out of order.
1641 self.children.append(child)
1642
maruel@chromium.org538141b2013-06-03 20:57:17 +00001643 def _handle_file(self, filepath, mode):
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001644 filepath = self._mangle(filepath)
maruel@chromium.org538141b2013-06-03 20:57:17 +00001645 self.add_file(filepath, mode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001646
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001647 def _mangle(self, filepath):
1648 """Decodes a filepath found in the log and convert it to a late-bound
1649 path if necessary.
1650
1651 |filepath| is an strace 'encoded' string and the returned value is
1652 either an unicode string if the path was absolute or a late bound path
1653 otherwise.
1654 """
1655 filepath = Strace.load_filename(filepath)
1656 if os.path.isabs(filepath):
1657 return filepath
1658 else:
1659 if isinstance(self.get_cwd(), unicode):
1660 return os.path.normpath(os.path.join(self.get_cwd(), filepath))
1661 return self.RelativePath(self.get_cwd(), filepath)
1662
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001663 def __init__(self, blacklist, root_pid, initial_cwd):
1664 """|root_pid| may be None when the root process is not known.
1665
1666 In that case, a search is done after reading all the logs to figure out
1667 the root process.
1668 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001669 super(Strace.Context, self).__init__(blacklist)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001670 assert_is_renderable(initial_cwd)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001671 self.root_pid = root_pid
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001672 self.initial_cwd = initial_cwd
1673
1674 def render(self):
1675 """Returns the string value of the initial cwd of the root process.
1676
1677 Used by RelativePath.
1678 """
1679 return self.initial_cwd
1680
1681 def on_line(self, pid, line):
1682 """Transfers control into the Process.on_line() function."""
1683 self.get_or_set_proc(pid).on_line(line.strip())
1684
1685 def to_results(self):
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001686 """If necessary, finds back the root process and verify consistency."""
1687 if not self.root_pid:
1688 # The non-sudo case. The traced process was started by strace itself,
1689 # so the pid of the traced process is not known.
1690 root = [p for p in self._process_lookup.itervalues() if not p.parentid]
1691 if len(root) == 1:
1692 self.root_process = root[0]
1693 # Save it for later.
1694 self.root_pid = self.root_process.pid
1695 else:
1696 # The sudo case. The traced process was started manually so its pid is
1697 # known.
1698 self.root_process = self._process_lookup.get(self.root_pid)
1699 if not self.root_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001700 raise TracingFailure(
1701 'Found internal inconsitency in process lifetime detection '
1702 'while finding the root process',
1703 None,
1704 None,
1705 None,
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001706 self.root_pid,
1707 sorted(self._process_lookup))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001708 process = self.root_process.to_results_process()
1709 if sorted(self._process_lookup) != sorted(p.pid for p in process.all):
1710 raise TracingFailure(
1711 'Found internal inconsitency in process lifetime detection '
1712 'while looking for len(tree) == len(list)',
1713 None,
1714 None,
1715 None,
1716 sorted(self._process_lookup),
1717 sorted(p.pid for p in process.all))
1718 return Results(process)
1719
1720 def get_or_set_proc(self, pid):
1721 """Returns the Context.Process instance for this pid or creates a new one.
1722 """
1723 if not pid or not isinstance(pid, int):
1724 raise TracingFailure(
1725 'Unpexpected value for pid: %r' % pid,
1726 pid,
1727 None,
1728 None,
1729 pid)
1730 if pid not in self._process_lookup:
1731 self._process_lookup[pid] = self.Process(self, pid)
1732 return self._process_lookup[pid]
1733
1734 @classmethod
1735 def traces(cls):
1736 """Returns the list of all handled traces to pass this as an argument to
1737 strace.
1738 """
1739 prefix = 'handle_'
1740 return [i[len(prefix):] for i in dir(cls.Process) if i.startswith(prefix)]
1741
1742 class Tracer(ApiBase.Tracer):
1743 MAX_LEN = 256
1744
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001745 def __init__(self, logname, use_sudo):
1746 super(Strace.Tracer, self).__init__(logname)
1747 self.use_sudo = use_sudo
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001748 if use_sudo:
1749 # TODO(maruel): Use the jump script systematically to make it easy to
1750 # figure out the root process, so RelativePath is not necessary anymore.
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00001751 self._child_script = create_exec_thunk()
1752 self._scripts_to_cleanup.append(self._child_script)
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001753
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001754 def trace(self, cmd, cwd, tracename, output):
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001755 """Runs strace on an executable.
1756
1757 When use_sudo=True, it is a 3-phases process: start the thunk, start
1758 sudo strace with the pid of the thunk and then have the thunk os.execve()
1759 the process to trace.
1760 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001761 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
1762 assert os.path.isabs(cmd[0]), cmd[0]
1763 assert os.path.isabs(cwd), cwd
1764 assert os.path.normpath(cwd) == cwd, cwd
1765 with self._lock:
1766 if not self._initialized:
1767 raise TracingFailure(
1768 'Called Tracer.trace() on an unitialized object',
1769 None, None, None, tracename)
1770 assert tracename not in (i['trace'] for i in self._traces)
1771 stdout = stderr = None
1772 if output:
1773 stdout = subprocess.PIPE
1774 stderr = subprocess.STDOUT
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001775
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001776 # Ensure all file related APIs are hooked.
1777 traces = ','.join(Strace.Context.traces() + ['file'])
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001778 flags = [
maruel@chromium.orga5cfdc32013-05-28 14:46:15 +00001779 # Each child process has its own trace file. It is necessary because
1780 # strace may generate corrupted log file if multiple processes are
1781 # heavily doing syscalls simultaneously.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001782 '-ff',
maruel@chromium.orga5cfdc32013-05-28 14:46:15 +00001783 # Reduce whitespace usage.
1784 '-a1',
1785 # hex encode non-ascii strings.
1786 # TODO(maruel): '-x',
1787 # TODO(maruel): '-ttt',
1788 # Signals are unnecessary noise here. Note the parser can cope with them
1789 # but reduce the unnecessary output.
1790 '-esignal=none',
1791 # Print as much data as wanted.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001792 '-s', '%d' % self.MAX_LEN,
1793 '-e', 'trace=%s' % traces,
1794 '-o', self._logname + '.' + tracename,
1795 ]
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001796
1797 if self.use_sudo:
1798 pipe_r, pipe_w = os.pipe()
1799 # Start the child process paused.
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00001800 target_cmd = [sys.executable, self._child_script, str(pipe_r)] + cmd
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001801 logging.debug(' '.join(target_cmd))
1802 child_proc = subprocess.Popen(
1803 target_cmd,
1804 stdin=subprocess.PIPE,
1805 stdout=stdout,
1806 stderr=stderr,
1807 cwd=cwd)
1808
1809 # TODO(maruel): both processes must use the same UID for it to work
1810 # without sudo. Look why -p is failing at the moment without sudo.
1811 trace_cmd = [
1812 'sudo',
1813 'strace',
1814 '-p', str(child_proc.pid),
1815 ] + flags
1816 logging.debug(' '.join(trace_cmd))
1817 strace_proc = subprocess.Popen(
1818 trace_cmd,
1819 cwd=cwd,
1820 stdin=subprocess.PIPE,
1821 stdout=subprocess.PIPE,
1822 stderr=subprocess.PIPE)
1823
1824 line = strace_proc.stderr.readline()
1825 if not re.match(r'^Process \d+ attached \- interrupt to quit$', line):
1826 # TODO(maruel): Raise an exception.
1827 assert False, line
1828
1829 # Now fire the child process.
1830 os.write(pipe_w, 'x')
1831
1832 out = child_proc.communicate()[0]
1833 strace_out = strace_proc.communicate()[0]
1834
1835 # TODO(maruel): if strace_proc.returncode: Add an exception.
1836 saved_out = strace_out if strace_proc.returncode else out
1837 root_pid = child_proc.pid
1838 else:
1839 # Non-sudo case.
1840 trace_cmd = [
1841 'strace',
1842 ] + flags + cmd
1843 logging.debug(' '.join(trace_cmd))
1844 child_proc = subprocess.Popen(
1845 trace_cmd,
1846 cwd=cwd,
1847 stdin=subprocess.PIPE,
1848 stdout=stdout,
1849 stderr=stderr)
1850 out = child_proc.communicate()[0]
1851 # TODO(maruel): Walk the logs and figure out the root process would
1852 # simplify parsing the logs a *lot*.
1853 saved_out = out
1854 # The trace reader will have to figure out.
1855 root_pid = None
1856
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001857 with self._lock:
1858 assert tracename not in (i['trace'] for i in self._traces)
1859 self._traces.append(
1860 {
1861 'cmd': cmd,
1862 'cwd': cwd,
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001863 'output': saved_out,
1864 'pid': root_pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001865 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001866 })
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001867 return child_proc.returncode, out
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001868
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001869 def __init__(self, use_sudo=None):
1870 super(Strace, self).__init__()
1871 self.use_sudo = use_sudo
1872
1873 def get_tracer(self, logname):
1874 return self.Tracer(logname, self.use_sudo)
1875
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001876 @staticmethod
1877 def clean_trace(logname):
1878 if os.path.isfile(logname):
1879 os.remove(logname)
1880 # Also delete any pid specific file from previous traces.
1881 for i in glob.iglob(logname + '.*'):
1882 if i.rsplit('.', 1)[1].isdigit():
1883 os.remove(i)
1884
1885 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001886 def parse_log(cls, logname, blacklist, trace_name):
1887 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00001888 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001889 data = read_json(logname)
1890 out = []
1891 for item in data['traces']:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001892 if trace_name and item['trace'] != trace_name:
1893 continue
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001894 result = {
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001895 'output': item['output'],
maruel@chromium.org964ddde2012-10-29 18:13:46 +00001896 'trace': item['trace'],
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001897 }
1898 try:
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001899 context = cls.Context(blacklist, item['pid'], item['cwd'])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001900 for pidfile in glob.iglob('%s.%s.*' % (logname, item['trace'])):
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001901 logging.debug('Reading %s', pidfile)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001902 pid = pidfile.rsplit('.', 1)[1]
1903 if pid.isdigit():
1904 pid = int(pid)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001905 found_line = False
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001906 for line in open(pidfile, 'rb'):
1907 context.on_line(pid, line)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001908 found_line = True
1909 if not found_line:
1910 # Ensures that a completely empty trace still creates the
1911 # corresponding Process instance by logging a dummy line.
1912 context.on_line(pid, '')
1913 else:
1914 logging.warning('Found unexpected file %s', pidfile)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001915 result['results'] = context.to_results()
maruel@chromium.org424f5692012-10-15 15:42:38 +00001916 except TracingFailure:
1917 result['exception'] = sys.exc_info()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001918 out.append(result)
1919 return out
1920
1921
1922class Dtrace(ApiBase):
1923 """Uses DTrace framework through dtrace. Requires root access.
1924
1925 Implies Mac OSX.
1926
1927 dtruss can't be used because it has compatibility issues with python.
1928
1929 Also, the pid->cwd handling needs to be done manually since OSX has no way to
1930 get the absolute path of the 'cwd' dtrace variable from the probe.
1931
1932 Also, OSX doesn't populate curpsinfo->pr_psargs properly, see
1933 https://discussions.apple.com/thread/1980539. So resort to handling execve()
1934 manually.
1935
1936 errno is not printed in the log since this implementation currently only cares
1937 about files that were successfully opened.
1938 """
1939 class Context(ApiBase.Context):
1940 # Format: index pid function(args)
1941 RE_HEADER = re.compile(r'^\d+ (\d+) ([a-zA-Z_\-]+)\((.*?)\)$')
1942
1943 # Arguments parsing.
1944 RE_DTRACE_BEGIN = re.compile(r'^\"(.+?)\"$')
1945 RE_CHDIR = re.compile(r'^\"(.+?)\"$')
1946 RE_EXECVE = re.compile(r'^\"(.+?)\", \[(\d+), (.+)\]$')
1947 RE_OPEN = re.compile(r'^\"(.+?)\", (0x[0-9a-z]+), (0x[0-9a-z]+)$')
1948 RE_PROC_START = re.compile(r'^(\d+), \"(.+?)\", (\d+)$')
1949 RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$')
1950
maruel@chromium.orgac361162013-06-04 15:54:06 +00001951 # O_DIRECTORY is not defined on Windows and dtrace doesn't exist on Windows.
1952 O_DIRECTORY = os.O_DIRECTORY if hasattr(os, 'O_DIRECTORY') else None
maruel@chromium.org538141b2013-06-03 20:57:17 +00001953 O_RDWR = os.O_RDWR
1954 O_WRONLY = os.O_WRONLY
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001955
1956 class Process(ApiBase.Context.Process):
1957 def __init__(self, *args):
1958 super(Dtrace.Context.Process, self).__init__(*args)
1959 self.cwd = self.initial_cwd
1960
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001961 def __init__(self, blacklist, thunk_pid, initial_cwd):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001962 logging.info(
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001963 '%s(%d, %s)' % (self.__class__.__name__, thunk_pid, initial_cwd))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001964 super(Dtrace.Context, self).__init__(blacklist)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001965 assert isinstance(initial_cwd, unicode), initial_cwd
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001966 # Process ID of the temporary script created by create_subprocess_thunk().
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001967 self._thunk_pid = thunk_pid
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001968 self._initial_cwd = initial_cwd
1969 self._line_number = 0
1970
1971 def on_line(self, line):
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001972 assert isinstance(line, unicode), line
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001973 self._line_number += 1
1974 match = self.RE_HEADER.match(line)
1975 if not match:
1976 raise TracingFailure(
1977 'Found malformed line: %s' % line,
1978 None,
1979 self._line_number,
1980 line)
1981 fn = getattr(
1982 self,
1983 'handle_%s' % match.group(2).replace('-', '_'),
1984 self._handle_ignored)
1985 # It is guaranteed to succeed because of the regexp. Or at least I thought
1986 # it would.
1987 pid = int(match.group(1))
1988 try:
1989 return fn(pid, match.group(3))
1990 except TracingFailure, e:
1991 # Hack in the values since the handler could be a static function.
1992 e.pid = pid
1993 e.line = line
1994 e.line_number = self._line_number
1995 # Re-raise the modified exception.
1996 raise
1997 except (KeyError, NotImplementedError, ValueError), e:
1998 raise TracingFailure(
1999 'Trace generated a %s exception: %s' % (
2000 e.__class__.__name__, str(e)),
2001 pid,
2002 self._line_number,
2003 line,
2004 e)
2005
2006 def to_results(self):
2007 process = self.root_process.to_results_process()
2008 # Internal concistency check.
2009 if sorted(self._process_lookup) != sorted(p.pid for p in process.all):
2010 raise TracingFailure(
2011 'Found internal inconsitency in process lifetime detection '
2012 'while looking for len(tree) == len(list)',
2013 None,
2014 None,
2015 None,
2016 sorted(self._process_lookup),
2017 sorted(p.pid for p in process.all))
2018 return Results(process)
2019
2020 def handle_dtrace_BEGIN(self, _pid, args):
2021 if not self.RE_DTRACE_BEGIN.match(args):
2022 raise TracingFailure(
2023 'Found internal inconsitency in dtrace_BEGIN log line',
2024 None, None, None)
2025
2026 def handle_proc_start(self, pid, args):
2027 """Transfers cwd.
2028
2029 The dtrace script already takes care of only tracing the processes that
2030 are child of the traced processes so there is no need to verify the
2031 process hierarchy.
2032 """
2033 if pid in self._process_lookup:
2034 raise TracingFailure(
2035 'Found internal inconsitency in proc_start: %d started two times' %
2036 pid,
2037 None, None, None)
2038 match = self.RE_PROC_START.match(args)
2039 if not match:
2040 raise TracingFailure(
2041 'Failed to parse arguments: %s' % args,
2042 None, None, None)
2043 ppid = int(match.group(1))
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002044 if ppid == self._thunk_pid and not self.root_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002045 proc = self.root_process = self.Process(
2046 self.blacklist, pid, self._initial_cwd)
2047 elif ppid in self._process_lookup:
2048 proc = self.Process(self.blacklist, pid, self._process_lookup[ppid].cwd)
2049 self._process_lookup[ppid].children.append(proc)
2050 else:
2051 # Another process tree, ignore.
2052 return
2053 self._process_lookup[pid] = proc
2054 logging.debug(
2055 'New child: %s -> %d cwd:%s' %
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002056 (ppid, pid, render(proc.initial_cwd)))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002057
2058 def handle_proc_exit(self, pid, _args):
2059 """Removes cwd."""
2060 if pid in self._process_lookup:
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002061 # self._thunk_pid is not traced itself and other traces run neither.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002062 self._process_lookup[pid].cwd = None
2063
2064 def handle_execve(self, pid, args):
2065 """Sets the process' executable.
2066
2067 TODO(maruel): Read command line arguments. See
2068 https://discussions.apple.com/thread/1980539 for an example.
2069 https://gist.github.com/1242279
2070
2071 Will have to put the answer at http://stackoverflow.com/questions/7556249.
2072 :)
2073 """
2074 if not pid in self._process_lookup:
2075 # Another process tree, ignore.
2076 return
2077 match = self.RE_EXECVE.match(args)
2078 if not match:
2079 raise TracingFailure(
2080 'Failed to parse arguments: %r' % args,
2081 None, None, None)
2082 proc = self._process_lookup[pid]
2083 proc.executable = match.group(1)
maruel@chromium.org538141b2013-06-03 20:57:17 +00002084 self._handle_file(pid, proc.executable, Results.File.READ)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002085 proc.command = self.process_escaped_arguments(match.group(3))
2086 if int(match.group(2)) != len(proc.command):
2087 raise TracingFailure(
2088 'Failed to parse execve() arguments: %s' % args,
2089 None, None, None)
2090
2091 def handle_chdir(self, pid, args):
2092 """Updates cwd."""
2093 if pid not in self._process_lookup:
2094 # Another process tree, ignore.
2095 return
2096 cwd = self.RE_CHDIR.match(args).group(1)
2097 if not cwd.startswith('/'):
2098 cwd2 = os.path.join(self._process_lookup[pid].cwd, cwd)
2099 logging.debug('handle_chdir(%d, %s) -> %s' % (pid, cwd, cwd2))
2100 else:
2101 logging.debug('handle_chdir(%d, %s)' % (pid, cwd))
2102 cwd2 = cwd
2103 self._process_lookup[pid].cwd = cwd2
2104
2105 def handle_open_nocancel(self, pid, args):
2106 """Redirects to handle_open()."""
2107 return self.handle_open(pid, args)
2108
2109 def handle_open(self, pid, args):
2110 if pid not in self._process_lookup:
2111 # Another process tree, ignore.
2112 return
2113 match = self.RE_OPEN.match(args)
2114 if not match:
2115 raise TracingFailure(
2116 'Failed to parse arguments: %s' % args,
2117 None, None, None)
2118 flag = int(match.group(2), 16)
2119 if self.O_DIRECTORY & flag == self.O_DIRECTORY:
2120 # Ignore directories.
2121 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00002122 self._handle_file(
2123 pid,
2124 match.group(1),
2125 Results.File.READ if not ((self.O_RDWR | self.O_WRONLY) & flag)
2126 else Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002127
2128 def handle_rename(self, pid, args):
2129 if pid not in self._process_lookup:
2130 # Another process tree, ignore.
2131 return
2132 match = self.RE_RENAME.match(args)
2133 if not match:
2134 raise TracingFailure(
2135 'Failed to parse arguments: %s' % args,
2136 None, None, None)
maruel@chromium.org538141b2013-06-03 20:57:17 +00002137 self._handle_file(pid, match.group(1), Results.File.READ)
2138 self._handle_file(pid, match.group(2), Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002139
maruel@chromium.org538141b2013-06-03 20:57:17 +00002140 def _handle_file(self, pid, filepath, mode):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002141 if not filepath.startswith('/'):
2142 filepath = os.path.join(self._process_lookup[pid].cwd, filepath)
2143 # We can get '..' in the path.
2144 filepath = os.path.normpath(filepath)
2145 # Sadly, still need to filter out directories here;
2146 # saw open_nocancel(".", 0, 0) = 0 lines.
2147 if os.path.isdir(filepath):
2148 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00002149 self._process_lookup[pid].add_file(filepath, mode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002150
2151 def handle_ftruncate(self, pid, args):
2152 """Just used as a signal to kill dtrace, ignoring."""
2153 pass
2154
2155 @staticmethod
2156 def _handle_ignored(pid, args):
2157 """Is called for all the event traces that are not handled."""
2158 raise NotImplementedError('Please implement me')
2159
2160 @staticmethod
2161 def process_escaped_arguments(text):
2162 """Extracts escaped arguments on a string and return the arguments as a
2163 list.
2164
2165 Implemented as an automaton.
2166
2167 Example:
2168 With text = '\\001python2.7\\001-c\\001print(\\"hi\\")\\0', the
2169 function will return ['python2.7', '-c', 'print("hi")]
2170 """
2171 if not text.endswith('\\0'):
2172 raise ValueError('String is not null terminated: %r' % text, text)
2173 text = text[:-2]
2174
2175 def unescape(x):
2176 """Replaces '\\' with '\' and '\?' (where ? is anything) with ?."""
2177 out = []
2178 escaped = False
2179 for i in x:
2180 if i == '\\' and not escaped:
2181 escaped = True
2182 continue
2183 escaped = False
2184 out.append(i)
2185 return ''.join(out)
2186
2187 return [unescape(i) for i in text.split('\\001')]
2188
2189 class Tracer(ApiBase.Tracer):
2190 # pylint: disable=C0301
2191 #
2192 # To understand the following code, you'll want to take a look at:
2193 # http://developers.sun.com/solaris/articles/dtrace_quickref/dtrace_quickref.html
2194 # https://wikis.oracle.com/display/DTrace/Variables
2195 # http://docs.oracle.com/cd/E19205-01/820-4221/
2196 #
2197 # 0. Dump all the valid probes into a text file. It is important, you
2198 # want to redirect into a file and you don't want to constantly 'sudo'.
2199 # $ sudo dtrace -l > probes.txt
2200 #
2201 # 1. Count the number of probes:
2202 # $ wc -l probes.txt
2203 # 81823 # On OSX 10.7, including 1 header line.
2204 #
2205 # 2. List providers, intentionally skipping all the 'syspolicy10925' and the
2206 # likes and skipping the header with NR>1:
2207 # $ awk 'NR>1 { print $2 }' probes.txt | sort | uniq | grep -v '[[:digit:]]'
2208 # dtrace
2209 # fbt
2210 # io
2211 # ip
2212 # lockstat
2213 # mach_trap
2214 # proc
2215 # profile
2216 # sched
2217 # syscall
2218 # tcp
2219 # vminfo
2220 #
2221 # 3. List of valid probes:
2222 # $ grep syscall probes.txt | less
2223 # or use dtrace directly:
2224 # $ sudo dtrace -l -P syscall | less
2225 #
2226 # trackedpid is an associative array where its value can be 0, 1 or 2.
2227 # 0 is for untracked processes and is the default value for items not
2228 # in the associative array.
2229 # 1 is for tracked processes.
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002230 # 2 is for the script created by create_subprocess_thunk() only. It is not
2231 # tracked itself but all its decendants are.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002232 #
2233 # The script will kill itself only once waiting_to_die == 1 and
2234 # current_processes == 0, so that both getlogin() was called and that
2235 # all traced processes exited.
2236 #
2237 # TODO(maruel): Use cacheable predicates. See
2238 # https://wikis.oracle.com/display/DTrace/Performance+Considerations
2239 D_CODE = """
2240 dtrace:::BEGIN {
2241 waiting_to_die = 0;
2242 current_processes = 0;
2243 logindex = 0;
2244 printf("%d %d %s_%s(\\"%s\\")\\n",
2245 logindex, PID, probeprov, probename, SCRIPT);
2246 logindex++;
2247 }
2248
2249 proc:::start /trackedpid[ppid]/ {
2250 trackedpid[pid] = 1;
2251 current_processes += 1;
2252 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
2253 logindex, pid, probeprov, probename,
2254 ppid,
2255 execname,
2256 current_processes);
2257 logindex++;
2258 }
2259 /* Should use SCRIPT but there is no access to this variable at that
2260 * point. */
2261 proc:::start /ppid == PID && execname == "Python"/ {
2262 trackedpid[pid] = 2;
2263 current_processes += 1;
2264 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
2265 logindex, pid, probeprov, probename,
2266 ppid,
2267 execname,
2268 current_processes);
2269 logindex++;
2270 }
2271 proc:::exit /trackedpid[pid] &&
2272 current_processes == 1 &&
2273 waiting_to_die == 1/ {
2274 trackedpid[pid] = 0;
2275 current_processes -= 1;
2276 printf("%d %d %s_%s(%d)\\n",
2277 logindex, pid, probeprov, probename,
2278 current_processes);
2279 logindex++;
2280 exit(0);
2281 }
2282 proc:::exit /trackedpid[pid]/ {
2283 trackedpid[pid] = 0;
2284 current_processes -= 1;
2285 printf("%d %d %s_%s(%d)\\n",
2286 logindex, pid, probeprov, probename,
2287 current_processes);
2288 logindex++;
2289 }
2290
2291 /* Use an arcane function to detect when we need to die */
2292 syscall::ftruncate:entry /pid == PID && arg0 == FILE_ID/ {
2293 waiting_to_die = 1;
2294 printf("%d %d %s()\\n", logindex, pid, probefunc);
2295 logindex++;
2296 }
2297 syscall::ftruncate:entry /
2298 pid == PID && arg0 == FILE_ID && current_processes == 0/ {
2299 exit(0);
2300 }
2301
2302 syscall::open*:entry /trackedpid[pid] == 1/ {
2303 self->open_arg0 = arg0;
2304 self->open_arg1 = arg1;
2305 self->open_arg2 = arg2;
2306 }
2307 syscall::open*:return /trackedpid[pid] == 1 && errno == 0/ {
2308 this->open_arg0 = copyinstr(self->open_arg0);
2309 printf("%d %d %s(\\"%s\\", 0x%x, 0x%x)\\n",
2310 logindex, pid, probefunc,
2311 this->open_arg0,
2312 self->open_arg1,
2313 self->open_arg2);
2314 logindex++;
2315 this->open_arg0 = 0;
2316 }
2317 syscall::open*:return /trackedpid[pid] == 1/ {
2318 self->open_arg0 = 0;
2319 self->open_arg1 = 0;
2320 self->open_arg2 = 0;
2321 }
2322
2323 syscall::rename:entry /trackedpid[pid] == 1/ {
2324 self->rename_arg0 = arg0;
2325 self->rename_arg1 = arg1;
2326 }
2327 syscall::rename:return /trackedpid[pid] == 1 && errno == 0/ {
2328 this->rename_arg0 = copyinstr(self->rename_arg0);
2329 this->rename_arg1 = copyinstr(self->rename_arg1);
2330 printf("%d %d %s(\\"%s\\", \\"%s\\")\\n",
2331 logindex, pid, probefunc,
2332 this->rename_arg0,
2333 this->rename_arg1);
2334 logindex++;
2335 this->rename_arg0 = 0;
2336 this->rename_arg1 = 0;
2337 }
2338 syscall::rename:return /trackedpid[pid] == 1/ {
2339 self->rename_arg0 = 0;
2340 self->rename_arg1 = 0;
2341 }
2342
2343 /* Track chdir, it's painful because it is only receiving relative path.
2344 */
2345 syscall::chdir:entry /trackedpid[pid] == 1/ {
2346 self->chdir_arg0 = arg0;
2347 }
2348 syscall::chdir:return /trackedpid[pid] == 1 && errno == 0/ {
2349 this->chdir_arg0 = copyinstr(self->chdir_arg0);
2350 printf("%d %d %s(\\"%s\\")\\n",
2351 logindex, pid, probefunc,
2352 this->chdir_arg0);
2353 logindex++;
2354 this->chdir_arg0 = 0;
2355 }
2356 syscall::chdir:return /trackedpid[pid] == 1/ {
2357 self->chdir_arg0 = 0;
2358 }
2359 """
2360
2361 # execve-specific code, tends to throw a lot of exceptions.
2362 D_CODE_EXECVE = """
2363 /* Finally what we care about! */
2364 syscall::exec*:entry /trackedpid[pid]/ {
2365 self->exec_arg0 = copyinstr(arg0);
2366 /* Incrementally probe for a NULL in the argv parameter of execve() to
2367 * figure out argc. */
2368 /* TODO(maruel): Skip the remaining copyin() when a NULL pointer was
2369 * found. */
2370 self->exec_argc = 0;
2371 /* Probe for argc==1 */
2372 this->exec_argv = (user_addr_t*)copyin(
2373 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2374 self->exec_argc = this->exec_argv[self->exec_argc] ?
2375 (self->exec_argc + 1) : self->exec_argc;
2376
2377 /* Probe for argc==2 */
2378 this->exec_argv = (user_addr_t*)copyin(
2379 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2380 self->exec_argc = this->exec_argv[self->exec_argc] ?
2381 (self->exec_argc + 1) : self->exec_argc;
2382
2383 /* Probe for argc==3 */
2384 this->exec_argv = (user_addr_t*)copyin(
2385 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2386 self->exec_argc = this->exec_argv[self->exec_argc] ?
2387 (self->exec_argc + 1) : self->exec_argc;
2388
2389 /* Probe for argc==4 */
2390 this->exec_argv = (user_addr_t*)copyin(
2391 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2392 self->exec_argc = this->exec_argv[self->exec_argc] ?
2393 (self->exec_argc + 1) : self->exec_argc;
2394
2395 /* Copy the inputs strings since there is no guarantee they'll be
2396 * present after the call completed. */
2397 self->exec_argv0 = (self->exec_argc > 0) ?
2398 copyinstr(this->exec_argv[0]) : "";
2399 self->exec_argv1 = (self->exec_argc > 1) ?
2400 copyinstr(this->exec_argv[1]) : "";
2401 self->exec_argv2 = (self->exec_argc > 2) ?
2402 copyinstr(this->exec_argv[2]) : "";
2403 self->exec_argv3 = (self->exec_argc > 3) ?
2404 copyinstr(this->exec_argv[3]) : "";
2405 this->exec_argv = 0;
2406 }
2407 syscall::exec*:return /trackedpid[pid] && errno == 0/ {
2408 /* We need to join strings here, as using multiple printf() would
2409 * cause tearing when multiple threads/processes are traced.
2410 * Since it is impossible to escape a string and join it to another one,
2411 * like sprintf("%s%S", previous, more), use hackery.
2412 * Each of the elements are split with a \\1. \\0 cannot be used because
2413 * it is simply ignored. This will conflict with any program putting a
2414 * \\1 in their execve() string but this should be "rare enough" */
2415 this->args = "";
2416 /* Process exec_argv[0] */
2417 this->args = strjoin(
2418 this->args, (self->exec_argc > 0) ? self->exec_argv0 : "");
2419
2420 /* Process exec_argv[1] */
2421 this->args = strjoin(
2422 this->args, (self->exec_argc > 1) ? "\\1" : "");
2423 this->args = strjoin(
2424 this->args, (self->exec_argc > 1) ? self->exec_argv1 : "");
2425
2426 /* Process exec_argv[2] */
2427 this->args = strjoin(
2428 this->args, (self->exec_argc > 2) ? "\\1" : "");
2429 this->args = strjoin(
2430 this->args, (self->exec_argc > 2) ? self->exec_argv2 : "");
2431
2432 /* Process exec_argv[3] */
2433 this->args = strjoin(
2434 this->args, (self->exec_argc > 3) ? "\\1" : "");
2435 this->args = strjoin(
2436 this->args, (self->exec_argc > 3) ? self->exec_argv3 : "");
2437
2438 /* Prints self->exec_argc to permits verifying the internal
2439 * consistency since this code is quite fishy. */
2440 printf("%d %d %s(\\"%s\\", [%d, %S])\\n",
2441 logindex, pid, probefunc,
2442 self->exec_arg0,
2443 self->exec_argc,
2444 this->args);
2445 logindex++;
2446 this->args = 0;
2447 }
2448 syscall::exec*:return /trackedpid[pid]/ {
2449 self->exec_arg0 = 0;
2450 self->exec_argc = 0;
2451 self->exec_argv0 = 0;
2452 self->exec_argv1 = 0;
2453 self->exec_argv2 = 0;
2454 self->exec_argv3 = 0;
2455 }
2456 """
2457
2458 # Code currently not used.
2459 D_EXTRANEOUS = """
2460 /* This is a good learning experience, since it traces a lot of things
2461 * related to the process and child processes.
2462 * Warning: it generates a gigantic log. For example, tracing
2463 * "data/trace_inputs/child1.py --child" generates a 2mb log and takes
2464 * several minutes to execute.
2465 */
2466 /*
2467 mach_trap::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2468 printf("%d %d %s_%s() = %d\\n",
2469 logindex, pid, probeprov, probefunc, errno);
2470 logindex++;
2471 }
2472 proc::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2473 printf("%d %d %s_%s() = %d\\n",
2474 logindex, pid, probeprov, probefunc, errno);
2475 logindex++;
2476 }
2477 sched::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2478 printf("%d %d %s_%s() = %d\\n",
2479 logindex, pid, probeprov, probefunc, errno);
2480 logindex++;
2481 }
2482 syscall::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2483 printf("%d %d %s_%s() = %d\\n",
2484 logindex, pid, probeprov, probefunc, errno);
2485 logindex++;
2486 }
2487 vminfo::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2488 printf("%d %d %s_%s() = %d\\n",
2489 logindex, pid, probeprov, probefunc, errno);
2490 logindex++;
2491 }
2492 */
2493 /* TODO(maruel): *stat* functions and friends
2494 syscall::access:return,
2495 syscall::chdir:return,
2496 syscall::chflags:return,
2497 syscall::chown:return,
2498 syscall::chroot:return,
2499 syscall::getattrlist:return,
2500 syscall::getxattr:return,
2501 syscall::lchown:return,
2502 syscall::lstat64:return,
2503 syscall::lstat:return,
2504 syscall::mkdir:return,
2505 syscall::pathconf:return,
2506 syscall::readlink:return,
2507 syscall::removexattr:return,
2508 syscall::setxattr:return,
2509 syscall::stat64:return,
2510 syscall::stat:return,
2511 syscall::truncate:return,
2512 syscall::unlink:return,
2513 syscall::utimes:return,
2514 */
2515 """
2516
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002517 def __init__(self, logname, use_sudo):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002518 """Starts the log collection with dtrace.
2519
2520 Requires root access or chmod 4555 on dtrace. dtrace is asynchronous so
2521 this needs to wait for dtrace to be "warmed up".
2522 """
2523 super(Dtrace.Tracer, self).__init__(logname)
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002524 # This script is used as a signal to figure out the root process.
2525 self._signal_script = create_subprocess_thunk()
2526 self._scripts_to_cleanup.append(self._signal_script)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002527 # This unique dummy temp file is used to signal the dtrace script that it
2528 # should stop as soon as all the child processes are done. A bit hackish
2529 # but works fine enough.
2530 self._dummy_file_id, self._dummy_file_name = tempfile.mkstemp(
2531 prefix='trace_signal_file')
2532
maruel@chromium.org9b2112a2013-04-05 18:01:02 +00002533 dtrace_path = '/usr/sbin/dtrace'
2534 if not os.path.isfile(dtrace_path):
2535 dtrace_path = 'dtrace'
2536 elif use_sudo is None and (os.stat(dtrace_path).st_mode & stat.S_ISUID):
2537 # No need to sudo. For those following at home, don't do that.
2538 use_sudo = False
2539
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002540 # Note: do not use the -p flag. It's useless if the initial process quits
2541 # too fast, resulting in missing traces from the grand-children. The D
2542 # code manages the dtrace lifetime itself.
2543 trace_cmd = [
maruel@chromium.org9b2112a2013-04-05 18:01:02 +00002544 dtrace_path,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002545 # Use a larger buffer if getting 'out of scratch space' errors.
2546 # Ref: https://wikis.oracle.com/display/DTrace/Options+and+Tunables
2547 '-b', '10m',
2548 '-x', 'dynvarsize=10m',
2549 #'-x', 'dtrace_global_maxsize=1m',
2550 '-x', 'evaltime=exec',
2551 '-o', '/dev/stderr',
2552 '-q',
2553 '-n', self._get_dtrace_code(),
2554 ]
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002555 if use_sudo is not False:
2556 trace_cmd.insert(0, 'sudo')
2557
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002558 with open(self._logname + '.log', 'wb') as logfile:
2559 self._dtrace = subprocess.Popen(
2560 trace_cmd, stdout=logfile, stderr=subprocess.STDOUT)
2561 logging.debug('Started dtrace pid: %d' % self._dtrace.pid)
2562
2563 # Reads until one line is printed, which signifies dtrace is up and ready.
2564 with open(self._logname + '.log', 'rb') as logfile:
2565 while 'dtrace_BEGIN' not in logfile.readline():
2566 if self._dtrace.poll() is not None:
2567 # Do a busy wait. :/
2568 break
2569 logging.debug('dtrace started')
2570
2571 def _get_dtrace_code(self):
2572 """Setups the D code to implement child process tracking.
2573
2574 Injects the cookie in the script so it knows when to stop.
2575
2576 The script will detect any instance of the script created with
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002577 create_subprocess_thunk() and will start tracing it.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002578 """
maruel@chromium.orgbe75f282012-11-02 19:34:37 +00002579 out = (
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002580 'inline int PID = %d;\n'
2581 'inline string SCRIPT = "%s";\n'
2582 'inline int FILE_ID = %d;\n'
2583 '\n'
2584 '%s') % (
2585 os.getpid(),
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002586 self._signal_script,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002587 self._dummy_file_id,
maruel@chromium.orgbe75f282012-11-02 19:34:37 +00002588 self.D_CODE)
2589 if os.environ.get('TRACE_INPUTS_DTRACE_ENABLE_EXECVE') == '1':
2590 # Do not enable by default since it tends to spew dtrace: error lines
2591 # because the execve() parameters are not in valid memory at the time of
2592 # logging.
2593 # TODO(maruel): Find a way to make this reliable since it's useful but
2594 # only works in limited/trivial uses cases for now.
2595 out += self.D_CODE_EXECVE
2596 return out
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002597
2598 def trace(self, cmd, cwd, tracename, output):
2599 """Runs dtrace on an executable.
2600
2601 This dtruss is broken when it starts the process itself or when tracing
2602 child processes, this code starts a wrapper process
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002603 generated with create_subprocess_thunk() which starts the executable to
2604 trace.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002605 """
2606 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
2607 assert os.path.isabs(cmd[0]), cmd[0]
2608 assert os.path.isabs(cwd), cwd
2609 assert os.path.normpath(cwd) == cwd, cwd
2610 with self._lock:
2611 if not self._initialized:
2612 raise TracingFailure(
2613 'Called Tracer.trace() on an unitialized object',
2614 None, None, None, tracename)
2615 assert tracename not in (i['trace'] for i in self._traces)
2616
2617 # Starts the script wrapper to start the child process. This signals the
2618 # dtrace script that this process is to be traced.
2619 stdout = stderr = None
2620 if output:
2621 stdout = subprocess.PIPE
2622 stderr = subprocess.STDOUT
2623 child_cmd = [
2624 sys.executable,
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002625 self._signal_script,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002626 tracename,
2627 ]
2628 # Call a dummy function so that dtrace knows I'm about to launch a process
2629 # that needs to be traced.
2630 # Yummy.
2631 child = subprocess.Popen(
2632 child_cmd + fix_python_path(cmd),
2633 stdin=subprocess.PIPE,
2634 stdout=stdout,
2635 stderr=stderr,
2636 cwd=cwd)
2637 logging.debug('Started child pid: %d' % child.pid)
2638
2639 out = child.communicate()[0]
2640 # This doesn't mean tracing is done, one of the grand-child process may
2641 # still be alive. It will be tracked with the dtrace script.
2642
2643 with self._lock:
2644 assert tracename not in (i['trace'] for i in self._traces)
2645 self._traces.append(
2646 {
2647 'cmd': cmd,
2648 'cwd': cwd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002649 'pid': child.pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002650 'output': out,
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002651 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002652 })
2653 return child.returncode, out
2654
2655 def close(self, timeout=None):
2656 """Terminates dtrace."""
2657 logging.debug('close(%s)' % timeout)
2658 try:
2659 try:
2660 super(Dtrace.Tracer, self).close(timeout)
2661 # Signal dtrace that it should stop now.
maruel@chromium.orgf43e68b2012-10-15 20:23:10 +00002662 # ftruncate doesn't exist on Windows.
2663 os.ftruncate(self._dummy_file_id, 0) # pylint: disable=E1101
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002664 if timeout:
2665 start = time.time()
2666 # Use polling. :/
2667 while (self._dtrace.poll() is None and
2668 (time.time() - start) < timeout):
2669 time.sleep(0.1)
2670 self._dtrace.kill()
2671 self._dtrace.wait()
2672 finally:
2673 # Make sure to kill it in any case.
2674 if self._dtrace.poll() is None:
2675 try:
2676 self._dtrace.kill()
2677 self._dtrace.wait()
2678 except OSError:
2679 pass
2680
2681 if self._dtrace.returncode != 0:
2682 # Warn about any dtrace failure but basically ignore it.
2683 print 'dtrace failure: %s' % self._dtrace.returncode
2684 finally:
2685 os.close(self._dummy_file_id)
2686 os.remove(self._dummy_file_name)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002687
2688 def post_process_log(self):
2689 """Sorts the log back in order when each call occured.
2690
2691 dtrace doesn't save the buffer in strict order since it keeps one buffer
2692 per CPU.
2693 """
2694 super(Dtrace.Tracer, self).post_process_log()
2695 logname = self._logname + '.log'
2696 with open(logname, 'rb') as logfile:
2697 lines = [l for l in logfile if l.strip()]
2698 errors = [l for l in lines if l.startswith('dtrace:')]
2699 if errors:
2700 raise TracingFailure(
2701 'Found errors in the trace: %s' % '\n'.join(errors),
2702 None, None, None, logname)
2703 try:
2704 lines = sorted(lines, key=lambda l: int(l.split(' ', 1)[0]))
2705 except ValueError:
2706 raise TracingFailure(
2707 'Found errors in the trace: %s' % '\n'.join(
2708 l for l in lines if l.split(' ', 1)[0].isdigit()),
2709 None, None, None, logname)
2710 with open(logname, 'wb') as logfile:
2711 logfile.write(''.join(lines))
2712
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002713 def __init__(self, use_sudo=None):
2714 super(Dtrace, self).__init__()
2715 self.use_sudo = use_sudo
2716
2717 def get_tracer(self, logname):
2718 return self.Tracer(logname, self.use_sudo)
2719
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002720 @staticmethod
2721 def clean_trace(logname):
2722 for ext in ('', '.log'):
2723 if os.path.isfile(logname + ext):
2724 os.remove(logname + ext)
2725
2726 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00002727 def parse_log(cls, logname, blacklist, trace_name):
2728 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00002729 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002730
2731 def blacklist_more(filepath):
2732 # All the HFS metadata is in the form /.vol/...
2733 return blacklist(filepath) or re.match(r'^\/\.vol\/.+$', filepath)
2734
2735 data = read_json(logname)
2736 out = []
2737 for item in data['traces']:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00002738 if trace_name and item['trace'] != trace_name:
2739 continue
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002740 result = {
2741 'output': item['output'],
2742 'trace': item['trace'],
2743 }
2744 try:
2745 context = cls.Context(blacklist_more, item['pid'], item['cwd'])
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002746 # It's fine to assume the file as UTF-8: OSX enforces the file names to
2747 # be valid UTF-8 and we control the log output.
2748 for line in codecs.open(logname + '.log', 'rb', encoding='utf-8'):
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002749 context.on_line(line)
2750 result['results'] = context.to_results()
2751 except TracingFailure:
2752 result['exception'] = sys.exc_info()
2753 out.append(result)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002754 return out
2755
2756
2757class LogmanTrace(ApiBase):
2758 """Uses the native Windows ETW based tracing functionality to trace a child
2759 process.
2760
2761 Caveat: this implementations doesn't track cwd or initial_cwd. It is because
2762 the Windows Kernel doesn't have a concept of 'current working directory' at
2763 all. A Win32 process has a map of current directories, one per drive letter
2764 and it is managed by the user mode kernel32.dll. In kernel, a file is always
2765 opened relative to another file_object or as an absolute path. All the current
2766 working directory logic is done in user mode.
2767 """
2768 class Context(ApiBase.Context):
2769 """Processes a ETW log line and keeps the list of existent and non
2770 existent files accessed.
2771
2772 Ignores directories.
2773 """
2774 # These indexes are for the stripped version in json.
2775 EVENT_NAME = 0
2776 TYPE = 1
2777 PID = 2
2778 TID = 3
2779 PROCESSOR_ID = 4
2780 TIMESTAMP = 5
2781 USER_DATA = 6
2782
2783 class Process(ApiBase.Context.Process):
2784 def __init__(self, *args):
2785 super(LogmanTrace.Context.Process, self).__init__(*args)
2786 # Handle file objects that succeeded.
2787 self.file_objects = {}
2788
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002789 def __init__(self, blacklist, thunk_pid, trace_name, thunk_cmd):
2790 logging.info(
2791 '%s(%d, %s, %s)', self.__class__.__name__, thunk_pid, trace_name,
2792 thunk_cmd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002793 super(LogmanTrace.Context, self).__init__(blacklist)
2794 self._drive_map = DosDriveMap()
2795 # Threads mapping to the corresponding process id.
2796 self._threads_active = {}
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002797 # Process ID of the tracer, e.g. the temporary script created by
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002798 # create_subprocess_thunk(). This is tricky because the process id may
2799 # have been reused.
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002800 self._thunk_pid = thunk_pid
2801 self._thunk_cmd = thunk_cmd
2802 self._trace_name = trace_name
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002803 self._line_number = 0
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002804 self._thunk_process = None
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002805
2806 def on_line(self, line):
2807 """Processes a json Event line."""
2808 self._line_number += 1
2809 try:
2810 # By Opcode
2811 handler = getattr(
2812 self,
2813 'handle_%s_%s' % (line[self.EVENT_NAME], line[self.TYPE]),
2814 None)
2815 if not handler:
2816 raise TracingFailure(
2817 'Unexpected event %s_%s' % (
2818 line[self.EVENT_NAME], line[self.TYPE]),
2819 None, None, None)
2820 handler(line)
2821 except TracingFailure, e:
2822 # Hack in the values since the handler could be a static function.
2823 e.pid = line[self.PID]
2824 e.line = line
2825 e.line_number = self._line_number
2826 # Re-raise the modified exception.
2827 raise
2828 except (KeyError, NotImplementedError, ValueError), e:
2829 raise TracingFailure(
2830 'Trace generated a %s exception: %s' % (
2831 e.__class__.__name__, str(e)),
2832 line[self.PID],
2833 self._line_number,
2834 line,
2835 e)
2836
2837 def to_results(self):
2838 if not self.root_process:
2839 raise TracingFailure(
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002840 'Failed to detect the initial process %d' % self._thunk_pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002841 None, None, None)
2842 process = self.root_process.to_results_process()
2843 return Results(process)
2844
2845 def _thread_to_process(self, tid):
2846 """Finds the process from the thread id."""
2847 tid = int(tid, 16)
2848 pid = self._threads_active.get(tid)
2849 if not pid or not self._process_lookup.get(pid):
2850 return
2851 return self._process_lookup[pid]
2852
2853 @classmethod
2854 def handle_EventTrace_Header(cls, line):
2855 """Verifies no event was dropped, e.g. no buffer overrun occured."""
2856 BUFFER_SIZE = cls.USER_DATA
2857 #VERSION = cls.USER_DATA + 1
2858 #PROVIDER_VERSION = cls.USER_DATA + 2
2859 #NUMBER_OF_PROCESSORS = cls.USER_DATA + 3
2860 #END_TIME = cls.USER_DATA + 4
2861 #TIMER_RESOLUTION = cls.USER_DATA + 5
2862 #MAX_FILE_SIZE = cls.USER_DATA + 6
2863 #LOG_FILE_MODE = cls.USER_DATA + 7
2864 #BUFFERS_WRITTEN = cls.USER_DATA + 8
2865 #START_BUFFERS = cls.USER_DATA + 9
2866 #POINTER_SIZE = cls.USER_DATA + 10
2867 EVENTS_LOST = cls.USER_DATA + 11
2868 #CPU_SPEED = cls.USER_DATA + 12
2869 #LOGGER_NAME = cls.USER_DATA + 13
2870 #LOG_FILE_NAME = cls.USER_DATA + 14
2871 #BOOT_TIME = cls.USER_DATA + 15
2872 #PERF_FREQ = cls.USER_DATA + 16
2873 #START_TIME = cls.USER_DATA + 17
2874 #RESERVED_FLAGS = cls.USER_DATA + 18
2875 #BUFFERS_LOST = cls.USER_DATA + 19
2876 #SESSION_NAME_STRING = cls.USER_DATA + 20
2877 #LOG_FILE_NAME_STRING = cls.USER_DATA + 21
2878 if line[EVENTS_LOST] != '0':
2879 raise TracingFailure(
2880 ( '%s events were lost during trace, please increase the buffer '
2881 'size from %s') % (line[EVENTS_LOST], line[BUFFER_SIZE]),
2882 None, None, None)
2883
2884 def handle_FileIo_Cleanup(self, line):
2885 """General wisdom: if a file is closed, it's because it was opened.
2886
2887 Note that FileIo_Close is not used since if a file was opened properly but
2888 not closed before the process exits, only Cleanup will be logged.
2889 """
2890 #IRP = self.USER_DATA
2891 TTID = self.USER_DATA + 1 # Thread ID, that's what we want.
2892 FILE_OBJECT = self.USER_DATA + 2
2893 #FILE_KEY = self.USER_DATA + 3
2894 proc = self._thread_to_process(line[TTID])
2895 if not proc:
2896 # Not a process we care about.
2897 return
2898 file_object = line[FILE_OBJECT]
2899 if file_object in proc.file_objects:
maruel@chromium.org538141b2013-06-03 20:57:17 +00002900 filepath, access_type = proc.file_objects.pop(file_object)
2901 proc.add_file(filepath, access_type)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002902
2903 def handle_FileIo_Create(self, line):
2904 """Handles a file open.
2905
2906 All FileIo events are described at
2907 http://msdn.microsoft.com/library/windows/desktop/aa363884.aspx
2908 for some value of 'description'.
2909
2910 " (..) process and thread id values of the IO events (..) are not valid "
2911 http://msdn.microsoft.com/magazine/ee358703.aspx
2912
2913 The FileIo.Create event doesn't return if the CreateFile() call
2914 succeeded, so keep track of the file_object and check that it is
2915 eventually closed with FileIo_Cleanup.
2916 """
2917 #IRP = self.USER_DATA
2918 TTID = self.USER_DATA + 1 # Thread ID, that's what we want.
2919 FILE_OBJECT = self.USER_DATA + 2
2920 #CREATE_OPTIONS = self.USER_DATA + 3
2921 #FILE_ATTRIBUTES = self.USER_DATA + 4
maruel@chromium.org538141b2013-06-03 20:57:17 +00002922 #SHARE_ACCESS = self.USER_DATA + 5
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002923 OPEN_PATH = self.USER_DATA + 6
2924
2925 proc = self._thread_to_process(line[TTID])
2926 if not proc:
2927 # Not a process we care about.
2928 return
2929
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002930 raw_path = line[OPEN_PATH]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002931 # Ignore directories and bare drive right away.
2932 if raw_path.endswith(os.path.sep):
2933 return
2934 filepath = self._drive_map.to_win32(raw_path)
2935 # Ignore bare drive right away. Some may still fall through with format
2936 # like '\\?\X:'
2937 if len(filepath) == 2:
2938 return
2939 file_object = line[FILE_OBJECT]
2940 if os.path.isdir(filepath):
2941 # There is no O_DIRECTORY equivalent on Windows. The closed is
2942 # FILE_FLAG_BACKUP_SEMANTICS but it's not exactly right either. So
2943 # simply discard directories are they are found.
2944 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00002945 # Override any stale file object.
2946 # TODO(maruel): Figure out a way to detect if the file was opened for
2947 # reading or writting. Sadly CREATE_OPTIONS doesn't seem to be of any help
2948 # here. For now mark as None to make it clear we have no idea what it is
2949 # about.
2950 proc.file_objects[file_object] = (filepath, None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002951
2952 def handle_FileIo_Rename(self, line):
2953 # TODO(maruel): Handle?
2954 pass
2955
2956 def handle_Process_End(self, line):
2957 pid = line[self.PID]
2958 if self._process_lookup.get(pid):
2959 logging.info('Terminated: %d' % pid)
2960 self._process_lookup[pid] = None
2961 else:
2962 logging.debug('Terminated: %d' % pid)
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002963 if self._thunk_process and self._thunk_process.pid == pid:
2964 self._thunk_process = None
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002965
2966 def handle_Process_Start(self, line):
2967 """Handles a new child process started by PID."""
2968 #UNIQUE_PROCESS_KEY = self.USER_DATA
2969 PROCESS_ID = self.USER_DATA + 1
2970 #PARENT_PID = self.USER_DATA + 2
2971 #SESSION_ID = self.USER_DATA + 3
2972 #EXIT_STATUS = self.USER_DATA + 4
2973 #DIRECTORY_TABLE_BASE = self.USER_DATA + 5
2974 #USER_SID = self.USER_DATA + 6
2975 IMAGE_FILE_NAME = self.USER_DATA + 7
2976 COMMAND_LINE = self.USER_DATA + 8
2977
2978 ppid = line[self.PID]
2979 pid = int(line[PROCESS_ID], 16)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002980 command_line = CommandLineToArgvW(line[COMMAND_LINE])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002981 logging.debug(
2982 'New process %d->%d (%s) %s' %
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002983 (ppid, pid, line[IMAGE_FILE_NAME], command_line))
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002984
2985 if pid == self._thunk_pid:
2986 # Need to ignore processes we don't know about because the log is
2987 # system-wide. self._thunk_pid shall start only one process.
2988 # This is tricky though because Windows *loves* to reuse process id and
2989 # it happens often that the process ID of the thunk script created by
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002990 # create_subprocess_thunk() is reused. So just detecting the pid here is
2991 # not sufficient, we must confirm the command line.
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002992 if command_line[:len(self._thunk_cmd)] != self._thunk_cmd:
2993 logging.info(
2994 'Ignoring duplicate pid %d for %s: %s while searching for %s',
2995 pid, self._trace_name, command_line, self._thunk_cmd)
2996 return
2997
2998 # TODO(maruel): The check is quite weak. Add the thunk path.
2999 if self._thunk_process:
3000 raise TracingFailure(
3001 ( 'Parent process is _thunk_pid(%d) but thunk_process(%d) is '
3002 'already set') % (self._thunk_pid, self._thunk_process.pid),
3003 None, None, None)
3004 proc = self.Process(self.blacklist, pid, None)
3005 self._thunk_process = proc
3006 return
3007 elif ppid == self._thunk_pid and self._thunk_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003008 proc = self.Process(self.blacklist, pid, None)
3009 self.root_process = proc
3010 ppid = None
3011 elif self._process_lookup.get(ppid):
3012 proc = self.Process(self.blacklist, pid, None)
3013 self._process_lookup[ppid].children.append(proc)
3014 else:
3015 # Ignore
3016 return
3017 self._process_lookup[pid] = proc
3018
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003019 proc.command = command_line
3020 proc.executable = line[IMAGE_FILE_NAME]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003021 # proc.command[0] may be the absolute path of 'executable' but it may be
3022 # anything else too. If it happens that command[0] ends with executable,
3023 # use it, otherwise defaults to the base name.
3024 cmd0 = proc.command[0].lower()
3025 if not cmd0.endswith('.exe'):
3026 # TODO(maruel): That's not strictly true either.
3027 cmd0 += '.exe'
3028 if cmd0.endswith(proc.executable) and os.path.isfile(cmd0):
3029 # Fix the path.
3030 cmd0 = cmd0.replace('/', os.path.sep)
3031 cmd0 = os.path.normpath(cmd0)
3032 proc.executable = get_native_path_case(cmd0)
3033 logging.info(
3034 'New child: %s -> %d %s' % (ppid, pid, proc.executable))
3035
3036 def handle_Thread_End(self, line):
3037 """Has the same parameters as Thread_Start."""
3038 tid = int(line[self.TID], 16)
3039 self._threads_active.pop(tid, None)
3040
3041 def handle_Thread_Start(self, line):
3042 """Handles a new thread created.
3043
3044 Do not use self.PID here since a process' initial thread is created by
3045 the parent process.
3046 """
3047 PROCESS_ID = self.USER_DATA
3048 TTHREAD_ID = self.USER_DATA + 1
3049 #STACK_BASE = self.USER_DATA + 2
3050 #STACK_LIMIT = self.USER_DATA + 3
3051 #USER_STACK_BASE = self.USER_DATA + 4
3052 #USER_STACK_LIMIT = self.USER_DATA + 5
3053 #AFFINITY = self.USER_DATA + 6
3054 #WIN32_START_ADDR = self.USER_DATA + 7
3055 #TEB_BASE = self.USER_DATA + 8
3056 #SUB_PROCESS_TAG = self.USER_DATA + 9
3057 #BASE_PRIORITY = self.USER_DATA + 10
3058 #PAGE_PRIORITY = self.USER_DATA + 11
3059 #IO_PRIORITY = self.USER_DATA + 12
3060 #THREAD_FLAGS = self.USER_DATA + 13
3061 # Do not use self.PID here since a process' initial thread is created by
3062 # the parent process.
3063 pid = int(line[PROCESS_ID], 16)
3064 tid = int(line[TTHREAD_ID], 16)
3065 logging.debug('New thread pid:%d, tid:%d' % (pid, tid))
3066 self._threads_active[tid] = pid
3067
3068 @classmethod
3069 def supported_events(cls):
3070 """Returns all the procesed events."""
3071 out = []
3072 for member in dir(cls):
3073 match = re.match(r'^handle_([A-Za-z]+)_([A-Za-z]+)$', member)
3074 if match:
3075 out.append(match.groups())
3076 return out
3077
3078 class Tracer(ApiBase.Tracer):
3079 # The basic headers.
3080 EXPECTED_HEADER = [
3081 u'Event Name',
3082 u'Type',
3083 u'Event ID',
3084 u'Version',
3085 u'Channel',
3086 u'Level', # 5
3087 u'Opcode',
3088 u'Task',
3089 u'Keyword',
3090 u'PID',
3091 u'TID', # 10
3092 u'Processor Number',
3093 u'Instance ID',
3094 u'Parent Instance ID',
3095 u'Activity ID',
3096 u'Related Activity ID', # 15
3097 u'Clock-Time',
3098 u'Kernel(ms)', # Both have a resolution of ~15ms which makes them
3099 u'User(ms)', # pretty much useless.
3100 u'User Data', # Extra arguments that are event-specific.
3101 ]
3102 # Only the useful headers common to all entries are listed there. Any column
3103 # at 19 or higher is dependent on the specific event.
3104 EVENT_NAME = 0
3105 TYPE = 1
3106 PID = 9
3107 TID = 10
3108 PROCESSOR_ID = 11
3109 TIMESTAMP = 16
3110 NULL_GUID = '{00000000-0000-0000-0000-000000000000}'
3111 USER_DATA = 19
3112
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003113 class CsvReader(object):
3114 """CSV reader that reads files generated by tracerpt.exe.
3115
3116 csv.reader() fails to read them properly, it mangles file names quoted
3117 with "" with a comma in it.
3118 """
3119 # 0. Had a ',' or one of the following ' ' after a comma, next should
3120 # be ' ', '"' or string or ',' for an empty field.
3121 ( HAD_DELIMITER,
3122 # 1. Processing an unquoted field up to ','.
3123 IN_STR,
3124 # 2. Processing a new field starting with '"'.
3125 STARTING_STR_QUOTED,
3126 # 3. Second quote in a row at the start of a field. It could be either
3127 # '""foo""' or '""'. Who the hell thought it was a great idea to use
3128 # the same character for delimiting and escaping?
3129 STARTING_SECOND_QUOTE,
3130 # 4. A quote inside a quoted string where the previous character was
3131 # not a quote, so the string is not empty. Can be either: end of a
3132 # quoted string (a delimiter) or a quote escape. The next char must be
3133 # either '"' or ','.
3134 HAD_QUOTE_IN_QUOTED,
3135 # 5. Second quote inside a quoted string.
3136 HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED,
3137 # 6. Processing a field that started with '"'.
3138 IN_STR_QUOTED) = range(7)
3139
3140 def __init__(self, f):
3141 self.f = f
3142
3143 def __iter__(self):
3144 return self
3145
3146 def next(self):
3147 """Splits the line in fields."""
3148 line = self.f.readline()
3149 if not line:
3150 raise StopIteration()
3151 line = line.strip()
3152 fields = []
3153 state = self.HAD_DELIMITER
3154 for i, c in enumerate(line):
3155 if state == self.HAD_DELIMITER:
3156 if c == ',':
3157 # Empty field.
3158 fields.append('')
3159 elif c == ' ':
3160 # Ignore initial whitespaces
3161 pass
3162 elif c == '"':
3163 state = self.STARTING_STR_QUOTED
3164 fields.append('')
3165 else:
3166 # Start of a new field.
3167 state = self.IN_STR
3168 fields.append(c)
3169
3170 elif state == self.IN_STR:
3171 # Do not accept quote inside unquoted field.
3172 assert c != '"', (i, c, line, fields)
3173 if c == ',':
3174 fields[-1] = fields[-1].strip()
3175 state = self.HAD_DELIMITER
3176 else:
3177 fields[-1] = fields[-1] + c
3178
3179 elif state == self.STARTING_STR_QUOTED:
3180 if c == '"':
3181 # Do not store the character yet.
3182 state = self.STARTING_SECOND_QUOTE
3183 else:
3184 state = self.IN_STR_QUOTED
3185 fields[-1] = fields[-1] + c
3186
3187 elif state == self.STARTING_SECOND_QUOTE:
3188 if c == ',':
3189 # It was an empty field. '""' == ''.
3190 state = self.HAD_DELIMITER
3191 else:
3192 fields[-1] = fields[-1] + '"' + c
3193 state = self.IN_STR_QUOTED
3194
3195 elif state == self.HAD_QUOTE_IN_QUOTED:
3196 if c == ',':
3197 # End of the string.
3198 state = self.HAD_DELIMITER
3199 elif c == '"':
3200 state = self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED
3201 else:
3202 # The previous double-quote was just an unescaped quote.
3203 fields[-1] = fields[-1] + '"' + c
3204 state = self.IN_STR_QUOTED
3205
3206 elif state == self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED:
3207 if c == ',':
3208 # End of the string.
3209 state = self.HAD_DELIMITER
3210 fields[-1] = fields[-1] + '"'
3211 else:
maruel@chromium.org261d1352013-02-25 19:23:20 +00003212 # That's just how the logger rolls. Revert back to appending the
3213 # char and "guess" it was a quote in a double-quoted string.
3214 state = self.IN_STR_QUOTED
3215 fields[-1] = fields[-1] + '"' + c
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003216
3217 elif state == self.IN_STR_QUOTED:
3218 if c == '"':
3219 # Could be a delimiter or an escape.
3220 state = self.HAD_QUOTE_IN_QUOTED
3221 else:
3222 fields[-1] = fields[-1] + c
3223
3224 if state == self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED:
3225 fields[-1] = fields[-1] + '"'
3226 else:
3227 assert state in (
3228 # Terminated with a normal field.
3229 self.IN_STR,
3230 # Terminated with an empty field.
3231 self.STARTING_SECOND_QUOTE,
3232 # Terminated with a normal quoted field.
3233 self.HAD_QUOTE_IN_QUOTED), (
3234 line, state, fields)
3235 return fields
3236
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003237 def __init__(self, logname):
3238 """Starts the log collection.
3239
3240 Requires administrative access. logman.exe is synchronous so no need for a
3241 "warmup" call. 'Windows Kernel Trace' is *localized* so use its GUID
3242 instead. The GUID constant name is SystemTraceControlGuid. Lovely.
3243
3244 One can get the list of potentially interesting providers with:
3245 "logman query providers | findstr /i file"
3246 """
3247 super(LogmanTrace.Tracer, self).__init__(logname)
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00003248 self._signal_script = create_subprocess_thunk()
3249 self._scripts_to_cleanup.append(self._signal_script)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003250 cmd_start = [
3251 'logman.exe',
3252 'start',
3253 'NT Kernel Logger',
3254 '-p', '{9e814aad-3204-11d2-9a82-006008a86939}',
3255 # splitio,fileiocompletion,syscall,file,cswitch,img
3256 '(process,fileio,thread)',
3257 '-o', self._logname + '.etl',
3258 '-ets', # Send directly to kernel
3259 # Values extracted out of thin air.
3260 # Event Trace Session buffer size in kb.
3261 '-bs', '10240',
3262 # Number of Event Trace Session buffers.
3263 '-nb', '16', '256',
3264 ]
3265 logging.debug('Running: %s' % cmd_start)
3266 try:
3267 subprocess.check_call(
3268 cmd_start,
3269 stdin=subprocess.PIPE,
3270 stdout=subprocess.PIPE,
3271 stderr=subprocess.STDOUT)
3272 except subprocess.CalledProcessError, e:
3273 if e.returncode == -2147024891:
3274 print >> sys.stderr, 'Please restart with an elevated admin prompt'
3275 elif e.returncode == -2144337737:
3276 print >> sys.stderr, (
3277 'A kernel trace was already running, stop it and try again')
3278 raise
3279
3280 def trace(self, cmd, cwd, tracename, output):
3281 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
3282 assert os.path.isabs(cmd[0]), cmd[0]
3283 assert os.path.isabs(cwd), cwd
3284 assert os.path.normpath(cwd) == cwd, cwd
3285 with self._lock:
3286 if not self._initialized:
3287 raise TracingFailure(
3288 'Called Tracer.trace() on an unitialized object',
3289 None, None, None, tracename)
3290 assert tracename not in (i['trace'] for i in self._traces)
3291
3292 # Use "logman -?" for help.
3293
3294 stdout = stderr = None
3295 if output:
3296 stdout = subprocess.PIPE
3297 stderr = subprocess.STDOUT
3298
3299 # Run the child process.
3300 logging.debug('Running: %s' % cmd)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00003301 # Use the temporary script generated with create_subprocess_thunk() so we
3302 # have a clear pid owner. Since trace_inputs.py can be used as a library
3303 # and could trace multiple processes simultaneously, it makes it more
3304 # complex if the executable to be traced is executed directly here. It
3305 # also solves issues related to logman.exe that needs to be executed to
3306 # control the kernel trace.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003307 child_cmd = [
3308 sys.executable,
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00003309 self._signal_script,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003310 tracename,
3311 ]
3312 child = subprocess.Popen(
3313 child_cmd + fix_python_path(cmd),
3314 cwd=cwd,
3315 stdin=subprocess.PIPE,
3316 stdout=stdout,
3317 stderr=stderr)
3318 logging.debug('Started child pid: %d' % child.pid)
3319 out = child.communicate()[0]
3320 # This doesn't mean all the grand-children are done. Sadly, we don't have
3321 # a good way to determine that.
3322
3323 with self._lock:
3324 assert tracename not in (i['trace'] for i in self._traces)
3325 self._traces.append({
maruel@chromium.orgf43e68b2012-10-15 20:23:10 +00003326 'cmd': cmd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003327 'cwd': cwd,
maruel@chromium.org964ddde2012-10-29 18:13:46 +00003328 'output': out,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003329 'pid': child.pid,
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00003330 # Used to figure out the real process when process ids are reused.
3331 'thunk_cmd': child_cmd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003332 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003333 })
3334
3335 return child.returncode, out
3336
3337 def close(self, _timeout=None):
3338 """Stops the kernel log collection and converts the traces to text
3339 representation.
3340 """
3341 with self._lock:
maruel@chromium.org808e3f22013-05-16 16:42:54 +00003342 try:
3343 super(LogmanTrace.Tracer, self).close()
3344 finally:
3345 cmd_stop = [
3346 'logman.exe',
3347 'stop',
3348 'NT Kernel Logger',
3349 '-ets', # Sends the command directly to the kernel.
3350 ]
3351 logging.debug('Running: %s' % cmd_stop)
3352 subprocess.check_call(
3353 cmd_stop,
3354 stdin=subprocess.PIPE,
3355 stdout=subprocess.PIPE,
3356 stderr=subprocess.STDOUT)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003357
3358 def post_process_log(self):
3359 """Converts the .etl file into .csv then into .json."""
3360 super(LogmanTrace.Tracer, self).post_process_log()
3361 logformat = 'csv'
3362 self._convert_log(logformat)
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003363 self._trim_log(logformat)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003364
maruel@chromium.org808e3f22013-05-16 16:42:54 +00003365 def _gen_logdata(self):
3366 return {
3367 'format': 'csv',
3368 'traces': self._traces,
3369 }
3370
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003371 def _trim_log(self, logformat):
3372 """Reduces the amount of data in original log by generating a 'reduced'
3373 log.
3374 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003375 if logformat == 'csv_utf16':
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003376 file_handle = codecs.open(
3377 self._logname + '.' + logformat, 'r', encoding='utf-16')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003378
3379 elif logformat == 'csv':
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003380 assert sys.getfilesystemencoding() == 'mbcs'
3381 file_handle = codecs.open(
3382 self._logname + '.' + logformat, 'r',
3383 encoding=get_current_encoding())
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003384
3385 supported_events = LogmanTrace.Context.supported_events()
3386
3387 def trim(generator):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003388 """Loads items from the generator and returns the interesting data.
3389
3390 It filters out any uninteresting line and reduce the amount of data in
3391 the trace.
3392 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003393 for index, line in enumerate(generator):
3394 if not index:
3395 if line != self.EXPECTED_HEADER:
3396 raise TracingFailure(
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003397 'Found malformed header: %s' % line,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003398 None, None, None)
3399 continue
3400 # As you can see, the CSV is full of useful non-redundant information:
3401 if (line[2] != '0' or # Event ID
3402 line[3] not in ('2', '3') or # Version
3403 line[4] != '0' or # Channel
3404 line[5] != '0' or # Level
3405 line[7] != '0' or # Task
3406 line[8] != '0x0000000000000000' or # Keyword
3407 line[12] != '' or # Instance ID
3408 line[13] != '' or # Parent Instance ID
3409 line[14] != self.NULL_GUID or # Activity ID
3410 line[15] != ''): # Related Activity ID
3411 raise TracingFailure(
3412 'Found unexpected values in line: %s' % ' '.join(line),
3413 None, None, None)
3414
3415 if (line[self.EVENT_NAME], line[self.TYPE]) not in supported_events:
3416 continue
3417
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003418 yield [
3419 line[self.EVENT_NAME],
3420 line[self.TYPE],
3421 line[self.PID],
3422 line[self.TID],
3423 line[self.PROCESSOR_ID],
3424 line[self.TIMESTAMP],
3425 ] + line[self.USER_DATA:]
3426
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003427 # must not convert the trim() call into a list, since it will use too much
3428 # memory for large trace. use a csv file as a workaround since the json
3429 # parser requires a complete in-memory file.
3430 with open('%s.preprocessed' % self._logname, 'wb') as f:
3431 # $ and * can't be used in file name on windows, reducing the likelihood
3432 # of having to escape a string.
3433 out = csv.writer(
3434 f, delimiter='$', quotechar='*', quoting=csv.QUOTE_MINIMAL)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003435 for line in trim(self.CsvReader(file_handle)):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003436 out.writerow([s.encode('utf-8') for s in line])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003437
3438 def _convert_log(self, logformat):
3439 """Converts the ETL trace to text representation.
3440
3441 Normally, 'csv' is sufficient. If complex scripts are used (like eastern
3442 languages), use 'csv_utf16'. If localization gets in the way, use 'xml'.
3443
3444 Arguments:
3445 - logformat: Text format to be generated, csv, csv_utf16 or xml.
3446
3447 Use "tracerpt -?" for help.
3448 """
3449 LOCALE_INVARIANT = 0x7F
3450 windll.kernel32.SetThreadLocale(LOCALE_INVARIANT)
3451 cmd_convert = [
3452 'tracerpt.exe',
3453 '-l', self._logname + '.etl',
3454 '-o', self._logname + '.' + logformat,
3455 '-gmt', # Use UTC
3456 '-y', # No prompt
3457 # Use -of XML to get the header of each items after column 19, e.g. all
3458 # the actual headers of 'User Data'.
3459 ]
3460
3461 if logformat == 'csv':
3462 # tracerpt localizes the 'Type' column, for major brainfuck
3463 # entertainment. I can't imagine any sane reason to do that.
3464 cmd_convert.extend(['-of', 'CSV'])
3465 elif logformat == 'csv_utf16':
3466 # This causes it to use UTF-16, which doubles the log size but ensures
3467 # the log is readable for non-ASCII characters.
3468 cmd_convert.extend(['-of', 'CSV', '-en', 'Unicode'])
3469 elif logformat == 'xml':
3470 cmd_convert.extend(['-of', 'XML'])
3471 else:
3472 raise ValueError('Unexpected log format \'%s\'' % logformat)
3473 logging.debug('Running: %s' % cmd_convert)
3474 # This can takes tens of minutes for large logs.
3475 # Redirects all output to stderr.
3476 subprocess.check_call(
3477 cmd_convert,
3478 stdin=subprocess.PIPE,
3479 stdout=sys.stderr,
3480 stderr=sys.stderr)
3481
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003482 def __init__(self, use_sudo=False): # pylint: disable=W0613
3483 super(LogmanTrace, self).__init__()
3484 # Ignore use_sudo. It's irrelevant on Windows but kept to simplify the API.
3485
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003486 @staticmethod
3487 def clean_trace(logname):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003488 for ext in ('', '.csv', '.etl', '.json', '.xml', '.preprocessed'):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003489 if os.path.isfile(logname + ext):
3490 os.remove(logname + ext)
3491
3492 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003493 def parse_log(cls, logname, blacklist, trace_name):
3494 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00003495 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003496
3497 def blacklist_more(filepath):
3498 # All the NTFS metadata is in the form x:\$EXTEND or stuff like that.
3499 return blacklist(filepath) or re.match(r'[A-Z]\:\\\$EXTEND', filepath)
3500
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003501 # Create a list of (Context, result_dict) tuples. This is necessary because
3502 # the csv file may be larger than the amount of available memory.
3503 contexes = [
3504 (
3505 cls.Context(
3506 blacklist_more, item['pid'], item['trace'], item['thunk_cmd']),
3507 {
3508 'output': item['output'],
3509 'trace': item['trace'],
3510 },
3511 )
3512 for item in read_json(logname)['traces']
3513 if not trace_name or item['trace'] == trace_name
3514 ]
3515
3516 # The log may be too large to fit in memory and it is not efficient to read
3517 # it multiple times, so multiplex the contexes instead, which is slightly
3518 # more awkward.
3519 with open('%s.preprocessed' % logname, 'rb') as f:
3520 lines = csv.reader(
3521 f, delimiter='$', quotechar='*', quoting=csv.QUOTE_MINIMAL)
3522 for encoded in lines:
3523 line = [s.decode('utf-8') for s in encoded]
3524 # Convert the PID in-place from hex.
3525 line[cls.Context.PID] = int(line[cls.Context.PID], 16)
3526 for context in contexes:
3527 if 'exception' in context[1]:
3528 continue
3529 try:
3530 context[0].on_line(line)
3531 except TracingFailure:
3532 context[1]['exception'] = sys.exc_info()
3533
3534 for context in contexes:
3535 if 'exception' in context[1]:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003536 continue
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003537 context[1]['results'] = context[0].to_results()
3538
3539 return [context[1] for context in contexes]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003540
3541
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003542def get_api(**kwargs):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003543 """Returns the correct implementation for the current OS."""
3544 if sys.platform == 'cygwin':
3545 raise NotImplementedError(
3546 'Not implemented for cygwin, start the script from Win32 python')
3547 flavors = {
3548 'win32': LogmanTrace,
3549 'darwin': Dtrace,
3550 'sunos5': Dtrace,
3551 'freebsd7': Dtrace,
3552 'freebsd8': Dtrace,
3553 }
3554 # Defaults to strace.
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003555 return flavors.get(sys.platform, Strace)(**kwargs)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003556
3557
3558def extract_directories(root_dir, files, blacklist):
3559 """Detects if all the files in a directory are in |files| and if so, replace
3560 the individual files by a Results.Directory instance.
3561
3562 Takes a list of Results.File instances and returns a shorter list of
3563 Results.File and Results.Directory instances.
3564
3565 Arguments:
3566 - root_dir: Optional base directory that shouldn't be search further.
3567 - files: list of Results.File instances.
maruel@chromium.org3683afe2013-07-27 00:09:27 +00003568 - blacklist: lambda to reject unneeded files, for example r'.+\.pyc'.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003569 """
3570 logging.info(
3571 'extract_directories(%s, %d files, ...)' % (root_dir, len(files)))
3572 assert not (root_dir or '').endswith(os.path.sep), root_dir
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003573 # It is important for root_dir to not be a symlinked path, make sure to call
3574 # os.path.realpath() as needed.
3575 assert not root_dir or (
3576 os.path.realpath(get_native_path_case(root_dir)) == root_dir)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003577 assert not any(isinstance(f, Results.Directory) for f in files)
3578 # Remove non existent files.
3579 files = [f for f in files if f.existent]
3580 if not files:
3581 return files
3582 # All files must share the same root, which can be None.
3583 assert len(set(f.root for f in files)) == 1, set(f.root for f in files)
3584
3585 # Creates a {directory: {filename: File}} mapping, up to root.
3586 buckets = {}
3587 if root_dir:
3588 buckets[root_dir] = {}
3589 for fileobj in files:
3590 path = fileobj.full_path
3591 directory = os.path.dirname(path)
3592 assert directory
3593 # Do not use os.path.basename() so trailing os.path.sep is kept.
3594 basename = path[len(directory)+1:]
3595 files_in_directory = buckets.setdefault(directory, {})
3596 files_in_directory[basename] = fileobj
3597 # Add all the directories recursively up to root.
3598 while True:
3599 old_d = directory
3600 directory = os.path.dirname(directory)
3601 if directory + os.path.sep == root_dir or directory == old_d:
3602 break
3603 buckets.setdefault(directory, {})
3604
3605 root_prefix = len(root_dir) + 1 if root_dir else 0
3606 for directory in sorted(buckets, reverse=True):
maruel@chromium.orgb1678e62013-03-19 15:55:17 +00003607 if not os.path.isdir(directory):
3608 logging.debug(
3609 '%s was a directory but doesn\'t exist anymore; ignoring', directory)
3610 continue
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003611 actual = set(f for f in os.listdir(directory) if not blacklist(f))
3612 expected = set(buckets[directory])
3613 if not (actual - expected):
3614 parent = os.path.dirname(directory)
3615 buckets[parent][os.path.basename(directory)] = Results.Directory(
3616 root_dir,
3617 directory[root_prefix:],
3618 False,
3619 sum(f.size for f in buckets[directory].itervalues()),
3620 sum(f.nb_files for f in buckets[directory].itervalues()))
3621 # Remove the whole bucket.
3622 del buckets[directory]
3623
3624 # Reverse the mapping with what remains. The original instances are returned,
3625 # so the cached meta data is kept.
3626 files = sum((x.values() for x in buckets.itervalues()), [])
3627 return sorted(files, key=lambda x: x.path)
3628
3629
3630def trace(logfile, cmd, cwd, api, output):
3631 """Traces an executable. Returns (returncode, output) from api.
3632
3633 Arguments:
3634 - logfile: file to write to.
3635 - cmd: command to run.
3636 - cwd: current directory to start the process in.
3637 - api: a tracing api instance.
3638 - output: if True, returns output, otherwise prints it at the console.
3639 """
3640 cmd = fix_python_path(cmd)
3641 api.clean_trace(logfile)
3642 with api.get_tracer(logfile) as tracer:
3643 return tracer.trace(cmd, cwd, 'default', output)
3644
3645
maruel@chromium.orge5322512013-08-19 20:17:57 +00003646def CMDclean(parser, args):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003647 """Cleans up traces."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003648 options, args = parser.parse_args(args)
3649 api = get_api()
3650 api.clean_trace(options.log)
3651 return 0
3652
3653
maruel@chromium.orge5322512013-08-19 20:17:57 +00003654def CMDtrace(parser, args):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003655 """Traces an executable."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003656 parser.allow_interspersed_args = False
3657 parser.add_option(
3658 '-q', '--quiet', action='store_true',
3659 help='Redirects traced executable output to /dev/null')
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003660 parser.add_option(
3661 '-s', '--sudo', action='store_true',
3662 help='Use sudo when shelling out the tracer tool (ignored on Windows)')
3663 parser.add_option(
3664 '-n', '--no-sudo', action='store_false',
3665 help='Don\'t use sudo')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003666 options, args = parser.parse_args(args)
3667
3668 if not args:
3669 parser.error('Please provide a command to run')
3670
3671 if not os.path.isabs(args[0]) and os.access(args[0], os.X_OK):
3672 args[0] = os.path.abspath(args[0])
3673
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003674 # options.sudo default value is None, which is to do whatever tracer defaults
3675 # do.
3676 api = get_api(use_sudo=options.sudo)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003677 return trace(options.log, args, os.getcwd(), api, options.quiet)[0]
3678
3679
maruel@chromium.orge5322512013-08-19 20:17:57 +00003680def CMDread(parser, args):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003681 """Reads the logs and prints the result."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003682 parser.add_option(
3683 '-V', '--variable',
3684 nargs=2,
3685 action='append',
3686 dest='variables',
3687 metavar='VAR_NAME directory',
3688 default=[],
3689 help=('Variables to replace relative directories against. Example: '
3690 '"-v \'$HOME\' \'/home/%s\'" will replace all occurence of your '
3691 'home dir with $HOME') % getpass.getuser())
3692 parser.add_option(
3693 '--root-dir',
3694 help='Root directory to base everything off it. Anything outside of this '
3695 'this directory will not be reported')
3696 parser.add_option(
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003697 '--trace-name',
3698 help='Only reads one of the trace. Defaults to reading all traces')
3699 parser.add_option(
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003700 '-j', '--json', action='store_true',
3701 help='Outputs raw result data as json')
3702 parser.add_option(
maruel@chromium.org3683afe2013-07-27 00:09:27 +00003703 '--trace-blacklist', action='append', default=[],
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003704 help='List of regexp to use as blacklist filter')
3705 options, args = parser.parse_args(args)
3706
3707 if options.root_dir:
maruel@chromium.org7dc17b32013-04-02 22:27:05 +00003708 options.root_dir = get_native_path_case(
3709 unicode(os.path.abspath(options.root_dir)))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003710
3711 variables = dict(options.variables)
3712 api = get_api()
maruel@chromium.org3683afe2013-07-27 00:09:27 +00003713 blacklist = gen_blacklist(options.trace_blacklist)
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003714 data = api.parse_log(options.log, blacklist, options.trace_name)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003715 # Process each trace.
3716 output_as_json = []
maruel@chromium.org9e56b592013-05-21 16:56:05 +00003717 try:
3718 for item in data:
3719 if 'exception' in item:
3720 # Do not abort the other traces.
3721 print >> sys.stderr, (
3722 'Trace %s: Got an exception: %s' % (
3723 item['trace'], item['exception'][1]))
3724 continue
3725 results = item['results']
3726 if options.root_dir:
3727 results = results.strip_root(options.root_dir)
3728
3729 if options.json:
3730 output_as_json.append(results.flatten())
3731 else:
3732 simplified = extract_directories(
3733 options.root_dir, results.files, blacklist)
3734 simplified = [f.replace_variables(variables) for f in simplified]
3735 if len(data) > 1:
3736 print('Trace: %s' % item['trace'])
3737 print('Total: %d' % len(results.files))
3738 print('Non existent: %d' % len(results.non_existent))
3739 for f in results.non_existent:
3740 print(' %s' % f.path)
3741 print(
3742 'Interesting: %d reduced to %d' % (
3743 len(results.existent), len(simplified)))
3744 for f in simplified:
3745 print(' %s' % f.path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003746
3747 if options.json:
maruel@chromium.org9e56b592013-05-21 16:56:05 +00003748 write_json(sys.stdout, output_as_json, False)
3749 except KeyboardInterrupt:
3750 return 1
3751 except IOError as e:
3752 if e.errno == errno.EPIPE:
3753 # Do not print a stack trace when the output is piped to less and the user
3754 # quits before the whole output was written.
3755 return 1
3756 raise
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003757 return 0
3758
3759
3760class OptionParserWithLogging(optparse.OptionParser):
3761 """Adds --verbose option."""
3762 def __init__(self, verbose=0, **kwargs):
3763 optparse.OptionParser.__init__(self, **kwargs)
3764 self.add_option(
3765 '-v', '--verbose',
3766 action='count',
3767 default=verbose,
3768 help='Use multiple times to increase verbosity')
3769
3770 def parse_args(self, *args, **kwargs):
3771 options, args = optparse.OptionParser.parse_args(self, *args, **kwargs)
3772 levels = [logging.ERROR, logging.INFO, logging.DEBUG]
3773 logging.basicConfig(
3774 level=levels[min(len(levels)-1, options.verbose)],
3775 format='%(levelname)5s %(module)15s(%(lineno)3d): %(message)s')
3776 return options, args
3777
3778
maruel@chromium.orge5322512013-08-19 20:17:57 +00003779class OptionParserTraceInputs(OptionParserWithLogging):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003780 """Adds automatic --log handling."""
3781 def __init__(self, **kwargs):
maruel@chromium.orge5322512013-08-19 20:17:57 +00003782 OptionParserWithLogging.__init__(self, **kwargs)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003783 self.add_option(
3784 '-l', '--log', help='Log file to generate or read, required')
3785
3786 def parse_args(self, *args, **kwargs):
3787 """Makes sure the paths make sense.
3788
3789 On Windows, / and \ are often mixed together in a path.
3790 """
maruel@chromium.orge5322512013-08-19 20:17:57 +00003791 options, args = OptionParserWithLogging.parse_args(
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003792 self, *args, **kwargs)
3793 if not options.log:
3794 self.error('Must supply a log file with -l')
3795 options.log = os.path.abspath(options.log)
3796 return options, args
3797
3798
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003799def main(argv):
maruel@chromium.orge5322512013-08-19 20:17:57 +00003800 dispatcher = subcommand.CommandDispatcher(__name__)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003801 try:
maruel@chromium.org3d671992013-08-20 00:38:27 +00003802 return dispatcher.execute(
3803 OptionParserTraceInputs(version=__version__), argv)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003804 except TracingFailure, e:
3805 sys.stderr.write('\nError: ')
3806 sys.stderr.write(str(e))
3807 sys.stderr.write('\n')
3808 return 1
3809
3810
3811if __name__ == '__main__':
maruel@chromium.orge5322512013-08-19 20:17:57 +00003812 fix_encoding.fix_encoding()
3813 disable_buffering()
3814 colorama.init()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003815 sys.exit(main(sys.argv[1:]))