blob: 9b27ba67933cb8c6acc0bfcb504fd52145453ae7 [file] [log] [blame]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001#!/usr/bin/env python
2# coding=utf-8
3# Copyright (c) 2012 The Chromium Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""Traces an executable and its child processes and extract the files accessed
8by them.
9
10The implementation uses OS-specific API. The native Kernel logger and the ETL
11interface is used on Windows. Dtrace is used on OSX. Strace is used otherwise.
12The OS-specific implementation is hidden in an 'API' interface.
13
14The results are embedded in a Results instance. The tracing is done in two
15phases, the first is to do the actual trace and generate an
16implementation-specific log file. Then the log file is parsed to extract the
17information, including the individual child processes and the files accessed
18from the log.
19"""
20
21import codecs
22import csv
maruel@chromium.org9e56b592013-05-21 16:56:05 +000023import errno
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000024import getpass
25import glob
26import json
27import logging
28import optparse
29import os
30import re
maruel@chromium.org9b2112a2013-04-05 18:01:02 +000031import stat
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000032import subprocess
33import sys
34import tempfile
35import threading
36import time
maruel@chromium.org9b2112a2013-04-05 18:01:02 +000037import unicodedata
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000038import weakref
39
maruel@chromium.orge5322512013-08-19 20:17:57 +000040from third_party import colorama
41from third_party.depot_tools import fix_encoding
42from third_party.depot_tools import subcommand
43
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000044## OS-specific imports
45
46if sys.platform == 'win32':
47 from ctypes.wintypes import byref, create_unicode_buffer, c_int, c_wchar_p
48 from ctypes.wintypes import windll, FormatError # pylint: disable=E0611
49 from ctypes.wintypes import GetLastError # pylint: disable=E0611
50elif sys.platform == 'darwin':
51 import Carbon.File # pylint: disable=F0401
52 import MacOS # pylint: disable=F0401
53
54
55BASE_DIR = os.path.dirname(os.path.abspath(__file__))
56ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR))
57
58
59class TracingFailure(Exception):
60 """An exception occured during tracing."""
61 def __init__(self, description, pid, line_number, line, *args):
62 super(TracingFailure, self).__init__(
63 description, pid, line_number, line, *args)
64 self.description = description
65 self.pid = pid
66 self.line_number = line_number
67 self.line = line
68 self.extra = args
69
70 def __str__(self):
71 out = self.description
72 if self.pid:
73 out += '\npid: %d' % self.pid
74 if self.line_number:
75 out += '\nline: %d' % self.line_number
76 if self.line:
77 out += '\n%s' % self.line
78 if self.extra:
79 out += '\n' + ', '.join(map(str, filter(None, self.extra)))
80 return out
81
82
83## OS-specific functions
84
85if sys.platform == 'win32':
86 def QueryDosDevice(drive_letter):
87 """Returns the Windows 'native' path for a DOS drive letter."""
88 assert re.match(r'^[a-zA-Z]:$', drive_letter), drive_letter
maruel@chromium.org306e0e72012-11-02 18:22:03 +000089 assert isinstance(drive_letter, unicode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000090 # Guesswork. QueryDosDeviceW never returns the required number of bytes.
91 chars = 1024
maruel@chromium.org306e0e72012-11-02 18:22:03 +000092 drive_letter = drive_letter
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000093 p = create_unicode_buffer(chars)
94 if 0 == windll.kernel32.QueryDosDeviceW(drive_letter, p, chars):
95 err = GetLastError()
96 if err:
97 # pylint: disable=E0602
maruel@chromium.orgf37c2472012-11-02 20:06:13 +000098 msg = u'QueryDosDevice(%s): %s (%d)' % (
99 drive_letter, FormatError(err), err)
100 raise WindowsError(err, msg.encode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000101 return p.value
102
103
104 def GetShortPathName(long_path):
105 """Returns the Windows short path equivalent for a 'long' path."""
maruel@chromium.org68bb1482013-02-15 22:36:24 +0000106 assert isinstance(long_path, unicode), repr(long_path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000107 # Adds '\\\\?\\' when given an absolute path so the MAX_PATH (260) limit is
108 # not enforced.
109 if os.path.isabs(long_path) and not long_path.startswith('\\\\?\\'):
110 long_path = '\\\\?\\' + long_path
111 chars = windll.kernel32.GetShortPathNameW(long_path, None, 0)
112 if chars:
113 p = create_unicode_buffer(chars)
114 if windll.kernel32.GetShortPathNameW(long_path, p, chars):
115 return p.value
116
117 err = GetLastError()
118 if err:
119 # pylint: disable=E0602
maruel@chromium.orgf37c2472012-11-02 20:06:13 +0000120 msg = u'GetShortPathName(%s): %s (%d)' % (
121 long_path, FormatError(err), err)
122 raise WindowsError(err, msg.encode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000123
124
125 def GetLongPathName(short_path):
126 """Returns the Windows long path equivalent for a 'short' path."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000127 assert isinstance(short_path, unicode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000128 # Adds '\\\\?\\' when given an absolute path so the MAX_PATH (260) limit is
129 # not enforced.
130 if os.path.isabs(short_path) and not short_path.startswith('\\\\?\\'):
131 short_path = '\\\\?\\' + short_path
132 chars = windll.kernel32.GetLongPathNameW(short_path, None, 0)
133 if chars:
134 p = create_unicode_buffer(chars)
135 if windll.kernel32.GetLongPathNameW(short_path, p, chars):
136 return p.value
137
138 err = GetLastError()
139 if err:
140 # pylint: disable=E0602
maruel@chromium.orgf37c2472012-11-02 20:06:13 +0000141 msg = u'GetLongPathName(%s): %s (%d)' % (
142 short_path, FormatError(err), err)
143 raise WindowsError(err, msg.encode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000144
145
146 def get_current_encoding():
147 """Returns the 'ANSI' code page associated to the process."""
148 return 'cp%d' % int(windll.kernel32.GetACP())
149
150
151 class DosDriveMap(object):
152 """Maps \Device\HarddiskVolumeN to N: on Windows."""
153 # Keep one global cache.
154 _MAPPING = {}
155
156 def __init__(self):
157 """Lazy loads the cache."""
158 if not self._MAPPING:
159 # This is related to UNC resolver on windows. Ignore that.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000160 self._MAPPING[u'\\Device\\Mup'] = None
161 self._MAPPING[u'\\SystemRoot'] = os.environ[u'SystemRoot']
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000162
163 for letter in (chr(l) for l in xrange(ord('C'), ord('Z')+1)):
164 try:
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000165 letter = u'%s:' % letter
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000166 mapped = QueryDosDevice(letter)
167 if mapped in self._MAPPING:
168 logging.warn(
169 ('Two drives: \'%s\' and \'%s\', are mapped to the same disk'
170 '. Drive letters are a user-mode concept and the kernel '
171 'traces only have NT path, so all accesses will be '
172 'associated with the first drive letter, independent of the '
173 'actual letter used by the code') % (
174 self._MAPPING[mapped], letter))
175 else:
176 self._MAPPING[mapped] = letter
177 except WindowsError: # pylint: disable=E0602
178 pass
179
180 def to_win32(self, path):
181 """Converts a native NT path to Win32/DOS compatible path."""
182 match = re.match(r'(^\\Device\\[a-zA-Z0-9]+)(\\.*)?$', path)
183 if not match:
184 raise ValueError(
185 'Can\'t convert %s into a Win32 compatible path' % path,
186 path)
187 if not match.group(1) in self._MAPPING:
188 # Unmapped partitions may be accessed by windows for the
189 # fun of it while the test is running. Discard these.
190 return None
191 drive = self._MAPPING[match.group(1)]
192 if not drive or not match.group(2):
193 return drive
194 return drive + match.group(2)
195
196
197 def isabs(path):
198 """Accepts X: as an absolute path, unlike python's os.path.isabs()."""
199 return os.path.isabs(path) or len(path) == 2 and path[1] == ':'
200
201
csharp@chromium.orgf2eacff2013-04-04 14:20:20 +0000202 def find_item_native_case(root, item):
203 """Gets the native path case of a single item based at root_path."""
204 if item == '..':
205 return item
206
207 root = get_native_path_case(root)
208 return os.path.basename(get_native_path_case(os.path.join(root, item)))
209
210
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000211 def get_native_path_case(p):
212 """Returns the native path case for an existing file.
213
214 On Windows, removes any leading '\\?\'.
215 """
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000216 assert isinstance(p, unicode), repr(p)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000217 if not isabs(p):
218 raise ValueError(
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +0000219 'get_native_path_case(%r): Require an absolute path' % p, p)
220
maruel@chromium.org037758d2012-12-10 17:59:46 +0000221 # Make sure it is normalized to os.path.sep. Do not do it here to keep the
222 # function fast
223 assert '/' not in p, p
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +0000224 suffix = ''
225 count = p.count(':')
226 if count > 1:
227 # This means it has an alternate-data stream. There could be 3 ':', since
228 # it could be the $DATA datastream of an ADS. Split the whole ADS suffix
229 # off and add it back afterward. There is no way to know the native path
230 # case of an alternate data stream.
231 items = p.split(':')
232 p = ':'.join(items[0:2])
233 suffix = ''.join(':' + i for i in items[2:])
234
maruel@chromium.org42b091d2013-04-02 22:27:02 +0000235 # TODO(maruel): Use os.path.normpath?
236 if p.endswith('.\\'):
237 p = p[:-2]
238
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000239 # Windows used to have an option to turn on case sensitivity on non Win32
240 # subsystem but that's out of scope here and isn't supported anymore.
241 # Go figure why GetShortPathName() is needed.
242 try:
243 out = GetLongPathName(GetShortPathName(p))
244 except OSError, e:
245 if e.args[0] in (2, 3, 5):
246 # The path does not exist. Try to recurse and reconstruct the path.
247 base = os.path.dirname(p)
248 rest = os.path.basename(p)
249 return os.path.join(get_native_path_case(base), rest)
250 raise
251 if out.startswith('\\\\?\\'):
252 out = out[4:]
253 # Always upper case the first letter since GetLongPathName() will return the
254 # drive letter in the case it was given.
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +0000255 return out[0].upper() + out[1:] + suffix
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000256
257
258 def CommandLineToArgvW(command_line):
259 """Splits a commandline into argv using CommandLineToArgvW()."""
260 # http://msdn.microsoft.com/library/windows/desktop/bb776391.aspx
261 size = c_int()
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000262 assert isinstance(command_line, unicode)
263 ptr = windll.shell32.CommandLineToArgvW(command_line, byref(size))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000264 try:
265 return [arg for arg in (c_wchar_p * size.value).from_address(ptr)]
266 finally:
267 windll.kernel32.LocalFree(ptr)
268
269
270elif sys.platform == 'darwin':
271
272
273 # On non-windows, keep the stdlib behavior.
274 isabs = os.path.isabs
275
276
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000277 def _native_case(p):
278 """Gets the native path case. Warning: this function resolves symlinks."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000279 try:
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000280 rel_ref, _ = Carbon.File.FSPathMakeRef(p.encode('utf-8'))
281 # The OSX underlying code uses NFD but python strings are in NFC. This
282 # will cause issues with os.listdir() for example. Since the dtrace log
283 # *is* in NFC, normalize it here.
csharp@chromium.org5a2c3732013-03-07 14:44:56 +0000284 out = unicodedata.normalize(
285 'NFC', rel_ref.FSRefMakePath().decode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000286 if p.endswith(os.path.sep) and not out.endswith(os.path.sep):
287 return out + os.path.sep
288 return out
289 except MacOS.Error, e:
290 if e.args[0] in (-43, -120):
291 # The path does not exist. Try to recurse and reconstruct the path.
292 # -43 means file not found.
293 # -120 means directory not found.
294 base = os.path.dirname(p)
295 rest = os.path.basename(p)
csharp@chromium.org5a2c3732013-03-07 14:44:56 +0000296 return os.path.join(_native_case(base), rest)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000297 raise OSError(
298 e.args[0], 'Failed to get native path for %s' % p, p, e.args[1])
299
300
301 def _split_at_symlink_native(base_path, rest):
302 """Returns the native path for a symlink."""
303 base, symlink, rest = split_at_symlink(base_path, rest)
304 if symlink:
305 if not base_path:
306 base_path = base
307 else:
308 base_path = safe_join(base_path, base)
csharp@chromium.orgf2eacff2013-04-04 14:20:20 +0000309 symlink = find_item_native_case(base_path, symlink)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000310 return base, symlink, rest
311
312
csharp@chromium.orgf2eacff2013-04-04 14:20:20 +0000313 def find_item_native_case(root_path, item):
314 """Gets the native path case of a single item based at root_path.
315
316 There is no API to get the native path case of symlinks on OSX. So it
317 needs to be done the slow way.
318 """
319 if item == '..':
320 return item
321
322 item = item.lower()
323 for element in os.listdir(root_path):
324 if element.lower() == item:
325 return element
326
327
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000328 def get_native_path_case(path):
329 """Returns the native path case for an existing file.
330
331 Technically, it's only HFS+ on OSX that is case preserving and
332 insensitive. It's the default setting on HFS+ but can be changed.
333 """
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000334 assert isinstance(path, unicode), repr(path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000335 if not isabs(path):
336 raise ValueError(
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000337 'get_native_path_case(%r): Require an absolute path' % path, path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000338 if path.startswith('/dev'):
339 # /dev is not visible from Carbon, causing an exception.
340 return path
341
342 # Starts assuming there is no symlink along the path.
343 resolved = _native_case(path)
csharp@chromium.orgf2eacff2013-04-04 14:20:20 +0000344 if path.lower() in (resolved.lower(), resolved.lower() + './'):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000345 # This code path is incredibly faster.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000346 logging.debug('get_native_path_case(%s) = %s' % (path, resolved))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000347 return resolved
348
349 # There was a symlink, process it.
350 base, symlink, rest = _split_at_symlink_native(None, path)
351 assert symlink, (path, base, symlink, rest, resolved)
352 prev = base
353 base = safe_join(_native_case(base), symlink)
354 assert len(base) > len(prev)
355 while rest:
356 prev = base
357 relbase, symlink, rest = _split_at_symlink_native(base, rest)
358 base = safe_join(base, relbase)
359 assert len(base) > len(prev), (prev, base, symlink)
360 if symlink:
361 base = safe_join(base, symlink)
362 assert len(base) > len(prev), (prev, base, symlink)
363 # Make sure no symlink was resolved.
364 assert base.lower() == path.lower(), (base, path)
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000365 logging.debug('get_native_path_case(%s) = %s' % (path, base))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000366 return base
367
368
369else: # OSes other than Windows and OSX.
370
371
372 # On non-windows, keep the stdlib behavior.
373 isabs = os.path.isabs
374
375
csharp@chromium.orgf2eacff2013-04-04 14:20:20 +0000376 def find_item_native_case(root, item):
377 """Gets the native path case of a single item based at root_path."""
378 if item == '..':
379 return item
380
381 root = get_native_path_case(root)
382 return os.path.basename(get_native_path_case(os.path.join(root, item)))
383
384
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000385 def get_native_path_case(path):
386 """Returns the native path case for an existing file.
387
388 On OSes other than OSX and Windows, assume the file system is
389 case-sensitive.
390
391 TODO(maruel): This is not strictly true. Implement if necessary.
392 """
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000393 assert isinstance(path, unicode), repr(path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000394 if not isabs(path):
395 raise ValueError(
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000396 'get_native_path_case(%r): Require an absolute path' % path, path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000397 # Give up on cygwin, as GetLongPathName() can't be called.
398 # Linux traces tends to not be normalized so use this occasion to normalize
399 # it. This function implementation already normalizes the path on the other
400 # OS so this needs to be done here to be coherent between OSes.
401 out = os.path.normpath(path)
402 if path.endswith(os.path.sep) and not out.endswith(os.path.sep):
403 return out + os.path.sep
404 return out
405
406
407if sys.platform != 'win32': # All non-Windows OSes.
408
409
410 def safe_join(*args):
411 """Joins path elements like os.path.join() but doesn't abort on absolute
412 path.
413
414 os.path.join('foo', '/bar') == '/bar'
415 but safe_join('foo', '/bar') == 'foo/bar'.
416 """
417 out = ''
418 for element in args:
419 if element.startswith(os.path.sep):
420 if out.endswith(os.path.sep):
421 out += element[1:]
422 else:
423 out += element
424 else:
425 if out.endswith(os.path.sep):
426 out += element
427 else:
428 out += os.path.sep + element
429 return out
430
431
432 def split_at_symlink(base_dir, relfile):
433 """Scans each component of relfile and cut the string at the symlink if
434 there is any.
435
436 Returns a tuple (base_path, symlink, rest), with symlink == rest == None if
437 not symlink was found.
438 """
439 if base_dir:
440 assert relfile
441 assert os.path.isabs(base_dir)
442 index = 0
443 else:
444 assert os.path.isabs(relfile)
445 index = 1
446
447 def at_root(rest):
448 if base_dir:
449 return safe_join(base_dir, rest)
450 return rest
451
452 while True:
453 try:
454 index = relfile.index(os.path.sep, index)
455 except ValueError:
456 index = len(relfile)
457 full = at_root(relfile[:index])
458 if os.path.islink(full):
459 # A symlink!
460 base = os.path.dirname(relfile[:index])
461 symlink = os.path.basename(relfile[:index])
462 rest = relfile[index:]
463 logging.debug(
464 'split_at_symlink(%s, %s) -> (%s, %s, %s)' %
465 (base_dir, relfile, base, symlink, rest))
466 return base, symlink, rest
467 if index == len(relfile):
468 break
469 index += 1
470 return relfile, None, None
471
472
maruel@chromium.orgac36fb72013-05-21 14:50:53 +0000473class Unbuffered(object):
474 """Disable buffering on a file object."""
475 def __init__(self, stream):
476 self.stream = stream
477
478 def write(self, data):
479 self.stream.write(data)
480 if '\n' in data:
481 self.stream.flush()
482
483 def __getattr__(self, attr):
484 return getattr(self.stream, attr)
485
486
487def disable_buffering():
488 """Makes this process and child processes stdout unbuffered."""
489 if not os.environ.get('PYTHONUNBUFFERED'):
490 # Since sys.stdout is a C++ object, it's impossible to do
491 # sys.stdout.write = lambda...
492 sys.stdout = Unbuffered(sys.stdout)
493 os.environ['PYTHONUNBUFFERED'] = 'x'
494
495
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000496def fix_python_path(cmd):
497 """Returns the fixed command line to call the right python executable."""
498 out = cmd[:]
499 if out[0] == 'python':
500 out[0] = sys.executable
501 elif out[0].endswith('.py'):
502 out.insert(0, sys.executable)
503 return out
504
505
maruel@chromium.org3683afe2013-07-27 00:09:27 +0000506def gen_blacklist(regexes):
507 """Returns a lambda to be used as a blacklist."""
508 compiled = [re.compile(i) for i in regexes]
509 def match(f):
510 return any(j.match(f) for j in compiled)
511 return match
512
513
maruel@chromium.org2909b7c2013-05-29 18:52:59 +0000514def create_subprocess_thunk():
515 """Creates a small temporary script to start the child process.
516
517 This thunk doesn't block, its unique name is used to identify it as the
518 parent.
519 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000520 handle, name = tempfile.mkstemp(prefix='trace_inputs_thunk', suffix='.py')
maruel@chromium.org2909b7c2013-05-29 18:52:59 +0000521 try:
522 os.write(
523 handle,
524 (
525 'import subprocess, sys\n'
526 'sys.exit(subprocess.call(sys.argv[2:]))\n'
527 ))
528 finally:
529 os.close(handle)
530 return name
531
532
533def create_exec_thunk():
534 """Creates a small temporary script to start the child executable.
535
536 Reads from the file handle provided as the fisrt argument to block, then
537 execv() the command to be traced.
538 """
539 handle, name = tempfile.mkstemp(prefix='trace_inputs_thunk', suffix='.py')
540 try:
541 os.write(
542 handle,
543 (
544 'import os, sys\n'
545 'fd = int(sys.argv[1])\n'
546 # This will block until the controlling process writes a byte on the
547 # pipe. It will do so once the tracing tool, e.g. strace, is ready to
548 # trace.
549 'os.read(fd, 1)\n'
550 'os.close(fd)\n'
551 'os.execv(sys.argv[2], sys.argv[2:])\n'
552 ))
553 finally:
554 os.close(handle)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000555 return name
556
557
558def strace_process_quoted_arguments(text):
559 """Extracts quoted arguments on a string and return the arguments as a list.
560
561 Implemented as an automaton. Supports incomplete strings in the form
562 '"foo"...'.
563
564 Example:
565 With text = '"foo", "bar"', the function will return ['foo', 'bar']
566
567 TODO(maruel): Implement escaping.
568 """
569 # All the possible states of the DFA.
570 ( NEED_QUOTE, # Begining of a new arguments.
571 INSIDE_STRING, # Inside an argument.
572 ESCAPED, # Found a '\' inside a quote. Treat the next char as-is.
573 NEED_COMMA_OR_DOT, # Right after the closing quote of an argument. Could be
574 # a serie of 3 dots or a comma.
575 NEED_SPACE, # Right after a comma
576 NEED_DOT_2, # Found a dot, need a second one.
577 NEED_DOT_3, # Found second dot, need a third one.
578 NEED_COMMA, # Found third dot, need a comma.
579 ) = range(8)
580
581 state = NEED_QUOTE
582 out = []
583 for index, char in enumerate(text):
584 if char == '"':
585 if state == NEED_QUOTE:
586 state = INSIDE_STRING
587 # A new argument was found.
588 out.append('')
589 elif state == INSIDE_STRING:
590 # The argument is now closed.
591 state = NEED_COMMA_OR_DOT
592 elif state == ESCAPED:
593 out[-1] += char
594 state = INSIDE_STRING
595 else:
596 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000597 'Can\'t process char \'%s\' at column %d for: %r' % (
598 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000599 index,
600 text)
601 elif char == ',':
602 if state in (NEED_COMMA_OR_DOT, NEED_COMMA):
603 state = NEED_SPACE
604 elif state == INSIDE_STRING:
605 out[-1] += char
606 elif state == ESCAPED:
607 out[-1] += char
608 state = INSIDE_STRING
609 else:
610 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000611 'Can\'t process char \'%s\' at column %d for: %r' % (
612 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000613 index,
614 text)
615 elif char == ' ':
616 if state == NEED_SPACE:
617 state = NEED_QUOTE
618 elif state == INSIDE_STRING:
619 out[-1] += char
620 elif state == ESCAPED:
621 out[-1] += char
622 state = INSIDE_STRING
623 else:
624 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000625 'Can\'t process char \'%s\' at column %d for: %r' % (
626 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000627 index,
628 text)
629 elif char == '.':
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000630 if state in (NEED_QUOTE, NEED_COMMA_OR_DOT):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000631 # The string is incomplete, this mean the strace -s flag should be
632 # increased.
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000633 # For NEED_QUOTE, the input string would look like '"foo", ...'.
634 # For NEED_COMMA_OR_DOT, the input string would look like '"foo"...'
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000635 state = NEED_DOT_2
636 elif state == NEED_DOT_2:
637 state = NEED_DOT_3
638 elif state == NEED_DOT_3:
639 state = NEED_COMMA
640 elif state == INSIDE_STRING:
641 out[-1] += char
642 elif state == ESCAPED:
643 out[-1] += char
644 state = INSIDE_STRING
645 else:
646 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000647 'Can\'t process char \'%s\' at column %d for: %r' % (
648 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000649 index,
650 text)
651 elif char == '\\':
652 if state == ESCAPED:
653 out[-1] += char
654 state = INSIDE_STRING
655 elif state == INSIDE_STRING:
656 state = ESCAPED
657 else:
658 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000659 'Can\'t process char \'%s\' at column %d for: %r' % (
660 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000661 index,
662 text)
663 else:
664 if state == INSIDE_STRING:
665 out[-1] += char
666 else:
667 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000668 'Can\'t process char \'%s\' at column %d for: %r' % (
669 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000670 index,
671 text)
672 if state not in (NEED_COMMA, NEED_COMMA_OR_DOT):
673 raise ValueError(
674 'String is incorrectly terminated: %r' % text,
675 text)
676 return out
677
678
679def read_json(filepath):
680 with open(filepath, 'r') as f:
681 return json.load(f)
682
683
684def write_json(filepath_or_handle, data, dense):
685 """Writes data into filepath or file handle encoded as json.
686
687 If dense is True, the json is packed. Otherwise, it is human readable.
688 """
689 if hasattr(filepath_or_handle, 'write'):
690 if dense:
691 filepath_or_handle.write(
692 json.dumps(data, sort_keys=True, separators=(',',':')))
693 else:
694 filepath_or_handle.write(json.dumps(data, sort_keys=True, indent=2))
695 else:
696 with open(filepath_or_handle, 'wb') as f:
697 if dense:
698 json.dump(data, f, sort_keys=True, separators=(',',':'))
699 else:
700 json.dump(data, f, sort_keys=True, indent=2)
701
702
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000703def assert_is_renderable(pseudo_string):
704 """Asserts the input is a valid object to be processed by render()."""
705 assert (
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000706 pseudo_string is None or
707 isinstance(pseudo_string, unicode) or
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000708 hasattr(pseudo_string, 'render')), repr(pseudo_string)
709
710
711def render(pseudo_string):
712 """Converts the pseudo-string to an unicode string."""
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000713 if pseudo_string is None or isinstance(pseudo_string, unicode):
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000714 return pseudo_string
715 return pseudo_string.render()
716
717
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000718class Results(object):
719 """Results of a trace session."""
720
721 class _TouchedObject(object):
722 """Something, a file or a directory, that was accessed."""
723 def __init__(self, root, path, tainted, size, nb_files):
724 logging.debug(
725 '%s(%s, %s, %s, %s, %s)' %
726 (self.__class__.__name__, root, path, tainted, size, nb_files))
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000727 assert_is_renderable(root)
728 assert_is_renderable(path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000729 self.root = root
730 self.path = path
731 self.tainted = tainted
732 self.nb_files = nb_files
maruel@chromium.org538141b2013-06-03 20:57:17 +0000733 # Can be used as a cache or a default value, depending on context. In
734 # particular, once self.tainted is True, because the path was replaced
735 # with a variable, it is not possible to look up the file size.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000736 self._size = size
737 # These are cache only.
738 self._real_path = None
739
740 # Check internal consistency.
741 assert path, path
742 assert tainted or bool(root) != bool(isabs(path)), (root, path)
743 assert tainted or (
744 not os.path.exists(self.full_path) or
745 (self.full_path == get_native_path_case(self.full_path))), (
746 tainted, self.full_path, get_native_path_case(self.full_path))
747
748 @property
749 def existent(self):
750 return self.size != -1
751
752 @property
753 def full_path(self):
754 if self.root:
755 return os.path.join(self.root, self.path)
756 return self.path
757
758 @property
759 def real_path(self):
760 """Returns the path with symlinks resolved."""
761 if not self._real_path:
762 self._real_path = os.path.realpath(self.full_path)
763 return self._real_path
764
765 @property
766 def size(self):
maruel@chromium.org538141b2013-06-03 20:57:17 +0000767 """File's size. -1 is not existent.
768
769 Once tainted, it is not possible the retrieve the file size anymore since
770 the path is composed of variables.
771 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000772 if self._size is None and not self.tainted:
773 try:
774 self._size = os.stat(self.full_path).st_size
775 except OSError:
776 self._size = -1
777 return self._size
778
779 def flatten(self):
780 """Returns a dict representing this object.
781
782 A 'size' of 0 means the file was only touched and not read.
783 """
784 return {
785 'path': self.path,
786 'size': self.size,
787 }
788
789 def replace_variables(self, variables):
790 """Replaces the root of this File with one of the variables if it matches.
791
792 If a variable replacement occurs, the cloned object becomes tainted.
793 """
794 for variable, root_path in variables.iteritems():
795 if self.path.startswith(root_path):
796 return self._clone(
797 self.root, variable + self.path[len(root_path):], True)
798 # No need to clone, returns ourself.
799 return self
800
801 def strip_root(self, root):
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000802 """Returns a clone of itself with 'root' stripped off.
803
804 Note that the file is kept if it is either accessible from a symlinked
805 path that was used to access the file or through the real path.
806 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000807 # Check internal consistency.
808 assert self.tainted or (isabs(root) and root.endswith(os.path.sep)), root
809 if not self.full_path.startswith(root):
810 # Now try to resolve the symlinks to see if it can be reached this way.
811 # Only try *after* trying without resolving symlink.
812 if not self.real_path.startswith(root):
813 return None
814 path = self.real_path
815 else:
816 path = self.full_path
817 return self._clone(root, path[len(root):], self.tainted)
818
819 def _clone(self, new_root, new_path, tainted):
820 raise NotImplementedError(self.__class__.__name__)
821
822 class File(_TouchedObject):
823 """A file that was accessed. May not be present anymore.
824
825 If tainted is true, it means it is not a real path anymore as a variable
826 replacement occured.
827
maruel@chromium.org538141b2013-06-03 20:57:17 +0000828 |mode| can be one of None, TOUCHED, READ or WRITE.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000829 """
maruel@chromium.org538141b2013-06-03 20:57:17 +0000830 # Was probed for existence, and it is existent, but was never _opened_.
831 TOUCHED = 't'
832 # Opened for read only and guaranteed to not have been written to.
833 READ = 'r'
834 # Opened for write.
835 WRITE = 'w'
836
837 # They are listed in order of priority. E.g. if a file is traced as TOUCHED
838 # then as WRITE, only keep WRITE. None means no idea, which is a problem on
839 # Windows.
840 ACCEPTABLE_MODES = (None, TOUCHED, READ, WRITE)
841
842 def __init__(self, root, path, tainted, size, mode):
843 assert mode in self.ACCEPTABLE_MODES
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000844 super(Results.File, self).__init__(root, path, tainted, size, 1)
maruel@chromium.org538141b2013-06-03 20:57:17 +0000845 self.mode = mode
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000846
847 def _clone(self, new_root, new_path, tainted):
848 """Clones itself keeping meta-data."""
849 # Keep the self.size and self._real_path caches for performance reason. It
850 # is also important when the file becomes tainted (with a variable instead
851 # of the real path) since self.path is not an on-disk path anymore so
852 # out._size cannot be updated.
maruel@chromium.org538141b2013-06-03 20:57:17 +0000853 out = self.__class__(new_root, new_path, tainted, self.size, self.mode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000854 out._real_path = self._real_path
855 return out
856
maruel@chromium.org538141b2013-06-03 20:57:17 +0000857 def flatten(self):
858 out = super(Results.File, self).flatten()
859 out['mode'] = self.mode
860 return out
861
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000862 class Directory(_TouchedObject):
maruel@chromium.org538141b2013-06-03 20:57:17 +0000863 """A directory of files. Must exist.
864
865 For a Directory instance, self.size is not a cache, it's an actual value
866 that is never modified and represents the total size of the files contained
867 in this directory. It is possible that the directory is empty so that
868 size==0; this happens if there's only an invalid symlink in it.
869 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000870 def __init__(self, root, path, tainted, size, nb_files):
871 """path='.' is a valid value and must be handled appropriately."""
872 assert not path.endswith(os.path.sep), path
873 super(Results.Directory, self).__init__(
874 root, path + os.path.sep, tainted, size, nb_files)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000875
876 def flatten(self):
877 out = super(Results.Directory, self).flatten()
878 out['nb_files'] = self.nb_files
879 return out
880
881 def _clone(self, new_root, new_path, tainted):
882 """Clones itself keeping meta-data."""
883 out = self.__class__(
884 new_root,
885 new_path.rstrip(os.path.sep),
886 tainted,
887 self.size,
888 self.nb_files)
889 out._real_path = self._real_path
890 return out
891
892 class Process(object):
893 """A process that was traced.
894
895 Contains references to the files accessed by this process and its children.
896 """
897 def __init__(self, pid, files, executable, command, initial_cwd, children):
898 logging.debug('Process(%s, %d, ...)' % (pid, len(files)))
899 self.pid = pid
900 self.files = sorted(files, key=lambda x: x.path)
901 self.children = children
902 self.executable = executable
903 self.command = command
904 self.initial_cwd = initial_cwd
905
906 # Check internal consistency.
907 assert len(set(f.path for f in self.files)) == len(self.files), sorted(
908 f.path for f in self.files)
909 assert isinstance(self.children, list)
910 assert isinstance(self.files, list)
911
912 @property
913 def all(self):
914 for child in self.children:
915 for i in child.all:
916 yield i
917 yield self
918
919 def flatten(self):
920 return {
921 'children': [c.flatten() for c in self.children],
922 'command': self.command,
923 'executable': self.executable,
924 'files': [f.flatten() for f in self.files],
925 'initial_cwd': self.initial_cwd,
926 'pid': self.pid,
927 }
928
929 def strip_root(self, root):
930 assert isabs(root) and root.endswith(os.path.sep), root
931 # Loads the files after since they are constructed as objects.
932 out = self.__class__(
933 self.pid,
934 filter(None, (f.strip_root(root) for f in self.files)),
935 self.executable,
936 self.command,
937 self.initial_cwd,
938 [c.strip_root(root) for c in self.children])
939 logging.debug(
940 'strip_root(%s) %d -> %d' % (root, len(self.files), len(out.files)))
941 return out
942
943 def __init__(self, process):
944 self.process = process
945 # Cache.
946 self._files = None
947
948 def flatten(self):
949 return {
950 'root': self.process.flatten(),
951 }
952
953 @property
954 def files(self):
955 if self._files is None:
956 self._files = sorted(
957 sum((p.files for p in self.process.all), []),
958 key=lambda x: x.path)
959 return self._files
960
961 @property
962 def existent(self):
963 return [f for f in self.files if f.existent]
964
965 @property
966 def non_existent(self):
967 return [f for f in self.files if not f.existent]
968
969 def strip_root(self, root):
970 """Returns a clone with all the files outside the directory |root| removed
971 and converts all the path to be relative paths.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000972
973 It keeps files accessible through the |root| directory or that have been
974 accessed through any symlink which points to the same directory.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000975 """
976 # Resolve any symlink
977 root = os.path.realpath(root)
978 root = get_native_path_case(root).rstrip(os.path.sep) + os.path.sep
979 logging.debug('strip_root(%s)' % root)
980 return Results(self.process.strip_root(root))
981
982
983class ApiBase(object):
984 """OS-agnostic API to trace a process and its children."""
985 class Context(object):
986 """Processes one log line at a time and keeps the list of traced processes.
987
988 The parsing is complicated by the fact that logs are traced out of order for
989 strace but in-order for dtrace and logman. In addition, on Windows it is
990 very frequent that processids are reused so a flat list cannot be used. But
991 at the same time, it is impossible to faithfully construct a graph when the
992 logs are processed out of order. So both a tree and a flat mapping are used,
993 the tree is the real process tree, while the flat mapping stores the last
994 valid process for the corresponding processid. For the strace case, the
995 tree's head is guessed at the last moment.
996 """
997 class Process(object):
998 """Keeps context for one traced child process.
999
1000 Logs all the files this process touched. Ignores directories.
1001 """
1002 def __init__(self, blacklist, pid, initial_cwd):
1003 # Check internal consistency.
1004 assert isinstance(pid, int), repr(pid)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001005 assert_is_renderable(initial_cwd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001006 self.pid = pid
1007 # children are Process instances.
1008 self.children = []
1009 self.initial_cwd = initial_cwd
1010 self.cwd = None
maruel@chromium.org538141b2013-06-03 20:57:17 +00001011 self.files = {}
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001012 self.executable = None
1013 self.command = None
1014 self._blacklist = blacklist
1015
1016 def to_results_process(self):
1017 """Resolves file case sensitivity and or late-bound strings."""
1018 # When resolving files, it's normal to get dupe because a file could be
1019 # opened multiple times with different case. Resolve the deduplication
1020 # here.
1021 def fix_path(x):
1022 """Returns the native file path case.
1023
1024 Converts late-bound strings.
1025 """
1026 if not x:
1027 # Do not convert None instance to 'None'.
1028 return x
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001029 x = render(x)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001030 if os.path.isabs(x):
1031 # If the path is not absolute, which tends to happen occasionally on
1032 # Windows, it is not possible to get the native path case so ignore
1033 # that trace. It mostly happens for 'executable' value.
1034 x = get_native_path_case(x)
1035 return x
1036
maruel@chromium.org538141b2013-06-03 20:57:17 +00001037 def fix_and_blacklist_path(x, m):
1038 """Receives a tuple (filepath, mode) and processes filepath."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001039 x = fix_path(x)
1040 if not x:
1041 return
1042 # The blacklist needs to be reapplied, since path casing could
1043 # influence blacklisting.
1044 if self._blacklist(x):
1045 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00001046 # Filters out directories. Some may have passed through.
1047 if os.path.isdir(x):
1048 return
1049 return x, m
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001050
maruel@chromium.org538141b2013-06-03 20:57:17 +00001051 # Renders all the files as strings, as some could be RelativePath
1052 # instances. It is important to do it first since there could still be
1053 # multiple entries with the same path but different modes.
1054 rendered = (
1055 fix_and_blacklist_path(f, m) for f, m in self.files.iteritems())
1056 files = sorted(
1057 (f for f in rendered if f),
1058 key=lambda x: (x[0], Results.File.ACCEPTABLE_MODES.index(x[1])))
1059 # Then converting into a dict will automatically clean up lesser
1060 # important values.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001061 files = [
maruel@chromium.org538141b2013-06-03 20:57:17 +00001062 Results.File(None, f, False, None, m)
1063 for f, m in dict(files).iteritems()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001064 ]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001065 return Results.Process(
1066 self.pid,
1067 files,
1068 fix_path(self.executable),
1069 self.command,
1070 fix_path(self.initial_cwd),
1071 [c.to_results_process() for c in self.children])
1072
maruel@chromium.org538141b2013-06-03 20:57:17 +00001073 def add_file(self, filepath, mode):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001074 """Adds a file if it passes the blacklist."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001075 if self._blacklist(render(filepath)):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001076 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00001077 logging.debug('add_file(%d, %s, %s)', self.pid, filepath, mode)
1078 # Note that filepath and not render(filepath) is added. It is because
1079 # filepath could be something else than a string, like a RelativePath
1080 # instance for dtrace logs.
1081 modes = Results.File.ACCEPTABLE_MODES
1082 old_mode = self.files.setdefault(filepath, mode)
1083 if old_mode != mode and modes.index(old_mode) < modes.index(mode):
1084 # Take the highest value.
1085 self.files[filepath] = mode
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001086
1087 def __init__(self, blacklist):
1088 self.blacklist = blacklist
1089 # Initial process.
1090 self.root_process = None
1091 # dict to accelerate process lookup, to not have to lookup the whole graph
1092 # each time.
1093 self._process_lookup = {}
1094
1095 class Tracer(object):
1096 """During it's lifetime, the tracing subsystem is enabled."""
1097 def __init__(self, logname):
1098 self._logname = logname
maruel@chromium.org808e3f22013-05-16 16:42:54 +00001099 self._lock = threading.RLock()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001100 self._traces = []
1101 self._initialized = True
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00001102 self._scripts_to_cleanup = []
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001103
1104 def trace(self, cmd, cwd, tracename, output):
1105 """Runs the OS-specific trace program on an executable.
1106
1107 Arguments:
1108 - cmd: The command (a list) to run.
1109 - cwd: Current directory to start the child process in.
1110 - tracename: Name of the trace in the logname file.
1111 - output: If False, redirects output to PIPEs.
1112
1113 Returns a tuple (resultcode, output) and updates the internal trace
1114 entries.
1115 """
1116 # The implementation adds an item to self._traces.
1117 raise NotImplementedError(self.__class__.__name__)
1118
1119 def close(self, _timeout=None):
1120 """Saves the meta-data in the logname file.
1121
1122 For kernel-based tracing, stops the tracing subsystem.
1123
1124 Must not be used manually when using 'with' construct.
1125 """
1126 with self._lock:
maruel@chromium.org808e3f22013-05-16 16:42:54 +00001127 if not self._initialized:
1128 raise TracingFailure(
1129 'Called %s.close() on an unitialized object' %
1130 self.__class__.__name__,
1131 None, None, None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001132 try:
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00001133 while self._scripts_to_cleanup:
1134 try:
1135 os.remove(self._scripts_to_cleanup.pop())
1136 except OSError as e:
1137 logging.error('Failed to delete a temporary script: %s', e)
maruel@chromium.org808e3f22013-05-16 16:42:54 +00001138 write_json(self._logname, self._gen_logdata(), False)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001139 finally:
1140 self._initialized = False
1141
1142 def post_process_log(self):
1143 """Post-processes the log so it becomes faster to load afterward.
1144
1145 Must not be used manually when using 'with' construct.
1146 """
1147 assert not self._initialized, 'Must stop tracing first.'
1148
maruel@chromium.org808e3f22013-05-16 16:42:54 +00001149 def _gen_logdata(self):
1150 """Returns the data to be saved in the trace file."""
1151 return {
1152 'traces': self._traces,
1153 }
1154
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001155 def __enter__(self):
1156 """Enables 'with' statement."""
1157 return self
1158
1159 def __exit__(self, exc_type, exc_value, traceback):
1160 """Enables 'with' statement."""
1161 self.close()
1162 # If an exception was thrown, do not process logs.
1163 if not exc_type:
1164 self.post_process_log()
1165
1166 def get_tracer(self, logname):
1167 """Returns an ApiBase.Tracer instance.
1168
1169 Initializes the tracing subsystem, which is a requirement for kernel-based
1170 tracers. Only one tracer instance should be live at a time!
1171
1172 logname is the filepath to the json file that will contain the meta-data
1173 about the logs.
1174 """
1175 return self.Tracer(logname)
1176
1177 @staticmethod
1178 def clean_trace(logname):
1179 """Deletes an old log."""
1180 raise NotImplementedError()
1181
1182 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001183 def parse_log(cls, logname, blacklist, trace_name):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001184 """Processes trace logs and returns the files opened and the files that do
1185 not exist.
1186
1187 It does not track directories.
1188
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001189 Arguments:
1190 - logname: must be an absolute path.
1191 - blacklist: must be a lambda.
1192 - trace_name: optional trace to read, defaults to reading all traces.
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00001193
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001194 Most of the time, files that do not exist are temporary test files that
1195 should be put in /tmp instead. See http://crbug.com/116251.
1196
1197 Returns a list of dict with keys:
1198 - results: A Results instance.
1199 - trace: The corresponding tracename parameter provided to
1200 get_tracer().trace().
1201 - output: Output gathered during execution, if get_tracer().trace(...,
1202 output=False) was used.
1203 """
1204 raise NotImplementedError(cls.__class__.__name__)
1205
1206
1207class Strace(ApiBase):
1208 """strace implies linux."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001209 @staticmethod
1210 def load_filename(filename):
1211 """Parses a filename in a log."""
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001212 # TODO(maruel): Be compatible with strace -x.
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001213 assert isinstance(filename, str)
1214 out = ''
1215 i = 0
1216 while i < len(filename):
1217 c = filename[i]
1218 if c == '\\':
1219 out += chr(int(filename[i+1:i+4], 8))
1220 i += 4
1221 else:
1222 out += c
1223 i += 1
1224 # TODO(maruel): That's not necessarily true that the current code page is
1225 # utf-8.
1226 return out.decode('utf-8')
1227
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001228 class Context(ApiBase.Context):
1229 """Processes a strace log line and keeps the list of existent and non
1230 existent files accessed.
1231
1232 Ignores directories.
1233
1234 Uses late-binding to processes the cwd of each process. The problem is that
1235 strace generates one log file per process it traced but doesn't give any
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001236 information about which process was started when and by who. So process the
1237 logs out of order and use late binding with RelativePath to be able to
1238 deduce the initial directory of each process once all the logs are parsed.
1239
1240 TODO(maruel): Use the script even in the non-sudo case, so log parsing can
1241 be done in two phase: first find the root process, then process the child
1242 processes in order. With that, it should be possible to not use RelativePath
1243 anymore. This would significantly simplify the code!
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001244 """
1245 class Process(ApiBase.Context.Process):
1246 """Represents the state of a process.
1247
1248 Contains all the information retrieved from the pid-specific log.
1249 """
1250 # Function names are using ([a-z_0-9]+)
1251 # This is the most common format. function(args) = result
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001252 RE_HEADER = re.compile(r'^([a-z_0-9]+)\((.*?)\)\s+= (.+)$')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001253 # An interrupted function call, only grab the minimal header.
1254 RE_UNFINISHED = re.compile(r'^([^\(]+)(.*) \<unfinished \.\.\.\>$')
1255 # A resumed function call.
1256 RE_RESUMED = re.compile(r'^<\.\.\. ([^ ]+) resumed> (.+)$')
1257 # A process received a signal.
1258 RE_SIGNAL = re.compile(r'^--- SIG[A-Z]+ .+ ---')
1259 # A process didn't handle a signal. Ignore any junk appearing before,
1260 # because the process was forcibly killed so it won't open any new file.
1261 RE_KILLED = re.compile(
1262 r'^.*\+\+\+ killed by ([A-Z]+)( \(core dumped\))? \+\+\+$')
1263 # The process has exited.
1264 RE_PROCESS_EXITED = re.compile(r'^\+\+\+ exited with (\d+) \+\+\+')
1265 # A call was canceled. Ignore any prefix.
1266 RE_UNAVAILABLE = re.compile(r'^.*\)\s*= \? <unavailable>$')
1267 # Happens when strace fails to even get the function name.
1268 UNNAMED_FUNCTION = '????'
1269
1270 # Corner-case in python, a class member function decorator must not be
1271 # @staticmethod.
1272 def parse_args(regexp, expect_zero): # pylint: disable=E0213
1273 """Automatically convert the str 'args' into a list of processed
1274 arguments.
1275
1276 Arguments:
1277 - regexp is used to parse args.
1278 - expect_zero: one of True, False or None.
1279 - True: will check for result.startswith('0') first and will ignore
1280 the trace line completely otherwise. This is important because for
1281 many functions, the regexp will not process if the call failed.
1282 - False: will check for not result.startswith(('?', '-1')) for the
1283 same reason than with True.
1284 - None: ignore result.
1285 """
1286 def meta_hook(function):
1287 assert function.__name__.startswith('handle_')
1288 def hook(self, args, result):
1289 if expect_zero is True and not result.startswith('0'):
1290 return
1291 if expect_zero is False and result.startswith(('?', '-1')):
1292 return
1293 match = re.match(regexp, args)
1294 if not match:
1295 raise TracingFailure(
1296 'Failed to parse %s(%s) = %s' %
1297 (function.__name__[len('handle_'):], args, result),
1298 None, None, None)
1299 return function(self, match.groups(), result)
1300 return hook
1301 return meta_hook
1302
1303 class RelativePath(object):
1304 """A late-bound relative path."""
1305 def __init__(self, parent, value):
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001306 assert_is_renderable(parent)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001307 self.parent = parent
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001308 assert (
1309 value is None or
1310 (isinstance(value, unicode) and not os.path.isabs(value)))
1311 self.value = value
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001312 if self.value:
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001313 # TODO(maruel): On POSIX, '\\' is a valid character so remove this
1314 # assert.
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001315 assert '\\' not in self.value, value
1316 assert '\\' not in self.value, (repr(value), repr(self.value))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001317
1318 def render(self):
1319 """Returns the current directory this instance is representing.
1320
1321 This function is used to return the late-bound value.
1322 """
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001323 assert self.parent is not None
1324 parent = render(self.parent)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001325 if self.value:
1326 return os.path.normpath(os.path.join(parent, self.value))
1327 return parent
1328
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001329 def __init__(self, root, pid):
1330 """Keeps enough information to be able to guess the original process
1331 root.
1332
1333 strace doesn't store which process was the initial process. So more
1334 information needs to be kept so the graph can be reconstructed from the
1335 flat map.
1336 """
1337 logging.info('%s(%d)' % (self.__class__.__name__, pid))
1338 super(Strace.Context.Process, self).__init__(root.blacklist, pid, None)
1339 assert isinstance(root, ApiBase.Context)
1340 self._root = weakref.ref(root)
1341 # The dict key is the function name of the pending call, like 'open'
1342 # or 'execve'.
1343 self._pending_calls = {}
1344 self._line_number = 0
1345 # Current directory when the process started.
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001346 if isinstance(self._root(), unicode):
1347 self.initial_cwd = self._root()
1348 else:
1349 self.initial_cwd = self.RelativePath(self._root(), None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001350 self.parentid = None
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001351 self._done = False
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001352
1353 def get_cwd(self):
1354 """Returns the best known value of cwd."""
1355 return self.cwd or self.initial_cwd
1356
1357 def render(self):
1358 """Returns the string value of the RelativePath() object.
1359
1360 Used by RelativePath. Returns the initial directory and not the
1361 current one since the current directory 'cwd' validity is time-limited.
1362
1363 The validity is only guaranteed once all the logs are processed.
1364 """
1365 return self.initial_cwd.render()
1366
1367 def on_line(self, line):
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001368 assert isinstance(line, str)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001369 self._line_number += 1
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001370 try:
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001371 if self._done:
1372 raise TracingFailure(
1373 'Found a trace for a terminated process or corrupted log',
1374 None, None, None)
1375
1376 if self.RE_SIGNAL.match(line):
1377 # Ignore signals.
1378 return
1379
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001380 match = self.RE_KILLED.match(line)
1381 if match:
1382 # Converts a '+++ killed by Foo +++' trace into an exit_group().
1383 self.handle_exit_group(match.group(1), None)
1384 return
1385
1386 match = self.RE_PROCESS_EXITED.match(line)
1387 if match:
1388 # Converts a '+++ exited with 1 +++' trace into an exit_group()
1389 self.handle_exit_group(match.group(1), None)
1390 return
1391
1392 match = self.RE_UNFINISHED.match(line)
1393 if match:
1394 if match.group(1) in self._pending_calls:
1395 raise TracingFailure(
1396 'Found two unfinished calls for the same function',
1397 None, None, None,
1398 self._pending_calls)
1399 self._pending_calls[match.group(1)] = (
1400 match.group(1) + match.group(2))
1401 return
1402
1403 match = self.RE_UNAVAILABLE.match(line)
1404 if match:
1405 # This usually means a process was killed and a pending call was
1406 # canceled.
1407 # TODO(maruel): Look up the last exit_group() trace just above and
1408 # make sure any self._pending_calls[anything] is properly flushed.
1409 return
1410
1411 match = self.RE_RESUMED.match(line)
1412 if match:
1413 if match.group(1) not in self._pending_calls:
1414 raise TracingFailure(
1415 'Found a resumed call that was not logged as unfinished',
1416 None, None, None,
1417 self._pending_calls)
1418 pending = self._pending_calls.pop(match.group(1))
1419 # Reconstruct the line.
1420 line = pending + match.group(2)
1421
1422 match = self.RE_HEADER.match(line)
1423 if not match:
maruel@chromium.orgbcfc9e12013-03-19 18:47:17 +00001424 # The line is corrupted. It happens occasionally when a process is
1425 # killed forcibly with activity going on. Assume the process died.
1426 # No other line can be processed afterward.
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001427 logging.debug('%d is done: %s', self.pid, line)
maruel@chromium.orgbcfc9e12013-03-19 18:47:17 +00001428 self._done = True
1429 return
1430
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001431 if match.group(1) == self.UNNAMED_FUNCTION:
1432 return
1433
1434 # It's a valid line, handle it.
1435 handler = getattr(self, 'handle_%s' % match.group(1), None)
1436 if not handler:
1437 self._handle_unknown(match.group(1), match.group(2), match.group(3))
1438 return handler(match.group(2), match.group(3))
1439 except TracingFailure, e:
1440 # Hack in the values since the handler could be a static function.
1441 e.pid = self.pid
1442 e.line = line
1443 e.line_number = self._line_number
1444 # Re-raise the modified exception.
1445 raise
1446 except (KeyError, NotImplementedError, ValueError), e:
1447 raise TracingFailure(
1448 'Trace generated a %s exception: %s' % (
1449 e.__class__.__name__, str(e)),
1450 self.pid,
1451 self._line_number,
1452 line,
1453 e)
1454
1455 @parse_args(r'^\"(.+?)\", [FKORWX_|]+$', True)
1456 def handle_access(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001457 self._handle_file(args[0], Results.File.TOUCHED)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001458
1459 @parse_args(r'^\"(.+?)\"$', True)
1460 def handle_chdir(self, args, _result):
1461 """Updates cwd."""
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001462 self.cwd = self._mangle(args[0])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001463 logging.debug('handle_chdir(%d, %s)' % (self.pid, self.cwd))
1464
maruel@chromium.org538141b2013-06-03 20:57:17 +00001465 @parse_args(r'^\"(.+?)\", (\d+), (\d+)$', False)
1466 def handle_chown(self, args, _result):
1467 # TODO(maruel): Look at result?
1468 self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org55550722013-05-28 14:43:14 +00001469
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001470 def handle_clone(self, _args, result):
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001471 self._handling_forking('clone', result)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001472
1473 def handle_close(self, _args, _result):
1474 pass
1475
maruel@chromium.org538141b2013-06-03 20:57:17 +00001476 @parse_args(r'^\"(.+?)\", (\d+)$', False)
1477 def handle_chmod(self, args, _result):
1478 self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001479
maruel@chromium.org538141b2013-06-03 20:57:17 +00001480 @parse_args(r'^\"(.+?)\", (\d+)$', False)
1481 def handle_creat(self, args, _result):
1482 self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001483
1484 @parse_args(r'^\"(.+?)\", \[(.+)\], \[\/\* \d+ vars? \*\/\]$', True)
1485 def handle_execve(self, args, _result):
1486 # Even if in practice execve() doesn't returns when it succeeds, strace
1487 # still prints '0' as the result.
1488 filepath = args[0]
maruel@chromium.org538141b2013-06-03 20:57:17 +00001489 self._handle_file(filepath, Results.File.READ)
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001490 self.executable = self._mangle(filepath)
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001491 try:
1492 self.command = strace_process_quoted_arguments(args[1])
1493 except ValueError as e:
1494 raise TracingFailure(
1495 'Failed to process command line argument:\n%s' % e.args[0],
1496 None, None, None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001497
1498 def handle_exit_group(self, _args, _result):
1499 """Removes cwd."""
1500 self.cwd = None
1501
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001502 @parse_args(r'^(\d+|AT_FDCWD), \"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', True)
1503 def handle_faccessat(self, args, _results):
1504 if args[0] == 'AT_FDCWD':
maruel@chromium.org538141b2013-06-03 20:57:17 +00001505 self._handle_file(args[1], Results.File.TOUCHED)
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001506 else:
1507 raise Exception('Relative faccess not implemented.')
1508
maruel@chromium.org0781f322013-05-28 19:45:49 +00001509 def handle_fallocate(self, _args, result):
1510 pass
1511
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001512 def handle_fork(self, args, result):
1513 self._handle_unknown('fork', args, result)
1514
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001515 def handle_futex(self, _args, _result):
1516 pass
1517
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001518 @parse_args(r'^\"(.+?)\", (\d+)$', False)
1519 def handle_getcwd(self, args, _result):
1520 if os.path.isabs(args[0]):
1521 logging.debug('handle_chdir(%d, %s)' % (self.pid, self.cwd))
1522 if not isinstance(self.cwd, unicode):
1523 # Take the occasion to reset the path.
1524 self.cwd = self._mangle(args[0])
1525 else:
1526 # It should always match.
1527 assert self.cwd == Strace.load_filename(args[0]), (
1528 self.cwd, args[0])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001529
1530 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1531 def handle_link(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001532 self._handle_file(args[0], Results.File.READ)
1533 self._handle_file(args[1], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001534
1535 @parse_args(r'\"(.+?)\", \{.+?, \.\.\.\}', True)
1536 def handle_lstat(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001537 self._handle_file(args[0], Results.File.TOUCHED)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001538
1539 def handle_mkdir(self, _args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001540 # We track content, not directories.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001541 pass
1542
1543 @parse_args(r'^\"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', False)
1544 def handle_open(self, args, _result):
1545 if 'O_DIRECTORY' in args[1]:
1546 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00001547 self._handle_file(
1548 args[0],
1549 Results.File.READ if 'O_RDONLY' in args[1] else Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001550
1551 @parse_args(r'^(\d+|AT_FDCWD), \"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', False)
1552 def handle_openat(self, args, _result):
1553 if 'O_DIRECTORY' in args[2]:
1554 return
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001555 if args[0] == 'AT_FDCWD':
maruel@chromium.org538141b2013-06-03 20:57:17 +00001556 self._handle_file(
1557 args[1],
1558 Results.File.READ if 'O_RDONLY' in args[2]
1559 else Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001560 else:
1561 # TODO(maruel): Implement relative open if necessary instead of the
1562 # AT_FDCWD flag, let's hope not since this means tracking all active
1563 # directory handles.
maruel@chromium.org538141b2013-06-03 20:57:17 +00001564 raise NotImplementedError('Relative open via openat not implemented.')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001565
1566 @parse_args(r'^\"(.+?)\", \".+?\"(\.\.\.)?, \d+$', False)
1567 def handle_readlink(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001568 self._handle_file(args[0], Results.File.READ)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001569
1570 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1571 def handle_rename(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001572 self._handle_file(args[0], Results.File.READ)
1573 self._handle_file(args[1], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001574
1575 def handle_rmdir(self, _args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001576 # We track content, not directories.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001577 pass
1578
1579 def handle_setxattr(self, _args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001580 # TODO(maruel): self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001581 pass
1582
1583 @parse_args(r'\"(.+?)\", \{.+?, \.\.\.\}', True)
1584 def handle_stat(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001585 self._handle_file(args[0], Results.File.TOUCHED)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001586
maruel@chromium.org538141b2013-06-03 20:57:17 +00001587 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1588 def handle_symlink(self, args, _result):
1589 self._handle_file(args[0], Results.File.TOUCHED)
1590 self._handle_file(args[1], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001591
csharp@chromium.orgfa672922012-11-02 17:43:31 +00001592 @parse_args(r'^\"(.+?)\", \d+', True)
1593 def handle_truncate(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001594 self._handle_file(args[0], Results.File.WRITE)
csharp@chromium.orgfa672922012-11-02 17:43:31 +00001595
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001596 def handle_unlink(self, _args, _result):
1597 # In theory, the file had to be created anyway.
1598 pass
1599
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001600 def handle_unlinkat(self, _args, _result):
1601 # In theory, the file had to be created anyway.
1602 pass
1603
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001604 def handle_statfs(self, _args, _result):
1605 pass
1606
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001607 def handle_utimensat(self, _args, _result):
1608 pass
1609
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001610 def handle_vfork(self, _args, result):
1611 self._handling_forking('vfork', result)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001612
1613 @staticmethod
1614 def _handle_unknown(function, args, result):
1615 raise TracingFailure(
1616 'Unexpected/unimplemented trace %s(%s)= %s' %
1617 (function, args, result),
1618 None, None, None)
1619
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001620 def _handling_forking(self, name, result):
1621 """Transfers cwd."""
1622 if result.startswith(('?', '-1')):
1623 # The call failed.
1624 return
1625 # Update the other process right away.
1626 childpid = int(result)
1627 child = self._root().get_or_set_proc(childpid)
1628 if child.parentid is not None or childpid in self.children:
1629 raise TracingFailure(
1630 'Found internal inconsitency in process lifetime detection '
1631 'during a %s() call' % name,
1632 None, None, None)
1633
1634 # Copy the cwd object.
1635 child.initial_cwd = self.get_cwd()
1636 child.parentid = self.pid
1637 # It is necessary because the logs are processed out of order.
1638 self.children.append(child)
1639
maruel@chromium.org538141b2013-06-03 20:57:17 +00001640 def _handle_file(self, filepath, mode):
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001641 filepath = self._mangle(filepath)
maruel@chromium.org538141b2013-06-03 20:57:17 +00001642 self.add_file(filepath, mode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001643
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001644 def _mangle(self, filepath):
1645 """Decodes a filepath found in the log and convert it to a late-bound
1646 path if necessary.
1647
1648 |filepath| is an strace 'encoded' string and the returned value is
1649 either an unicode string if the path was absolute or a late bound path
1650 otherwise.
1651 """
1652 filepath = Strace.load_filename(filepath)
1653 if os.path.isabs(filepath):
1654 return filepath
1655 else:
1656 if isinstance(self.get_cwd(), unicode):
1657 return os.path.normpath(os.path.join(self.get_cwd(), filepath))
1658 return self.RelativePath(self.get_cwd(), filepath)
1659
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001660 def __init__(self, blacklist, root_pid, initial_cwd):
1661 """|root_pid| may be None when the root process is not known.
1662
1663 In that case, a search is done after reading all the logs to figure out
1664 the root process.
1665 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001666 super(Strace.Context, self).__init__(blacklist)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001667 assert_is_renderable(initial_cwd)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001668 self.root_pid = root_pid
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001669 self.initial_cwd = initial_cwd
1670
1671 def render(self):
1672 """Returns the string value of the initial cwd of the root process.
1673
1674 Used by RelativePath.
1675 """
1676 return self.initial_cwd
1677
1678 def on_line(self, pid, line):
1679 """Transfers control into the Process.on_line() function."""
1680 self.get_or_set_proc(pid).on_line(line.strip())
1681
1682 def to_results(self):
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001683 """If necessary, finds back the root process and verify consistency."""
1684 if not self.root_pid:
1685 # The non-sudo case. The traced process was started by strace itself,
1686 # so the pid of the traced process is not known.
1687 root = [p for p in self._process_lookup.itervalues() if not p.parentid]
1688 if len(root) == 1:
1689 self.root_process = root[0]
1690 # Save it for later.
1691 self.root_pid = self.root_process.pid
1692 else:
1693 # The sudo case. The traced process was started manually so its pid is
1694 # known.
1695 self.root_process = self._process_lookup.get(self.root_pid)
1696 if not self.root_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001697 raise TracingFailure(
1698 'Found internal inconsitency in process lifetime detection '
1699 'while finding the root process',
1700 None,
1701 None,
1702 None,
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001703 self.root_pid,
1704 sorted(self._process_lookup))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001705 process = self.root_process.to_results_process()
1706 if sorted(self._process_lookup) != sorted(p.pid for p in process.all):
1707 raise TracingFailure(
1708 'Found internal inconsitency in process lifetime detection '
1709 'while looking for len(tree) == len(list)',
1710 None,
1711 None,
1712 None,
1713 sorted(self._process_lookup),
1714 sorted(p.pid for p in process.all))
1715 return Results(process)
1716
1717 def get_or_set_proc(self, pid):
1718 """Returns the Context.Process instance for this pid or creates a new one.
1719 """
1720 if not pid or not isinstance(pid, int):
1721 raise TracingFailure(
1722 'Unpexpected value for pid: %r' % pid,
1723 pid,
1724 None,
1725 None,
1726 pid)
1727 if pid not in self._process_lookup:
1728 self._process_lookup[pid] = self.Process(self, pid)
1729 return self._process_lookup[pid]
1730
1731 @classmethod
1732 def traces(cls):
1733 """Returns the list of all handled traces to pass this as an argument to
1734 strace.
1735 """
1736 prefix = 'handle_'
1737 return [i[len(prefix):] for i in dir(cls.Process) if i.startswith(prefix)]
1738
1739 class Tracer(ApiBase.Tracer):
1740 MAX_LEN = 256
1741
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001742 def __init__(self, logname, use_sudo):
1743 super(Strace.Tracer, self).__init__(logname)
1744 self.use_sudo = use_sudo
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001745 if use_sudo:
1746 # TODO(maruel): Use the jump script systematically to make it easy to
1747 # figure out the root process, so RelativePath is not necessary anymore.
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00001748 self._child_script = create_exec_thunk()
1749 self._scripts_to_cleanup.append(self._child_script)
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001750
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001751 def trace(self, cmd, cwd, tracename, output):
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001752 """Runs strace on an executable.
1753
1754 When use_sudo=True, it is a 3-phases process: start the thunk, start
1755 sudo strace with the pid of the thunk and then have the thunk os.execve()
1756 the process to trace.
1757 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001758 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
1759 assert os.path.isabs(cmd[0]), cmd[0]
1760 assert os.path.isabs(cwd), cwd
1761 assert os.path.normpath(cwd) == cwd, cwd
1762 with self._lock:
1763 if not self._initialized:
1764 raise TracingFailure(
1765 'Called Tracer.trace() on an unitialized object',
1766 None, None, None, tracename)
1767 assert tracename not in (i['trace'] for i in self._traces)
1768 stdout = stderr = None
1769 if output:
1770 stdout = subprocess.PIPE
1771 stderr = subprocess.STDOUT
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001772
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001773 # Ensure all file related APIs are hooked.
1774 traces = ','.join(Strace.Context.traces() + ['file'])
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001775 flags = [
maruel@chromium.orga5cfdc32013-05-28 14:46:15 +00001776 # Each child process has its own trace file. It is necessary because
1777 # strace may generate corrupted log file if multiple processes are
1778 # heavily doing syscalls simultaneously.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001779 '-ff',
maruel@chromium.orga5cfdc32013-05-28 14:46:15 +00001780 # Reduce whitespace usage.
1781 '-a1',
1782 # hex encode non-ascii strings.
1783 # TODO(maruel): '-x',
1784 # TODO(maruel): '-ttt',
1785 # Signals are unnecessary noise here. Note the parser can cope with them
1786 # but reduce the unnecessary output.
1787 '-esignal=none',
1788 # Print as much data as wanted.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001789 '-s', '%d' % self.MAX_LEN,
1790 '-e', 'trace=%s' % traces,
1791 '-o', self._logname + '.' + tracename,
1792 ]
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001793
1794 if self.use_sudo:
1795 pipe_r, pipe_w = os.pipe()
1796 # Start the child process paused.
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00001797 target_cmd = [sys.executable, self._child_script, str(pipe_r)] + cmd
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001798 logging.debug(' '.join(target_cmd))
1799 child_proc = subprocess.Popen(
1800 target_cmd,
1801 stdin=subprocess.PIPE,
1802 stdout=stdout,
1803 stderr=stderr,
1804 cwd=cwd)
1805
1806 # TODO(maruel): both processes must use the same UID for it to work
1807 # without sudo. Look why -p is failing at the moment without sudo.
1808 trace_cmd = [
1809 'sudo',
1810 'strace',
1811 '-p', str(child_proc.pid),
1812 ] + flags
1813 logging.debug(' '.join(trace_cmd))
1814 strace_proc = subprocess.Popen(
1815 trace_cmd,
1816 cwd=cwd,
1817 stdin=subprocess.PIPE,
1818 stdout=subprocess.PIPE,
1819 stderr=subprocess.PIPE)
1820
1821 line = strace_proc.stderr.readline()
1822 if not re.match(r'^Process \d+ attached \- interrupt to quit$', line):
1823 # TODO(maruel): Raise an exception.
1824 assert False, line
1825
1826 # Now fire the child process.
1827 os.write(pipe_w, 'x')
1828
1829 out = child_proc.communicate()[0]
1830 strace_out = strace_proc.communicate()[0]
1831
1832 # TODO(maruel): if strace_proc.returncode: Add an exception.
1833 saved_out = strace_out if strace_proc.returncode else out
1834 root_pid = child_proc.pid
1835 else:
1836 # Non-sudo case.
1837 trace_cmd = [
1838 'strace',
1839 ] + flags + cmd
1840 logging.debug(' '.join(trace_cmd))
1841 child_proc = subprocess.Popen(
1842 trace_cmd,
1843 cwd=cwd,
1844 stdin=subprocess.PIPE,
1845 stdout=stdout,
1846 stderr=stderr)
1847 out = child_proc.communicate()[0]
1848 # TODO(maruel): Walk the logs and figure out the root process would
1849 # simplify parsing the logs a *lot*.
1850 saved_out = out
1851 # The trace reader will have to figure out.
1852 root_pid = None
1853
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001854 with self._lock:
1855 assert tracename not in (i['trace'] for i in self._traces)
1856 self._traces.append(
1857 {
1858 'cmd': cmd,
1859 'cwd': cwd,
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001860 'output': saved_out,
1861 'pid': root_pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001862 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001863 })
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001864 return child_proc.returncode, out
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001865
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001866 def __init__(self, use_sudo=None):
1867 super(Strace, self).__init__()
1868 self.use_sudo = use_sudo
1869
1870 def get_tracer(self, logname):
1871 return self.Tracer(logname, self.use_sudo)
1872
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001873 @staticmethod
1874 def clean_trace(logname):
1875 if os.path.isfile(logname):
1876 os.remove(logname)
1877 # Also delete any pid specific file from previous traces.
1878 for i in glob.iglob(logname + '.*'):
1879 if i.rsplit('.', 1)[1].isdigit():
1880 os.remove(i)
1881
1882 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001883 def parse_log(cls, logname, blacklist, trace_name):
1884 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00001885 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001886 data = read_json(logname)
1887 out = []
1888 for item in data['traces']:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001889 if trace_name and item['trace'] != trace_name:
1890 continue
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001891 result = {
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001892 'output': item['output'],
maruel@chromium.org964ddde2012-10-29 18:13:46 +00001893 'trace': item['trace'],
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001894 }
1895 try:
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001896 context = cls.Context(blacklist, item['pid'], item['cwd'])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001897 for pidfile in glob.iglob('%s.%s.*' % (logname, item['trace'])):
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001898 logging.debug('Reading %s', pidfile)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001899 pid = pidfile.rsplit('.', 1)[1]
1900 if pid.isdigit():
1901 pid = int(pid)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001902 found_line = False
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001903 for line in open(pidfile, 'rb'):
1904 context.on_line(pid, line)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001905 found_line = True
1906 if not found_line:
1907 # Ensures that a completely empty trace still creates the
1908 # corresponding Process instance by logging a dummy line.
1909 context.on_line(pid, '')
1910 else:
1911 logging.warning('Found unexpected file %s', pidfile)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001912 result['results'] = context.to_results()
maruel@chromium.org424f5692012-10-15 15:42:38 +00001913 except TracingFailure:
1914 result['exception'] = sys.exc_info()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001915 out.append(result)
1916 return out
1917
1918
1919class Dtrace(ApiBase):
1920 """Uses DTrace framework through dtrace. Requires root access.
1921
1922 Implies Mac OSX.
1923
1924 dtruss can't be used because it has compatibility issues with python.
1925
1926 Also, the pid->cwd handling needs to be done manually since OSX has no way to
1927 get the absolute path of the 'cwd' dtrace variable from the probe.
1928
1929 Also, OSX doesn't populate curpsinfo->pr_psargs properly, see
1930 https://discussions.apple.com/thread/1980539. So resort to handling execve()
1931 manually.
1932
1933 errno is not printed in the log since this implementation currently only cares
1934 about files that were successfully opened.
1935 """
1936 class Context(ApiBase.Context):
1937 # Format: index pid function(args)
1938 RE_HEADER = re.compile(r'^\d+ (\d+) ([a-zA-Z_\-]+)\((.*?)\)$')
1939
1940 # Arguments parsing.
1941 RE_DTRACE_BEGIN = re.compile(r'^\"(.+?)\"$')
1942 RE_CHDIR = re.compile(r'^\"(.+?)\"$')
1943 RE_EXECVE = re.compile(r'^\"(.+?)\", \[(\d+), (.+)\]$')
1944 RE_OPEN = re.compile(r'^\"(.+?)\", (0x[0-9a-z]+), (0x[0-9a-z]+)$')
1945 RE_PROC_START = re.compile(r'^(\d+), \"(.+?)\", (\d+)$')
1946 RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$')
1947
maruel@chromium.orgac361162013-06-04 15:54:06 +00001948 # O_DIRECTORY is not defined on Windows and dtrace doesn't exist on Windows.
1949 O_DIRECTORY = os.O_DIRECTORY if hasattr(os, 'O_DIRECTORY') else None
maruel@chromium.org538141b2013-06-03 20:57:17 +00001950 O_RDWR = os.O_RDWR
1951 O_WRONLY = os.O_WRONLY
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001952
1953 class Process(ApiBase.Context.Process):
1954 def __init__(self, *args):
1955 super(Dtrace.Context.Process, self).__init__(*args)
1956 self.cwd = self.initial_cwd
1957
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001958 def __init__(self, blacklist, thunk_pid, initial_cwd):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001959 logging.info(
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001960 '%s(%d, %s)' % (self.__class__.__name__, thunk_pid, initial_cwd))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001961 super(Dtrace.Context, self).__init__(blacklist)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001962 assert isinstance(initial_cwd, unicode), initial_cwd
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001963 # Process ID of the temporary script created by create_subprocess_thunk().
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001964 self._thunk_pid = thunk_pid
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001965 self._initial_cwd = initial_cwd
1966 self._line_number = 0
1967
1968 def on_line(self, line):
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001969 assert isinstance(line, unicode), line
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001970 self._line_number += 1
1971 match = self.RE_HEADER.match(line)
1972 if not match:
1973 raise TracingFailure(
1974 'Found malformed line: %s' % line,
1975 None,
1976 self._line_number,
1977 line)
1978 fn = getattr(
1979 self,
1980 'handle_%s' % match.group(2).replace('-', '_'),
1981 self._handle_ignored)
1982 # It is guaranteed to succeed because of the regexp. Or at least I thought
1983 # it would.
1984 pid = int(match.group(1))
1985 try:
1986 return fn(pid, match.group(3))
1987 except TracingFailure, e:
1988 # Hack in the values since the handler could be a static function.
1989 e.pid = pid
1990 e.line = line
1991 e.line_number = self._line_number
1992 # Re-raise the modified exception.
1993 raise
1994 except (KeyError, NotImplementedError, ValueError), e:
1995 raise TracingFailure(
1996 'Trace generated a %s exception: %s' % (
1997 e.__class__.__name__, str(e)),
1998 pid,
1999 self._line_number,
2000 line,
2001 e)
2002
2003 def to_results(self):
2004 process = self.root_process.to_results_process()
2005 # Internal concistency check.
2006 if sorted(self._process_lookup) != sorted(p.pid for p in process.all):
2007 raise TracingFailure(
2008 'Found internal inconsitency in process lifetime detection '
2009 'while looking for len(tree) == len(list)',
2010 None,
2011 None,
2012 None,
2013 sorted(self._process_lookup),
2014 sorted(p.pid for p in process.all))
2015 return Results(process)
2016
2017 def handle_dtrace_BEGIN(self, _pid, args):
2018 if not self.RE_DTRACE_BEGIN.match(args):
2019 raise TracingFailure(
2020 'Found internal inconsitency in dtrace_BEGIN log line',
2021 None, None, None)
2022
2023 def handle_proc_start(self, pid, args):
2024 """Transfers cwd.
2025
2026 The dtrace script already takes care of only tracing the processes that
2027 are child of the traced processes so there is no need to verify the
2028 process hierarchy.
2029 """
2030 if pid in self._process_lookup:
2031 raise TracingFailure(
2032 'Found internal inconsitency in proc_start: %d started two times' %
2033 pid,
2034 None, None, None)
2035 match = self.RE_PROC_START.match(args)
2036 if not match:
2037 raise TracingFailure(
2038 'Failed to parse arguments: %s' % args,
2039 None, None, None)
2040 ppid = int(match.group(1))
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002041 if ppid == self._thunk_pid and not self.root_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002042 proc = self.root_process = self.Process(
2043 self.blacklist, pid, self._initial_cwd)
2044 elif ppid in self._process_lookup:
2045 proc = self.Process(self.blacklist, pid, self._process_lookup[ppid].cwd)
2046 self._process_lookup[ppid].children.append(proc)
2047 else:
2048 # Another process tree, ignore.
2049 return
2050 self._process_lookup[pid] = proc
2051 logging.debug(
2052 'New child: %s -> %d cwd:%s' %
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002053 (ppid, pid, render(proc.initial_cwd)))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002054
2055 def handle_proc_exit(self, pid, _args):
2056 """Removes cwd."""
2057 if pid in self._process_lookup:
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002058 # self._thunk_pid is not traced itself and other traces run neither.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002059 self._process_lookup[pid].cwd = None
2060
2061 def handle_execve(self, pid, args):
2062 """Sets the process' executable.
2063
2064 TODO(maruel): Read command line arguments. See
2065 https://discussions.apple.com/thread/1980539 for an example.
2066 https://gist.github.com/1242279
2067
2068 Will have to put the answer at http://stackoverflow.com/questions/7556249.
2069 :)
2070 """
2071 if not pid in self._process_lookup:
2072 # Another process tree, ignore.
2073 return
2074 match = self.RE_EXECVE.match(args)
2075 if not match:
2076 raise TracingFailure(
2077 'Failed to parse arguments: %r' % args,
2078 None, None, None)
2079 proc = self._process_lookup[pid]
2080 proc.executable = match.group(1)
maruel@chromium.org538141b2013-06-03 20:57:17 +00002081 self._handle_file(pid, proc.executable, Results.File.READ)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002082 proc.command = self.process_escaped_arguments(match.group(3))
2083 if int(match.group(2)) != len(proc.command):
2084 raise TracingFailure(
2085 'Failed to parse execve() arguments: %s' % args,
2086 None, None, None)
2087
2088 def handle_chdir(self, pid, args):
2089 """Updates cwd."""
2090 if pid not in self._process_lookup:
2091 # Another process tree, ignore.
2092 return
2093 cwd = self.RE_CHDIR.match(args).group(1)
2094 if not cwd.startswith('/'):
2095 cwd2 = os.path.join(self._process_lookup[pid].cwd, cwd)
2096 logging.debug('handle_chdir(%d, %s) -> %s' % (pid, cwd, cwd2))
2097 else:
2098 logging.debug('handle_chdir(%d, %s)' % (pid, cwd))
2099 cwd2 = cwd
2100 self._process_lookup[pid].cwd = cwd2
2101
2102 def handle_open_nocancel(self, pid, args):
2103 """Redirects to handle_open()."""
2104 return self.handle_open(pid, args)
2105
2106 def handle_open(self, pid, args):
2107 if pid not in self._process_lookup:
2108 # Another process tree, ignore.
2109 return
2110 match = self.RE_OPEN.match(args)
2111 if not match:
2112 raise TracingFailure(
2113 'Failed to parse arguments: %s' % args,
2114 None, None, None)
2115 flag = int(match.group(2), 16)
2116 if self.O_DIRECTORY & flag == self.O_DIRECTORY:
2117 # Ignore directories.
2118 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00002119 self._handle_file(
2120 pid,
2121 match.group(1),
2122 Results.File.READ if not ((self.O_RDWR | self.O_WRONLY) & flag)
2123 else Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002124
2125 def handle_rename(self, pid, args):
2126 if pid not in self._process_lookup:
2127 # Another process tree, ignore.
2128 return
2129 match = self.RE_RENAME.match(args)
2130 if not match:
2131 raise TracingFailure(
2132 'Failed to parse arguments: %s' % args,
2133 None, None, None)
maruel@chromium.org538141b2013-06-03 20:57:17 +00002134 self._handle_file(pid, match.group(1), Results.File.READ)
2135 self._handle_file(pid, match.group(2), Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002136
maruel@chromium.org538141b2013-06-03 20:57:17 +00002137 def _handle_file(self, pid, filepath, mode):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002138 if not filepath.startswith('/'):
2139 filepath = os.path.join(self._process_lookup[pid].cwd, filepath)
2140 # We can get '..' in the path.
2141 filepath = os.path.normpath(filepath)
2142 # Sadly, still need to filter out directories here;
2143 # saw open_nocancel(".", 0, 0) = 0 lines.
2144 if os.path.isdir(filepath):
2145 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00002146 self._process_lookup[pid].add_file(filepath, mode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002147
2148 def handle_ftruncate(self, pid, args):
2149 """Just used as a signal to kill dtrace, ignoring."""
2150 pass
2151
2152 @staticmethod
2153 def _handle_ignored(pid, args):
2154 """Is called for all the event traces that are not handled."""
2155 raise NotImplementedError('Please implement me')
2156
2157 @staticmethod
2158 def process_escaped_arguments(text):
2159 """Extracts escaped arguments on a string and return the arguments as a
2160 list.
2161
2162 Implemented as an automaton.
2163
2164 Example:
2165 With text = '\\001python2.7\\001-c\\001print(\\"hi\\")\\0', the
2166 function will return ['python2.7', '-c', 'print("hi")]
2167 """
2168 if not text.endswith('\\0'):
2169 raise ValueError('String is not null terminated: %r' % text, text)
2170 text = text[:-2]
2171
2172 def unescape(x):
2173 """Replaces '\\' with '\' and '\?' (where ? is anything) with ?."""
2174 out = []
2175 escaped = False
2176 for i in x:
2177 if i == '\\' and not escaped:
2178 escaped = True
2179 continue
2180 escaped = False
2181 out.append(i)
2182 return ''.join(out)
2183
2184 return [unescape(i) for i in text.split('\\001')]
2185
2186 class Tracer(ApiBase.Tracer):
2187 # pylint: disable=C0301
2188 #
2189 # To understand the following code, you'll want to take a look at:
2190 # http://developers.sun.com/solaris/articles/dtrace_quickref/dtrace_quickref.html
2191 # https://wikis.oracle.com/display/DTrace/Variables
2192 # http://docs.oracle.com/cd/E19205-01/820-4221/
2193 #
2194 # 0. Dump all the valid probes into a text file. It is important, you
2195 # want to redirect into a file and you don't want to constantly 'sudo'.
2196 # $ sudo dtrace -l > probes.txt
2197 #
2198 # 1. Count the number of probes:
2199 # $ wc -l probes.txt
2200 # 81823 # On OSX 10.7, including 1 header line.
2201 #
2202 # 2. List providers, intentionally skipping all the 'syspolicy10925' and the
2203 # likes and skipping the header with NR>1:
2204 # $ awk 'NR>1 { print $2 }' probes.txt | sort | uniq | grep -v '[[:digit:]]'
2205 # dtrace
2206 # fbt
2207 # io
2208 # ip
2209 # lockstat
2210 # mach_trap
2211 # proc
2212 # profile
2213 # sched
2214 # syscall
2215 # tcp
2216 # vminfo
2217 #
2218 # 3. List of valid probes:
2219 # $ grep syscall probes.txt | less
2220 # or use dtrace directly:
2221 # $ sudo dtrace -l -P syscall | less
2222 #
2223 # trackedpid is an associative array where its value can be 0, 1 or 2.
2224 # 0 is for untracked processes and is the default value for items not
2225 # in the associative array.
2226 # 1 is for tracked processes.
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002227 # 2 is for the script created by create_subprocess_thunk() only. It is not
2228 # tracked itself but all its decendants are.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002229 #
2230 # The script will kill itself only once waiting_to_die == 1 and
2231 # current_processes == 0, so that both getlogin() was called and that
2232 # all traced processes exited.
2233 #
2234 # TODO(maruel): Use cacheable predicates. See
2235 # https://wikis.oracle.com/display/DTrace/Performance+Considerations
2236 D_CODE = """
2237 dtrace:::BEGIN {
2238 waiting_to_die = 0;
2239 current_processes = 0;
2240 logindex = 0;
2241 printf("%d %d %s_%s(\\"%s\\")\\n",
2242 logindex, PID, probeprov, probename, SCRIPT);
2243 logindex++;
2244 }
2245
2246 proc:::start /trackedpid[ppid]/ {
2247 trackedpid[pid] = 1;
2248 current_processes += 1;
2249 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
2250 logindex, pid, probeprov, probename,
2251 ppid,
2252 execname,
2253 current_processes);
2254 logindex++;
2255 }
2256 /* Should use SCRIPT but there is no access to this variable at that
2257 * point. */
2258 proc:::start /ppid == PID && execname == "Python"/ {
2259 trackedpid[pid] = 2;
2260 current_processes += 1;
2261 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
2262 logindex, pid, probeprov, probename,
2263 ppid,
2264 execname,
2265 current_processes);
2266 logindex++;
2267 }
2268 proc:::exit /trackedpid[pid] &&
2269 current_processes == 1 &&
2270 waiting_to_die == 1/ {
2271 trackedpid[pid] = 0;
2272 current_processes -= 1;
2273 printf("%d %d %s_%s(%d)\\n",
2274 logindex, pid, probeprov, probename,
2275 current_processes);
2276 logindex++;
2277 exit(0);
2278 }
2279 proc:::exit /trackedpid[pid]/ {
2280 trackedpid[pid] = 0;
2281 current_processes -= 1;
2282 printf("%d %d %s_%s(%d)\\n",
2283 logindex, pid, probeprov, probename,
2284 current_processes);
2285 logindex++;
2286 }
2287
2288 /* Use an arcane function to detect when we need to die */
2289 syscall::ftruncate:entry /pid == PID && arg0 == FILE_ID/ {
2290 waiting_to_die = 1;
2291 printf("%d %d %s()\\n", logindex, pid, probefunc);
2292 logindex++;
2293 }
2294 syscall::ftruncate:entry /
2295 pid == PID && arg0 == FILE_ID && current_processes == 0/ {
2296 exit(0);
2297 }
2298
2299 syscall::open*:entry /trackedpid[pid] == 1/ {
2300 self->open_arg0 = arg0;
2301 self->open_arg1 = arg1;
2302 self->open_arg2 = arg2;
2303 }
2304 syscall::open*:return /trackedpid[pid] == 1 && errno == 0/ {
2305 this->open_arg0 = copyinstr(self->open_arg0);
2306 printf("%d %d %s(\\"%s\\", 0x%x, 0x%x)\\n",
2307 logindex, pid, probefunc,
2308 this->open_arg0,
2309 self->open_arg1,
2310 self->open_arg2);
2311 logindex++;
2312 this->open_arg0 = 0;
2313 }
2314 syscall::open*:return /trackedpid[pid] == 1/ {
2315 self->open_arg0 = 0;
2316 self->open_arg1 = 0;
2317 self->open_arg2 = 0;
2318 }
2319
2320 syscall::rename:entry /trackedpid[pid] == 1/ {
2321 self->rename_arg0 = arg0;
2322 self->rename_arg1 = arg1;
2323 }
2324 syscall::rename:return /trackedpid[pid] == 1 && errno == 0/ {
2325 this->rename_arg0 = copyinstr(self->rename_arg0);
2326 this->rename_arg1 = copyinstr(self->rename_arg1);
2327 printf("%d %d %s(\\"%s\\", \\"%s\\")\\n",
2328 logindex, pid, probefunc,
2329 this->rename_arg0,
2330 this->rename_arg1);
2331 logindex++;
2332 this->rename_arg0 = 0;
2333 this->rename_arg1 = 0;
2334 }
2335 syscall::rename:return /trackedpid[pid] == 1/ {
2336 self->rename_arg0 = 0;
2337 self->rename_arg1 = 0;
2338 }
2339
2340 /* Track chdir, it's painful because it is only receiving relative path.
2341 */
2342 syscall::chdir:entry /trackedpid[pid] == 1/ {
2343 self->chdir_arg0 = arg0;
2344 }
2345 syscall::chdir:return /trackedpid[pid] == 1 && errno == 0/ {
2346 this->chdir_arg0 = copyinstr(self->chdir_arg0);
2347 printf("%d %d %s(\\"%s\\")\\n",
2348 logindex, pid, probefunc,
2349 this->chdir_arg0);
2350 logindex++;
2351 this->chdir_arg0 = 0;
2352 }
2353 syscall::chdir:return /trackedpid[pid] == 1/ {
2354 self->chdir_arg0 = 0;
2355 }
2356 """
2357
2358 # execve-specific code, tends to throw a lot of exceptions.
2359 D_CODE_EXECVE = """
2360 /* Finally what we care about! */
2361 syscall::exec*:entry /trackedpid[pid]/ {
2362 self->exec_arg0 = copyinstr(arg0);
2363 /* Incrementally probe for a NULL in the argv parameter of execve() to
2364 * figure out argc. */
2365 /* TODO(maruel): Skip the remaining copyin() when a NULL pointer was
2366 * found. */
2367 self->exec_argc = 0;
2368 /* Probe for argc==1 */
2369 this->exec_argv = (user_addr_t*)copyin(
2370 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2371 self->exec_argc = this->exec_argv[self->exec_argc] ?
2372 (self->exec_argc + 1) : self->exec_argc;
2373
2374 /* Probe for argc==2 */
2375 this->exec_argv = (user_addr_t*)copyin(
2376 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2377 self->exec_argc = this->exec_argv[self->exec_argc] ?
2378 (self->exec_argc + 1) : self->exec_argc;
2379
2380 /* Probe for argc==3 */
2381 this->exec_argv = (user_addr_t*)copyin(
2382 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2383 self->exec_argc = this->exec_argv[self->exec_argc] ?
2384 (self->exec_argc + 1) : self->exec_argc;
2385
2386 /* Probe for argc==4 */
2387 this->exec_argv = (user_addr_t*)copyin(
2388 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2389 self->exec_argc = this->exec_argv[self->exec_argc] ?
2390 (self->exec_argc + 1) : self->exec_argc;
2391
2392 /* Copy the inputs strings since there is no guarantee they'll be
2393 * present after the call completed. */
2394 self->exec_argv0 = (self->exec_argc > 0) ?
2395 copyinstr(this->exec_argv[0]) : "";
2396 self->exec_argv1 = (self->exec_argc > 1) ?
2397 copyinstr(this->exec_argv[1]) : "";
2398 self->exec_argv2 = (self->exec_argc > 2) ?
2399 copyinstr(this->exec_argv[2]) : "";
2400 self->exec_argv3 = (self->exec_argc > 3) ?
2401 copyinstr(this->exec_argv[3]) : "";
2402 this->exec_argv = 0;
2403 }
2404 syscall::exec*:return /trackedpid[pid] && errno == 0/ {
2405 /* We need to join strings here, as using multiple printf() would
2406 * cause tearing when multiple threads/processes are traced.
2407 * Since it is impossible to escape a string and join it to another one,
2408 * like sprintf("%s%S", previous, more), use hackery.
2409 * Each of the elements are split with a \\1. \\0 cannot be used because
2410 * it is simply ignored. This will conflict with any program putting a
2411 * \\1 in their execve() string but this should be "rare enough" */
2412 this->args = "";
2413 /* Process exec_argv[0] */
2414 this->args = strjoin(
2415 this->args, (self->exec_argc > 0) ? self->exec_argv0 : "");
2416
2417 /* Process exec_argv[1] */
2418 this->args = strjoin(
2419 this->args, (self->exec_argc > 1) ? "\\1" : "");
2420 this->args = strjoin(
2421 this->args, (self->exec_argc > 1) ? self->exec_argv1 : "");
2422
2423 /* Process exec_argv[2] */
2424 this->args = strjoin(
2425 this->args, (self->exec_argc > 2) ? "\\1" : "");
2426 this->args = strjoin(
2427 this->args, (self->exec_argc > 2) ? self->exec_argv2 : "");
2428
2429 /* Process exec_argv[3] */
2430 this->args = strjoin(
2431 this->args, (self->exec_argc > 3) ? "\\1" : "");
2432 this->args = strjoin(
2433 this->args, (self->exec_argc > 3) ? self->exec_argv3 : "");
2434
2435 /* Prints self->exec_argc to permits verifying the internal
2436 * consistency since this code is quite fishy. */
2437 printf("%d %d %s(\\"%s\\", [%d, %S])\\n",
2438 logindex, pid, probefunc,
2439 self->exec_arg0,
2440 self->exec_argc,
2441 this->args);
2442 logindex++;
2443 this->args = 0;
2444 }
2445 syscall::exec*:return /trackedpid[pid]/ {
2446 self->exec_arg0 = 0;
2447 self->exec_argc = 0;
2448 self->exec_argv0 = 0;
2449 self->exec_argv1 = 0;
2450 self->exec_argv2 = 0;
2451 self->exec_argv3 = 0;
2452 }
2453 """
2454
2455 # Code currently not used.
2456 D_EXTRANEOUS = """
2457 /* This is a good learning experience, since it traces a lot of things
2458 * related to the process and child processes.
2459 * Warning: it generates a gigantic log. For example, tracing
2460 * "data/trace_inputs/child1.py --child" generates a 2mb log and takes
2461 * several minutes to execute.
2462 */
2463 /*
2464 mach_trap::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2465 printf("%d %d %s_%s() = %d\\n",
2466 logindex, pid, probeprov, probefunc, errno);
2467 logindex++;
2468 }
2469 proc::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2470 printf("%d %d %s_%s() = %d\\n",
2471 logindex, pid, probeprov, probefunc, errno);
2472 logindex++;
2473 }
2474 sched::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2475 printf("%d %d %s_%s() = %d\\n",
2476 logindex, pid, probeprov, probefunc, errno);
2477 logindex++;
2478 }
2479 syscall::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2480 printf("%d %d %s_%s() = %d\\n",
2481 logindex, pid, probeprov, probefunc, errno);
2482 logindex++;
2483 }
2484 vminfo::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2485 printf("%d %d %s_%s() = %d\\n",
2486 logindex, pid, probeprov, probefunc, errno);
2487 logindex++;
2488 }
2489 */
2490 /* TODO(maruel): *stat* functions and friends
2491 syscall::access:return,
2492 syscall::chdir:return,
2493 syscall::chflags:return,
2494 syscall::chown:return,
2495 syscall::chroot:return,
2496 syscall::getattrlist:return,
2497 syscall::getxattr:return,
2498 syscall::lchown:return,
2499 syscall::lstat64:return,
2500 syscall::lstat:return,
2501 syscall::mkdir:return,
2502 syscall::pathconf:return,
2503 syscall::readlink:return,
2504 syscall::removexattr:return,
2505 syscall::setxattr:return,
2506 syscall::stat64:return,
2507 syscall::stat:return,
2508 syscall::truncate:return,
2509 syscall::unlink:return,
2510 syscall::utimes:return,
2511 */
2512 """
2513
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002514 def __init__(self, logname, use_sudo):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002515 """Starts the log collection with dtrace.
2516
2517 Requires root access or chmod 4555 on dtrace. dtrace is asynchronous so
2518 this needs to wait for dtrace to be "warmed up".
2519 """
2520 super(Dtrace.Tracer, self).__init__(logname)
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002521 # This script is used as a signal to figure out the root process.
2522 self._signal_script = create_subprocess_thunk()
2523 self._scripts_to_cleanup.append(self._signal_script)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002524 # This unique dummy temp file is used to signal the dtrace script that it
2525 # should stop as soon as all the child processes are done. A bit hackish
2526 # but works fine enough.
2527 self._dummy_file_id, self._dummy_file_name = tempfile.mkstemp(
2528 prefix='trace_signal_file')
2529
maruel@chromium.org9b2112a2013-04-05 18:01:02 +00002530 dtrace_path = '/usr/sbin/dtrace'
2531 if not os.path.isfile(dtrace_path):
2532 dtrace_path = 'dtrace'
2533 elif use_sudo is None and (os.stat(dtrace_path).st_mode & stat.S_ISUID):
2534 # No need to sudo. For those following at home, don't do that.
2535 use_sudo = False
2536
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002537 # Note: do not use the -p flag. It's useless if the initial process quits
2538 # too fast, resulting in missing traces from the grand-children. The D
2539 # code manages the dtrace lifetime itself.
2540 trace_cmd = [
maruel@chromium.org9b2112a2013-04-05 18:01:02 +00002541 dtrace_path,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002542 # Use a larger buffer if getting 'out of scratch space' errors.
2543 # Ref: https://wikis.oracle.com/display/DTrace/Options+and+Tunables
2544 '-b', '10m',
2545 '-x', 'dynvarsize=10m',
2546 #'-x', 'dtrace_global_maxsize=1m',
2547 '-x', 'evaltime=exec',
2548 '-o', '/dev/stderr',
2549 '-q',
2550 '-n', self._get_dtrace_code(),
2551 ]
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002552 if use_sudo is not False:
2553 trace_cmd.insert(0, 'sudo')
2554
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002555 with open(self._logname + '.log', 'wb') as logfile:
2556 self._dtrace = subprocess.Popen(
2557 trace_cmd, stdout=logfile, stderr=subprocess.STDOUT)
2558 logging.debug('Started dtrace pid: %d' % self._dtrace.pid)
2559
2560 # Reads until one line is printed, which signifies dtrace is up and ready.
2561 with open(self._logname + '.log', 'rb') as logfile:
2562 while 'dtrace_BEGIN' not in logfile.readline():
2563 if self._dtrace.poll() is not None:
2564 # Do a busy wait. :/
2565 break
2566 logging.debug('dtrace started')
2567
2568 def _get_dtrace_code(self):
2569 """Setups the D code to implement child process tracking.
2570
2571 Injects the cookie in the script so it knows when to stop.
2572
2573 The script will detect any instance of the script created with
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002574 create_subprocess_thunk() and will start tracing it.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002575 """
maruel@chromium.orgbe75f282012-11-02 19:34:37 +00002576 out = (
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002577 'inline int PID = %d;\n'
2578 'inline string SCRIPT = "%s";\n'
2579 'inline int FILE_ID = %d;\n'
2580 '\n'
2581 '%s') % (
2582 os.getpid(),
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002583 self._signal_script,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002584 self._dummy_file_id,
maruel@chromium.orgbe75f282012-11-02 19:34:37 +00002585 self.D_CODE)
2586 if os.environ.get('TRACE_INPUTS_DTRACE_ENABLE_EXECVE') == '1':
2587 # Do not enable by default since it tends to spew dtrace: error lines
2588 # because the execve() parameters are not in valid memory at the time of
2589 # logging.
2590 # TODO(maruel): Find a way to make this reliable since it's useful but
2591 # only works in limited/trivial uses cases for now.
2592 out += self.D_CODE_EXECVE
2593 return out
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002594
2595 def trace(self, cmd, cwd, tracename, output):
2596 """Runs dtrace on an executable.
2597
2598 This dtruss is broken when it starts the process itself or when tracing
2599 child processes, this code starts a wrapper process
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002600 generated with create_subprocess_thunk() which starts the executable to
2601 trace.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002602 """
2603 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
2604 assert os.path.isabs(cmd[0]), cmd[0]
2605 assert os.path.isabs(cwd), cwd
2606 assert os.path.normpath(cwd) == cwd, cwd
2607 with self._lock:
2608 if not self._initialized:
2609 raise TracingFailure(
2610 'Called Tracer.trace() on an unitialized object',
2611 None, None, None, tracename)
2612 assert tracename not in (i['trace'] for i in self._traces)
2613
2614 # Starts the script wrapper to start the child process. This signals the
2615 # dtrace script that this process is to be traced.
2616 stdout = stderr = None
2617 if output:
2618 stdout = subprocess.PIPE
2619 stderr = subprocess.STDOUT
2620 child_cmd = [
2621 sys.executable,
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002622 self._signal_script,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002623 tracename,
2624 ]
2625 # Call a dummy function so that dtrace knows I'm about to launch a process
2626 # that needs to be traced.
2627 # Yummy.
2628 child = subprocess.Popen(
2629 child_cmd + fix_python_path(cmd),
2630 stdin=subprocess.PIPE,
2631 stdout=stdout,
2632 stderr=stderr,
2633 cwd=cwd)
2634 logging.debug('Started child pid: %d' % child.pid)
2635
2636 out = child.communicate()[0]
2637 # This doesn't mean tracing is done, one of the grand-child process may
2638 # still be alive. It will be tracked with the dtrace script.
2639
2640 with self._lock:
2641 assert tracename not in (i['trace'] for i in self._traces)
2642 self._traces.append(
2643 {
2644 'cmd': cmd,
2645 'cwd': cwd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002646 'pid': child.pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002647 'output': out,
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002648 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002649 })
2650 return child.returncode, out
2651
2652 def close(self, timeout=None):
2653 """Terminates dtrace."""
2654 logging.debug('close(%s)' % timeout)
2655 try:
2656 try:
2657 super(Dtrace.Tracer, self).close(timeout)
2658 # Signal dtrace that it should stop now.
maruel@chromium.orgf43e68b2012-10-15 20:23:10 +00002659 # ftruncate doesn't exist on Windows.
2660 os.ftruncate(self._dummy_file_id, 0) # pylint: disable=E1101
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002661 if timeout:
2662 start = time.time()
2663 # Use polling. :/
2664 while (self._dtrace.poll() is None and
2665 (time.time() - start) < timeout):
2666 time.sleep(0.1)
2667 self._dtrace.kill()
2668 self._dtrace.wait()
2669 finally:
2670 # Make sure to kill it in any case.
2671 if self._dtrace.poll() is None:
2672 try:
2673 self._dtrace.kill()
2674 self._dtrace.wait()
2675 except OSError:
2676 pass
2677
2678 if self._dtrace.returncode != 0:
2679 # Warn about any dtrace failure but basically ignore it.
2680 print 'dtrace failure: %s' % self._dtrace.returncode
2681 finally:
2682 os.close(self._dummy_file_id)
2683 os.remove(self._dummy_file_name)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002684
2685 def post_process_log(self):
2686 """Sorts the log back in order when each call occured.
2687
2688 dtrace doesn't save the buffer in strict order since it keeps one buffer
2689 per CPU.
2690 """
2691 super(Dtrace.Tracer, self).post_process_log()
2692 logname = self._logname + '.log'
2693 with open(logname, 'rb') as logfile:
2694 lines = [l for l in logfile if l.strip()]
2695 errors = [l for l in lines if l.startswith('dtrace:')]
2696 if errors:
2697 raise TracingFailure(
2698 'Found errors in the trace: %s' % '\n'.join(errors),
2699 None, None, None, logname)
2700 try:
2701 lines = sorted(lines, key=lambda l: int(l.split(' ', 1)[0]))
2702 except ValueError:
2703 raise TracingFailure(
2704 'Found errors in the trace: %s' % '\n'.join(
2705 l for l in lines if l.split(' ', 1)[0].isdigit()),
2706 None, None, None, logname)
2707 with open(logname, 'wb') as logfile:
2708 logfile.write(''.join(lines))
2709
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002710 def __init__(self, use_sudo=None):
2711 super(Dtrace, self).__init__()
2712 self.use_sudo = use_sudo
2713
2714 def get_tracer(self, logname):
2715 return self.Tracer(logname, self.use_sudo)
2716
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002717 @staticmethod
2718 def clean_trace(logname):
2719 for ext in ('', '.log'):
2720 if os.path.isfile(logname + ext):
2721 os.remove(logname + ext)
2722
2723 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00002724 def parse_log(cls, logname, blacklist, trace_name):
2725 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00002726 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002727
2728 def blacklist_more(filepath):
2729 # All the HFS metadata is in the form /.vol/...
2730 return blacklist(filepath) or re.match(r'^\/\.vol\/.+$', filepath)
2731
2732 data = read_json(logname)
2733 out = []
2734 for item in data['traces']:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00002735 if trace_name and item['trace'] != trace_name:
2736 continue
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002737 result = {
2738 'output': item['output'],
2739 'trace': item['trace'],
2740 }
2741 try:
2742 context = cls.Context(blacklist_more, item['pid'], item['cwd'])
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002743 # It's fine to assume the file as UTF-8: OSX enforces the file names to
2744 # be valid UTF-8 and we control the log output.
2745 for line in codecs.open(logname + '.log', 'rb', encoding='utf-8'):
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002746 context.on_line(line)
2747 result['results'] = context.to_results()
2748 except TracingFailure:
2749 result['exception'] = sys.exc_info()
2750 out.append(result)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002751 return out
2752
2753
2754class LogmanTrace(ApiBase):
2755 """Uses the native Windows ETW based tracing functionality to trace a child
2756 process.
2757
2758 Caveat: this implementations doesn't track cwd or initial_cwd. It is because
2759 the Windows Kernel doesn't have a concept of 'current working directory' at
2760 all. A Win32 process has a map of current directories, one per drive letter
2761 and it is managed by the user mode kernel32.dll. In kernel, a file is always
2762 opened relative to another file_object or as an absolute path. All the current
2763 working directory logic is done in user mode.
2764 """
2765 class Context(ApiBase.Context):
2766 """Processes a ETW log line and keeps the list of existent and non
2767 existent files accessed.
2768
2769 Ignores directories.
2770 """
2771 # These indexes are for the stripped version in json.
2772 EVENT_NAME = 0
2773 TYPE = 1
2774 PID = 2
2775 TID = 3
2776 PROCESSOR_ID = 4
2777 TIMESTAMP = 5
2778 USER_DATA = 6
2779
2780 class Process(ApiBase.Context.Process):
2781 def __init__(self, *args):
2782 super(LogmanTrace.Context.Process, self).__init__(*args)
2783 # Handle file objects that succeeded.
2784 self.file_objects = {}
2785
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002786 def __init__(self, blacklist, thunk_pid, trace_name, thunk_cmd):
2787 logging.info(
2788 '%s(%d, %s, %s)', self.__class__.__name__, thunk_pid, trace_name,
2789 thunk_cmd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002790 super(LogmanTrace.Context, self).__init__(blacklist)
2791 self._drive_map = DosDriveMap()
2792 # Threads mapping to the corresponding process id.
2793 self._threads_active = {}
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002794 # Process ID of the tracer, e.g. the temporary script created by
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002795 # create_subprocess_thunk(). This is tricky because the process id may
2796 # have been reused.
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002797 self._thunk_pid = thunk_pid
2798 self._thunk_cmd = thunk_cmd
2799 self._trace_name = trace_name
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002800 self._line_number = 0
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002801 self._thunk_process = None
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002802
2803 def on_line(self, line):
2804 """Processes a json Event line."""
2805 self._line_number += 1
2806 try:
2807 # By Opcode
2808 handler = getattr(
2809 self,
2810 'handle_%s_%s' % (line[self.EVENT_NAME], line[self.TYPE]),
2811 None)
2812 if not handler:
2813 raise TracingFailure(
2814 'Unexpected event %s_%s' % (
2815 line[self.EVENT_NAME], line[self.TYPE]),
2816 None, None, None)
2817 handler(line)
2818 except TracingFailure, e:
2819 # Hack in the values since the handler could be a static function.
2820 e.pid = line[self.PID]
2821 e.line = line
2822 e.line_number = self._line_number
2823 # Re-raise the modified exception.
2824 raise
2825 except (KeyError, NotImplementedError, ValueError), e:
2826 raise TracingFailure(
2827 'Trace generated a %s exception: %s' % (
2828 e.__class__.__name__, str(e)),
2829 line[self.PID],
2830 self._line_number,
2831 line,
2832 e)
2833
2834 def to_results(self):
2835 if not self.root_process:
2836 raise TracingFailure(
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002837 'Failed to detect the initial process %d' % self._thunk_pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002838 None, None, None)
2839 process = self.root_process.to_results_process()
2840 return Results(process)
2841
2842 def _thread_to_process(self, tid):
2843 """Finds the process from the thread id."""
2844 tid = int(tid, 16)
2845 pid = self._threads_active.get(tid)
2846 if not pid or not self._process_lookup.get(pid):
2847 return
2848 return self._process_lookup[pid]
2849
2850 @classmethod
2851 def handle_EventTrace_Header(cls, line):
2852 """Verifies no event was dropped, e.g. no buffer overrun occured."""
2853 BUFFER_SIZE = cls.USER_DATA
2854 #VERSION = cls.USER_DATA + 1
2855 #PROVIDER_VERSION = cls.USER_DATA + 2
2856 #NUMBER_OF_PROCESSORS = cls.USER_DATA + 3
2857 #END_TIME = cls.USER_DATA + 4
2858 #TIMER_RESOLUTION = cls.USER_DATA + 5
2859 #MAX_FILE_SIZE = cls.USER_DATA + 6
2860 #LOG_FILE_MODE = cls.USER_DATA + 7
2861 #BUFFERS_WRITTEN = cls.USER_DATA + 8
2862 #START_BUFFERS = cls.USER_DATA + 9
2863 #POINTER_SIZE = cls.USER_DATA + 10
2864 EVENTS_LOST = cls.USER_DATA + 11
2865 #CPU_SPEED = cls.USER_DATA + 12
2866 #LOGGER_NAME = cls.USER_DATA + 13
2867 #LOG_FILE_NAME = cls.USER_DATA + 14
2868 #BOOT_TIME = cls.USER_DATA + 15
2869 #PERF_FREQ = cls.USER_DATA + 16
2870 #START_TIME = cls.USER_DATA + 17
2871 #RESERVED_FLAGS = cls.USER_DATA + 18
2872 #BUFFERS_LOST = cls.USER_DATA + 19
2873 #SESSION_NAME_STRING = cls.USER_DATA + 20
2874 #LOG_FILE_NAME_STRING = cls.USER_DATA + 21
2875 if line[EVENTS_LOST] != '0':
2876 raise TracingFailure(
2877 ( '%s events were lost during trace, please increase the buffer '
2878 'size from %s') % (line[EVENTS_LOST], line[BUFFER_SIZE]),
2879 None, None, None)
2880
2881 def handle_FileIo_Cleanup(self, line):
2882 """General wisdom: if a file is closed, it's because it was opened.
2883
2884 Note that FileIo_Close is not used since if a file was opened properly but
2885 not closed before the process exits, only Cleanup will be logged.
2886 """
2887 #IRP = self.USER_DATA
2888 TTID = self.USER_DATA + 1 # Thread ID, that's what we want.
2889 FILE_OBJECT = self.USER_DATA + 2
2890 #FILE_KEY = self.USER_DATA + 3
2891 proc = self._thread_to_process(line[TTID])
2892 if not proc:
2893 # Not a process we care about.
2894 return
2895 file_object = line[FILE_OBJECT]
2896 if file_object in proc.file_objects:
maruel@chromium.org538141b2013-06-03 20:57:17 +00002897 filepath, access_type = proc.file_objects.pop(file_object)
2898 proc.add_file(filepath, access_type)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002899
2900 def handle_FileIo_Create(self, line):
2901 """Handles a file open.
2902
2903 All FileIo events are described at
2904 http://msdn.microsoft.com/library/windows/desktop/aa363884.aspx
2905 for some value of 'description'.
2906
2907 " (..) process and thread id values of the IO events (..) are not valid "
2908 http://msdn.microsoft.com/magazine/ee358703.aspx
2909
2910 The FileIo.Create event doesn't return if the CreateFile() call
2911 succeeded, so keep track of the file_object and check that it is
2912 eventually closed with FileIo_Cleanup.
2913 """
2914 #IRP = self.USER_DATA
2915 TTID = self.USER_DATA + 1 # Thread ID, that's what we want.
2916 FILE_OBJECT = self.USER_DATA + 2
2917 #CREATE_OPTIONS = self.USER_DATA + 3
2918 #FILE_ATTRIBUTES = self.USER_DATA + 4
maruel@chromium.org538141b2013-06-03 20:57:17 +00002919 #SHARE_ACCESS = self.USER_DATA + 5
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002920 OPEN_PATH = self.USER_DATA + 6
2921
2922 proc = self._thread_to_process(line[TTID])
2923 if not proc:
2924 # Not a process we care about.
2925 return
2926
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002927 raw_path = line[OPEN_PATH]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002928 # Ignore directories and bare drive right away.
2929 if raw_path.endswith(os.path.sep):
2930 return
2931 filepath = self._drive_map.to_win32(raw_path)
2932 # Ignore bare drive right away. Some may still fall through with format
2933 # like '\\?\X:'
2934 if len(filepath) == 2:
2935 return
2936 file_object = line[FILE_OBJECT]
2937 if os.path.isdir(filepath):
2938 # There is no O_DIRECTORY equivalent on Windows. The closed is
2939 # FILE_FLAG_BACKUP_SEMANTICS but it's not exactly right either. So
2940 # simply discard directories are they are found.
2941 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00002942 # Override any stale file object.
2943 # TODO(maruel): Figure out a way to detect if the file was opened for
2944 # reading or writting. Sadly CREATE_OPTIONS doesn't seem to be of any help
2945 # here. For now mark as None to make it clear we have no idea what it is
2946 # about.
2947 proc.file_objects[file_object] = (filepath, None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002948
2949 def handle_FileIo_Rename(self, line):
2950 # TODO(maruel): Handle?
2951 pass
2952
2953 def handle_Process_End(self, line):
2954 pid = line[self.PID]
2955 if self._process_lookup.get(pid):
2956 logging.info('Terminated: %d' % pid)
2957 self._process_lookup[pid] = None
2958 else:
2959 logging.debug('Terminated: %d' % pid)
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002960 if self._thunk_process and self._thunk_process.pid == pid:
2961 self._thunk_process = None
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002962
2963 def handle_Process_Start(self, line):
2964 """Handles a new child process started by PID."""
2965 #UNIQUE_PROCESS_KEY = self.USER_DATA
2966 PROCESS_ID = self.USER_DATA + 1
2967 #PARENT_PID = self.USER_DATA + 2
2968 #SESSION_ID = self.USER_DATA + 3
2969 #EXIT_STATUS = self.USER_DATA + 4
2970 #DIRECTORY_TABLE_BASE = self.USER_DATA + 5
2971 #USER_SID = self.USER_DATA + 6
2972 IMAGE_FILE_NAME = self.USER_DATA + 7
2973 COMMAND_LINE = self.USER_DATA + 8
2974
2975 ppid = line[self.PID]
2976 pid = int(line[PROCESS_ID], 16)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002977 command_line = CommandLineToArgvW(line[COMMAND_LINE])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002978 logging.debug(
2979 'New process %d->%d (%s) %s' %
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002980 (ppid, pid, line[IMAGE_FILE_NAME], command_line))
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002981
2982 if pid == self._thunk_pid:
2983 # Need to ignore processes we don't know about because the log is
2984 # system-wide. self._thunk_pid shall start only one process.
2985 # This is tricky though because Windows *loves* to reuse process id and
2986 # it happens often that the process ID of the thunk script created by
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002987 # create_subprocess_thunk() is reused. So just detecting the pid here is
2988 # not sufficient, we must confirm the command line.
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002989 if command_line[:len(self._thunk_cmd)] != self._thunk_cmd:
2990 logging.info(
2991 'Ignoring duplicate pid %d for %s: %s while searching for %s',
2992 pid, self._trace_name, command_line, self._thunk_cmd)
2993 return
2994
2995 # TODO(maruel): The check is quite weak. Add the thunk path.
2996 if self._thunk_process:
2997 raise TracingFailure(
2998 ( 'Parent process is _thunk_pid(%d) but thunk_process(%d) is '
2999 'already set') % (self._thunk_pid, self._thunk_process.pid),
3000 None, None, None)
3001 proc = self.Process(self.blacklist, pid, None)
3002 self._thunk_process = proc
3003 return
3004 elif ppid == self._thunk_pid and self._thunk_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003005 proc = self.Process(self.blacklist, pid, None)
3006 self.root_process = proc
3007 ppid = None
3008 elif self._process_lookup.get(ppid):
3009 proc = self.Process(self.blacklist, pid, None)
3010 self._process_lookup[ppid].children.append(proc)
3011 else:
3012 # Ignore
3013 return
3014 self._process_lookup[pid] = proc
3015
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003016 proc.command = command_line
3017 proc.executable = line[IMAGE_FILE_NAME]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003018 # proc.command[0] may be the absolute path of 'executable' but it may be
3019 # anything else too. If it happens that command[0] ends with executable,
3020 # use it, otherwise defaults to the base name.
3021 cmd0 = proc.command[0].lower()
3022 if not cmd0.endswith('.exe'):
3023 # TODO(maruel): That's not strictly true either.
3024 cmd0 += '.exe'
3025 if cmd0.endswith(proc.executable) and os.path.isfile(cmd0):
3026 # Fix the path.
3027 cmd0 = cmd0.replace('/', os.path.sep)
3028 cmd0 = os.path.normpath(cmd0)
3029 proc.executable = get_native_path_case(cmd0)
3030 logging.info(
3031 'New child: %s -> %d %s' % (ppid, pid, proc.executable))
3032
3033 def handle_Thread_End(self, line):
3034 """Has the same parameters as Thread_Start."""
3035 tid = int(line[self.TID], 16)
3036 self._threads_active.pop(tid, None)
3037
3038 def handle_Thread_Start(self, line):
3039 """Handles a new thread created.
3040
3041 Do not use self.PID here since a process' initial thread is created by
3042 the parent process.
3043 """
3044 PROCESS_ID = self.USER_DATA
3045 TTHREAD_ID = self.USER_DATA + 1
3046 #STACK_BASE = self.USER_DATA + 2
3047 #STACK_LIMIT = self.USER_DATA + 3
3048 #USER_STACK_BASE = self.USER_DATA + 4
3049 #USER_STACK_LIMIT = self.USER_DATA + 5
3050 #AFFINITY = self.USER_DATA + 6
3051 #WIN32_START_ADDR = self.USER_DATA + 7
3052 #TEB_BASE = self.USER_DATA + 8
3053 #SUB_PROCESS_TAG = self.USER_DATA + 9
3054 #BASE_PRIORITY = self.USER_DATA + 10
3055 #PAGE_PRIORITY = self.USER_DATA + 11
3056 #IO_PRIORITY = self.USER_DATA + 12
3057 #THREAD_FLAGS = self.USER_DATA + 13
3058 # Do not use self.PID here since a process' initial thread is created by
3059 # the parent process.
3060 pid = int(line[PROCESS_ID], 16)
3061 tid = int(line[TTHREAD_ID], 16)
3062 logging.debug('New thread pid:%d, tid:%d' % (pid, tid))
3063 self._threads_active[tid] = pid
3064
3065 @classmethod
3066 def supported_events(cls):
3067 """Returns all the procesed events."""
3068 out = []
3069 for member in dir(cls):
3070 match = re.match(r'^handle_([A-Za-z]+)_([A-Za-z]+)$', member)
3071 if match:
3072 out.append(match.groups())
3073 return out
3074
3075 class Tracer(ApiBase.Tracer):
3076 # The basic headers.
3077 EXPECTED_HEADER = [
3078 u'Event Name',
3079 u'Type',
3080 u'Event ID',
3081 u'Version',
3082 u'Channel',
3083 u'Level', # 5
3084 u'Opcode',
3085 u'Task',
3086 u'Keyword',
3087 u'PID',
3088 u'TID', # 10
3089 u'Processor Number',
3090 u'Instance ID',
3091 u'Parent Instance ID',
3092 u'Activity ID',
3093 u'Related Activity ID', # 15
3094 u'Clock-Time',
3095 u'Kernel(ms)', # Both have a resolution of ~15ms which makes them
3096 u'User(ms)', # pretty much useless.
3097 u'User Data', # Extra arguments that are event-specific.
3098 ]
3099 # Only the useful headers common to all entries are listed there. Any column
3100 # at 19 or higher is dependent on the specific event.
3101 EVENT_NAME = 0
3102 TYPE = 1
3103 PID = 9
3104 TID = 10
3105 PROCESSOR_ID = 11
3106 TIMESTAMP = 16
3107 NULL_GUID = '{00000000-0000-0000-0000-000000000000}'
3108 USER_DATA = 19
3109
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003110 class CsvReader(object):
3111 """CSV reader that reads files generated by tracerpt.exe.
3112
3113 csv.reader() fails to read them properly, it mangles file names quoted
3114 with "" with a comma in it.
3115 """
3116 # 0. Had a ',' or one of the following ' ' after a comma, next should
3117 # be ' ', '"' or string or ',' for an empty field.
3118 ( HAD_DELIMITER,
3119 # 1. Processing an unquoted field up to ','.
3120 IN_STR,
3121 # 2. Processing a new field starting with '"'.
3122 STARTING_STR_QUOTED,
3123 # 3. Second quote in a row at the start of a field. It could be either
3124 # '""foo""' or '""'. Who the hell thought it was a great idea to use
3125 # the same character for delimiting and escaping?
3126 STARTING_SECOND_QUOTE,
3127 # 4. A quote inside a quoted string where the previous character was
3128 # not a quote, so the string is not empty. Can be either: end of a
3129 # quoted string (a delimiter) or a quote escape. The next char must be
3130 # either '"' or ','.
3131 HAD_QUOTE_IN_QUOTED,
3132 # 5. Second quote inside a quoted string.
3133 HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED,
3134 # 6. Processing a field that started with '"'.
3135 IN_STR_QUOTED) = range(7)
3136
3137 def __init__(self, f):
3138 self.f = f
3139
3140 def __iter__(self):
3141 return self
3142
3143 def next(self):
3144 """Splits the line in fields."""
3145 line = self.f.readline()
3146 if not line:
3147 raise StopIteration()
3148 line = line.strip()
3149 fields = []
3150 state = self.HAD_DELIMITER
3151 for i, c in enumerate(line):
3152 if state == self.HAD_DELIMITER:
3153 if c == ',':
3154 # Empty field.
3155 fields.append('')
3156 elif c == ' ':
3157 # Ignore initial whitespaces
3158 pass
3159 elif c == '"':
3160 state = self.STARTING_STR_QUOTED
3161 fields.append('')
3162 else:
3163 # Start of a new field.
3164 state = self.IN_STR
3165 fields.append(c)
3166
3167 elif state == self.IN_STR:
3168 # Do not accept quote inside unquoted field.
3169 assert c != '"', (i, c, line, fields)
3170 if c == ',':
3171 fields[-1] = fields[-1].strip()
3172 state = self.HAD_DELIMITER
3173 else:
3174 fields[-1] = fields[-1] + c
3175
3176 elif state == self.STARTING_STR_QUOTED:
3177 if c == '"':
3178 # Do not store the character yet.
3179 state = self.STARTING_SECOND_QUOTE
3180 else:
3181 state = self.IN_STR_QUOTED
3182 fields[-1] = fields[-1] + c
3183
3184 elif state == self.STARTING_SECOND_QUOTE:
3185 if c == ',':
3186 # It was an empty field. '""' == ''.
3187 state = self.HAD_DELIMITER
3188 else:
3189 fields[-1] = fields[-1] + '"' + c
3190 state = self.IN_STR_QUOTED
3191
3192 elif state == self.HAD_QUOTE_IN_QUOTED:
3193 if c == ',':
3194 # End of the string.
3195 state = self.HAD_DELIMITER
3196 elif c == '"':
3197 state = self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED
3198 else:
3199 # The previous double-quote was just an unescaped quote.
3200 fields[-1] = fields[-1] + '"' + c
3201 state = self.IN_STR_QUOTED
3202
3203 elif state == self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED:
3204 if c == ',':
3205 # End of the string.
3206 state = self.HAD_DELIMITER
3207 fields[-1] = fields[-1] + '"'
3208 else:
maruel@chromium.org261d1352013-02-25 19:23:20 +00003209 # That's just how the logger rolls. Revert back to appending the
3210 # char and "guess" it was a quote in a double-quoted string.
3211 state = self.IN_STR_QUOTED
3212 fields[-1] = fields[-1] + '"' + c
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003213
3214 elif state == self.IN_STR_QUOTED:
3215 if c == '"':
3216 # Could be a delimiter or an escape.
3217 state = self.HAD_QUOTE_IN_QUOTED
3218 else:
3219 fields[-1] = fields[-1] + c
3220
3221 if state == self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED:
3222 fields[-1] = fields[-1] + '"'
3223 else:
3224 assert state in (
3225 # Terminated with a normal field.
3226 self.IN_STR,
3227 # Terminated with an empty field.
3228 self.STARTING_SECOND_QUOTE,
3229 # Terminated with a normal quoted field.
3230 self.HAD_QUOTE_IN_QUOTED), (
3231 line, state, fields)
3232 return fields
3233
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003234 def __init__(self, logname):
3235 """Starts the log collection.
3236
3237 Requires administrative access. logman.exe is synchronous so no need for a
3238 "warmup" call. 'Windows Kernel Trace' is *localized* so use its GUID
3239 instead. The GUID constant name is SystemTraceControlGuid. Lovely.
3240
3241 One can get the list of potentially interesting providers with:
3242 "logman query providers | findstr /i file"
3243 """
3244 super(LogmanTrace.Tracer, self).__init__(logname)
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00003245 self._signal_script = create_subprocess_thunk()
3246 self._scripts_to_cleanup.append(self._signal_script)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003247 cmd_start = [
3248 'logman.exe',
3249 'start',
3250 'NT Kernel Logger',
3251 '-p', '{9e814aad-3204-11d2-9a82-006008a86939}',
3252 # splitio,fileiocompletion,syscall,file,cswitch,img
3253 '(process,fileio,thread)',
3254 '-o', self._logname + '.etl',
3255 '-ets', # Send directly to kernel
3256 # Values extracted out of thin air.
3257 # Event Trace Session buffer size in kb.
3258 '-bs', '10240',
3259 # Number of Event Trace Session buffers.
3260 '-nb', '16', '256',
3261 ]
3262 logging.debug('Running: %s' % cmd_start)
3263 try:
3264 subprocess.check_call(
3265 cmd_start,
3266 stdin=subprocess.PIPE,
3267 stdout=subprocess.PIPE,
3268 stderr=subprocess.STDOUT)
3269 except subprocess.CalledProcessError, e:
3270 if e.returncode == -2147024891:
3271 print >> sys.stderr, 'Please restart with an elevated admin prompt'
3272 elif e.returncode == -2144337737:
3273 print >> sys.stderr, (
3274 'A kernel trace was already running, stop it and try again')
3275 raise
3276
3277 def trace(self, cmd, cwd, tracename, output):
3278 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
3279 assert os.path.isabs(cmd[0]), cmd[0]
3280 assert os.path.isabs(cwd), cwd
3281 assert os.path.normpath(cwd) == cwd, cwd
3282 with self._lock:
3283 if not self._initialized:
3284 raise TracingFailure(
3285 'Called Tracer.trace() on an unitialized object',
3286 None, None, None, tracename)
3287 assert tracename not in (i['trace'] for i in self._traces)
3288
3289 # Use "logman -?" for help.
3290
3291 stdout = stderr = None
3292 if output:
3293 stdout = subprocess.PIPE
3294 stderr = subprocess.STDOUT
3295
3296 # Run the child process.
3297 logging.debug('Running: %s' % cmd)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00003298 # Use the temporary script generated with create_subprocess_thunk() so we
3299 # have a clear pid owner. Since trace_inputs.py can be used as a library
3300 # and could trace multiple processes simultaneously, it makes it more
3301 # complex if the executable to be traced is executed directly here. It
3302 # also solves issues related to logman.exe that needs to be executed to
3303 # control the kernel trace.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003304 child_cmd = [
3305 sys.executable,
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00003306 self._signal_script,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003307 tracename,
3308 ]
3309 child = subprocess.Popen(
3310 child_cmd + fix_python_path(cmd),
3311 cwd=cwd,
3312 stdin=subprocess.PIPE,
3313 stdout=stdout,
3314 stderr=stderr)
3315 logging.debug('Started child pid: %d' % child.pid)
3316 out = child.communicate()[0]
3317 # This doesn't mean all the grand-children are done. Sadly, we don't have
3318 # a good way to determine that.
3319
3320 with self._lock:
3321 assert tracename not in (i['trace'] for i in self._traces)
3322 self._traces.append({
maruel@chromium.orgf43e68b2012-10-15 20:23:10 +00003323 'cmd': cmd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003324 'cwd': cwd,
maruel@chromium.org964ddde2012-10-29 18:13:46 +00003325 'output': out,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003326 'pid': child.pid,
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00003327 # Used to figure out the real process when process ids are reused.
3328 'thunk_cmd': child_cmd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003329 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003330 })
3331
3332 return child.returncode, out
3333
3334 def close(self, _timeout=None):
3335 """Stops the kernel log collection and converts the traces to text
3336 representation.
3337 """
3338 with self._lock:
maruel@chromium.org808e3f22013-05-16 16:42:54 +00003339 try:
3340 super(LogmanTrace.Tracer, self).close()
3341 finally:
3342 cmd_stop = [
3343 'logman.exe',
3344 'stop',
3345 'NT Kernel Logger',
3346 '-ets', # Sends the command directly to the kernel.
3347 ]
3348 logging.debug('Running: %s' % cmd_stop)
3349 subprocess.check_call(
3350 cmd_stop,
3351 stdin=subprocess.PIPE,
3352 stdout=subprocess.PIPE,
3353 stderr=subprocess.STDOUT)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003354
3355 def post_process_log(self):
3356 """Converts the .etl file into .csv then into .json."""
3357 super(LogmanTrace.Tracer, self).post_process_log()
3358 logformat = 'csv'
3359 self._convert_log(logformat)
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003360 self._trim_log(logformat)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003361
maruel@chromium.org808e3f22013-05-16 16:42:54 +00003362 def _gen_logdata(self):
3363 return {
3364 'format': 'csv',
3365 'traces': self._traces,
3366 }
3367
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003368 def _trim_log(self, logformat):
3369 """Reduces the amount of data in original log by generating a 'reduced'
3370 log.
3371 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003372 if logformat == 'csv_utf16':
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003373 file_handle = codecs.open(
3374 self._logname + '.' + logformat, 'r', encoding='utf-16')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003375
3376 elif logformat == 'csv':
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003377 assert sys.getfilesystemencoding() == 'mbcs'
3378 file_handle = codecs.open(
3379 self._logname + '.' + logformat, 'r',
3380 encoding=get_current_encoding())
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003381
3382 supported_events = LogmanTrace.Context.supported_events()
3383
3384 def trim(generator):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003385 """Loads items from the generator and returns the interesting data.
3386
3387 It filters out any uninteresting line and reduce the amount of data in
3388 the trace.
3389 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003390 for index, line in enumerate(generator):
3391 if not index:
3392 if line != self.EXPECTED_HEADER:
3393 raise TracingFailure(
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003394 'Found malformed header: %s' % line,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003395 None, None, None)
3396 continue
3397 # As you can see, the CSV is full of useful non-redundant information:
3398 if (line[2] != '0' or # Event ID
3399 line[3] not in ('2', '3') or # Version
3400 line[4] != '0' or # Channel
3401 line[5] != '0' or # Level
3402 line[7] != '0' or # Task
3403 line[8] != '0x0000000000000000' or # Keyword
3404 line[12] != '' or # Instance ID
3405 line[13] != '' or # Parent Instance ID
3406 line[14] != self.NULL_GUID or # Activity ID
3407 line[15] != ''): # Related Activity ID
3408 raise TracingFailure(
3409 'Found unexpected values in line: %s' % ' '.join(line),
3410 None, None, None)
3411
3412 if (line[self.EVENT_NAME], line[self.TYPE]) not in supported_events:
3413 continue
3414
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003415 yield [
3416 line[self.EVENT_NAME],
3417 line[self.TYPE],
3418 line[self.PID],
3419 line[self.TID],
3420 line[self.PROCESSOR_ID],
3421 line[self.TIMESTAMP],
3422 ] + line[self.USER_DATA:]
3423
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003424 # must not convert the trim() call into a list, since it will use too much
3425 # memory for large trace. use a csv file as a workaround since the json
3426 # parser requires a complete in-memory file.
3427 with open('%s.preprocessed' % self._logname, 'wb') as f:
3428 # $ and * can't be used in file name on windows, reducing the likelihood
3429 # of having to escape a string.
3430 out = csv.writer(
3431 f, delimiter='$', quotechar='*', quoting=csv.QUOTE_MINIMAL)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003432 for line in trim(self.CsvReader(file_handle)):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003433 out.writerow([s.encode('utf-8') for s in line])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003434
3435 def _convert_log(self, logformat):
3436 """Converts the ETL trace to text representation.
3437
3438 Normally, 'csv' is sufficient. If complex scripts are used (like eastern
3439 languages), use 'csv_utf16'. If localization gets in the way, use 'xml'.
3440
3441 Arguments:
3442 - logformat: Text format to be generated, csv, csv_utf16 or xml.
3443
3444 Use "tracerpt -?" for help.
3445 """
3446 LOCALE_INVARIANT = 0x7F
3447 windll.kernel32.SetThreadLocale(LOCALE_INVARIANT)
3448 cmd_convert = [
3449 'tracerpt.exe',
3450 '-l', self._logname + '.etl',
3451 '-o', self._logname + '.' + logformat,
3452 '-gmt', # Use UTC
3453 '-y', # No prompt
3454 # Use -of XML to get the header of each items after column 19, e.g. all
3455 # the actual headers of 'User Data'.
3456 ]
3457
3458 if logformat == 'csv':
3459 # tracerpt localizes the 'Type' column, for major brainfuck
3460 # entertainment. I can't imagine any sane reason to do that.
3461 cmd_convert.extend(['-of', 'CSV'])
3462 elif logformat == 'csv_utf16':
3463 # This causes it to use UTF-16, which doubles the log size but ensures
3464 # the log is readable for non-ASCII characters.
3465 cmd_convert.extend(['-of', 'CSV', '-en', 'Unicode'])
3466 elif logformat == 'xml':
3467 cmd_convert.extend(['-of', 'XML'])
3468 else:
3469 raise ValueError('Unexpected log format \'%s\'' % logformat)
3470 logging.debug('Running: %s' % cmd_convert)
3471 # This can takes tens of minutes for large logs.
3472 # Redirects all output to stderr.
3473 subprocess.check_call(
3474 cmd_convert,
3475 stdin=subprocess.PIPE,
3476 stdout=sys.stderr,
3477 stderr=sys.stderr)
3478
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003479 def __init__(self, use_sudo=False): # pylint: disable=W0613
3480 super(LogmanTrace, self).__init__()
3481 # Ignore use_sudo. It's irrelevant on Windows but kept to simplify the API.
3482
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003483 @staticmethod
3484 def clean_trace(logname):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003485 for ext in ('', '.csv', '.etl', '.json', '.xml', '.preprocessed'):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003486 if os.path.isfile(logname + ext):
3487 os.remove(logname + ext)
3488
3489 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003490 def parse_log(cls, logname, blacklist, trace_name):
3491 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00003492 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003493
3494 def blacklist_more(filepath):
3495 # All the NTFS metadata is in the form x:\$EXTEND or stuff like that.
3496 return blacklist(filepath) or re.match(r'[A-Z]\:\\\$EXTEND', filepath)
3497
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003498 # Create a list of (Context, result_dict) tuples. This is necessary because
3499 # the csv file may be larger than the amount of available memory.
3500 contexes = [
3501 (
3502 cls.Context(
3503 blacklist_more, item['pid'], item['trace'], item['thunk_cmd']),
3504 {
3505 'output': item['output'],
3506 'trace': item['trace'],
3507 },
3508 )
3509 for item in read_json(logname)['traces']
3510 if not trace_name or item['trace'] == trace_name
3511 ]
3512
3513 # The log may be too large to fit in memory and it is not efficient to read
3514 # it multiple times, so multiplex the contexes instead, which is slightly
3515 # more awkward.
3516 with open('%s.preprocessed' % logname, 'rb') as f:
3517 lines = csv.reader(
3518 f, delimiter='$', quotechar='*', quoting=csv.QUOTE_MINIMAL)
3519 for encoded in lines:
3520 line = [s.decode('utf-8') for s in encoded]
3521 # Convert the PID in-place from hex.
3522 line[cls.Context.PID] = int(line[cls.Context.PID], 16)
3523 for context in contexes:
3524 if 'exception' in context[1]:
3525 continue
3526 try:
3527 context[0].on_line(line)
3528 except TracingFailure:
3529 context[1]['exception'] = sys.exc_info()
3530
3531 for context in contexes:
3532 if 'exception' in context[1]:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003533 continue
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003534 context[1]['results'] = context[0].to_results()
3535
3536 return [context[1] for context in contexes]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003537
3538
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003539def get_api(**kwargs):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003540 """Returns the correct implementation for the current OS."""
3541 if sys.platform == 'cygwin':
3542 raise NotImplementedError(
3543 'Not implemented for cygwin, start the script from Win32 python')
3544 flavors = {
3545 'win32': LogmanTrace,
3546 'darwin': Dtrace,
3547 'sunos5': Dtrace,
3548 'freebsd7': Dtrace,
3549 'freebsd8': Dtrace,
3550 }
3551 # Defaults to strace.
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003552 return flavors.get(sys.platform, Strace)(**kwargs)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003553
3554
3555def extract_directories(root_dir, files, blacklist):
3556 """Detects if all the files in a directory are in |files| and if so, replace
3557 the individual files by a Results.Directory instance.
3558
3559 Takes a list of Results.File instances and returns a shorter list of
3560 Results.File and Results.Directory instances.
3561
3562 Arguments:
3563 - root_dir: Optional base directory that shouldn't be search further.
3564 - files: list of Results.File instances.
maruel@chromium.org3683afe2013-07-27 00:09:27 +00003565 - blacklist: lambda to reject unneeded files, for example r'.+\.pyc'.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003566 """
3567 logging.info(
3568 'extract_directories(%s, %d files, ...)' % (root_dir, len(files)))
3569 assert not (root_dir or '').endswith(os.path.sep), root_dir
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003570 # It is important for root_dir to not be a symlinked path, make sure to call
3571 # os.path.realpath() as needed.
3572 assert not root_dir or (
3573 os.path.realpath(get_native_path_case(root_dir)) == root_dir)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003574 assert not any(isinstance(f, Results.Directory) for f in files)
3575 # Remove non existent files.
3576 files = [f for f in files if f.existent]
3577 if not files:
3578 return files
3579 # All files must share the same root, which can be None.
3580 assert len(set(f.root for f in files)) == 1, set(f.root for f in files)
3581
3582 # Creates a {directory: {filename: File}} mapping, up to root.
3583 buckets = {}
3584 if root_dir:
3585 buckets[root_dir] = {}
3586 for fileobj in files:
3587 path = fileobj.full_path
3588 directory = os.path.dirname(path)
3589 assert directory
3590 # Do not use os.path.basename() so trailing os.path.sep is kept.
3591 basename = path[len(directory)+1:]
3592 files_in_directory = buckets.setdefault(directory, {})
3593 files_in_directory[basename] = fileobj
3594 # Add all the directories recursively up to root.
3595 while True:
3596 old_d = directory
3597 directory = os.path.dirname(directory)
3598 if directory + os.path.sep == root_dir or directory == old_d:
3599 break
3600 buckets.setdefault(directory, {})
3601
3602 root_prefix = len(root_dir) + 1 if root_dir else 0
3603 for directory in sorted(buckets, reverse=True):
maruel@chromium.orgb1678e62013-03-19 15:55:17 +00003604 if not os.path.isdir(directory):
3605 logging.debug(
3606 '%s was a directory but doesn\'t exist anymore; ignoring', directory)
3607 continue
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003608 actual = set(f for f in os.listdir(directory) if not blacklist(f))
3609 expected = set(buckets[directory])
3610 if not (actual - expected):
3611 parent = os.path.dirname(directory)
3612 buckets[parent][os.path.basename(directory)] = Results.Directory(
3613 root_dir,
3614 directory[root_prefix:],
3615 False,
3616 sum(f.size for f in buckets[directory].itervalues()),
3617 sum(f.nb_files for f in buckets[directory].itervalues()))
3618 # Remove the whole bucket.
3619 del buckets[directory]
3620
3621 # Reverse the mapping with what remains. The original instances are returned,
3622 # so the cached meta data is kept.
3623 files = sum((x.values() for x in buckets.itervalues()), [])
3624 return sorted(files, key=lambda x: x.path)
3625
3626
3627def trace(logfile, cmd, cwd, api, output):
3628 """Traces an executable. Returns (returncode, output) from api.
3629
3630 Arguments:
3631 - logfile: file to write to.
3632 - cmd: command to run.
3633 - cwd: current directory to start the process in.
3634 - api: a tracing api instance.
3635 - output: if True, returns output, otherwise prints it at the console.
3636 """
3637 cmd = fix_python_path(cmd)
3638 api.clean_trace(logfile)
3639 with api.get_tracer(logfile) as tracer:
3640 return tracer.trace(cmd, cwd, 'default', output)
3641
3642
maruel@chromium.orge5322512013-08-19 20:17:57 +00003643def CMDclean(parser, args):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003644 """Cleans up traces."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003645 options, args = parser.parse_args(args)
3646 api = get_api()
3647 api.clean_trace(options.log)
3648 return 0
3649
3650
maruel@chromium.orge5322512013-08-19 20:17:57 +00003651def CMDtrace(parser, args):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003652 """Traces an executable."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003653 parser.allow_interspersed_args = False
3654 parser.add_option(
3655 '-q', '--quiet', action='store_true',
3656 help='Redirects traced executable output to /dev/null')
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003657 parser.add_option(
3658 '-s', '--sudo', action='store_true',
3659 help='Use sudo when shelling out the tracer tool (ignored on Windows)')
3660 parser.add_option(
3661 '-n', '--no-sudo', action='store_false',
3662 help='Don\'t use sudo')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003663 options, args = parser.parse_args(args)
3664
3665 if not args:
3666 parser.error('Please provide a command to run')
3667
3668 if not os.path.isabs(args[0]) and os.access(args[0], os.X_OK):
3669 args[0] = os.path.abspath(args[0])
3670
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003671 # options.sudo default value is None, which is to do whatever tracer defaults
3672 # do.
3673 api = get_api(use_sudo=options.sudo)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003674 return trace(options.log, args, os.getcwd(), api, options.quiet)[0]
3675
3676
maruel@chromium.orge5322512013-08-19 20:17:57 +00003677def CMDread(parser, args):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003678 """Reads the logs and prints the result."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003679 parser.add_option(
3680 '-V', '--variable',
3681 nargs=2,
3682 action='append',
3683 dest='variables',
3684 metavar='VAR_NAME directory',
3685 default=[],
3686 help=('Variables to replace relative directories against. Example: '
3687 '"-v \'$HOME\' \'/home/%s\'" will replace all occurence of your '
3688 'home dir with $HOME') % getpass.getuser())
3689 parser.add_option(
3690 '--root-dir',
3691 help='Root directory to base everything off it. Anything outside of this '
3692 'this directory will not be reported')
3693 parser.add_option(
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003694 '--trace-name',
3695 help='Only reads one of the trace. Defaults to reading all traces')
3696 parser.add_option(
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003697 '-j', '--json', action='store_true',
3698 help='Outputs raw result data as json')
3699 parser.add_option(
maruel@chromium.org3683afe2013-07-27 00:09:27 +00003700 '--trace-blacklist', action='append', default=[],
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003701 help='List of regexp to use as blacklist filter')
3702 options, args = parser.parse_args(args)
3703
3704 if options.root_dir:
maruel@chromium.org7dc17b32013-04-02 22:27:05 +00003705 options.root_dir = get_native_path_case(
3706 unicode(os.path.abspath(options.root_dir)))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003707
3708 variables = dict(options.variables)
3709 api = get_api()
maruel@chromium.org3683afe2013-07-27 00:09:27 +00003710 blacklist = gen_blacklist(options.trace_blacklist)
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003711 data = api.parse_log(options.log, blacklist, options.trace_name)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003712 # Process each trace.
3713 output_as_json = []
maruel@chromium.org9e56b592013-05-21 16:56:05 +00003714 try:
3715 for item in data:
3716 if 'exception' in item:
3717 # Do not abort the other traces.
3718 print >> sys.stderr, (
3719 'Trace %s: Got an exception: %s' % (
3720 item['trace'], item['exception'][1]))
3721 continue
3722 results = item['results']
3723 if options.root_dir:
3724 results = results.strip_root(options.root_dir)
3725
3726 if options.json:
3727 output_as_json.append(results.flatten())
3728 else:
3729 simplified = extract_directories(
3730 options.root_dir, results.files, blacklist)
3731 simplified = [f.replace_variables(variables) for f in simplified]
3732 if len(data) > 1:
3733 print('Trace: %s' % item['trace'])
3734 print('Total: %d' % len(results.files))
3735 print('Non existent: %d' % len(results.non_existent))
3736 for f in results.non_existent:
3737 print(' %s' % f.path)
3738 print(
3739 'Interesting: %d reduced to %d' % (
3740 len(results.existent), len(simplified)))
3741 for f in simplified:
3742 print(' %s' % f.path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003743
3744 if options.json:
maruel@chromium.org9e56b592013-05-21 16:56:05 +00003745 write_json(sys.stdout, output_as_json, False)
3746 except KeyboardInterrupt:
3747 return 1
3748 except IOError as e:
3749 if e.errno == errno.EPIPE:
3750 # Do not print a stack trace when the output is piped to less and the user
3751 # quits before the whole output was written.
3752 return 1
3753 raise
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003754 return 0
3755
3756
3757class OptionParserWithLogging(optparse.OptionParser):
3758 """Adds --verbose option."""
3759 def __init__(self, verbose=0, **kwargs):
3760 optparse.OptionParser.__init__(self, **kwargs)
3761 self.add_option(
3762 '-v', '--verbose',
3763 action='count',
3764 default=verbose,
3765 help='Use multiple times to increase verbosity')
3766
3767 def parse_args(self, *args, **kwargs):
3768 options, args = optparse.OptionParser.parse_args(self, *args, **kwargs)
3769 levels = [logging.ERROR, logging.INFO, logging.DEBUG]
3770 logging.basicConfig(
3771 level=levels[min(len(levels)-1, options.verbose)],
3772 format='%(levelname)5s %(module)15s(%(lineno)3d): %(message)s')
3773 return options, args
3774
3775
maruel@chromium.orge5322512013-08-19 20:17:57 +00003776class OptionParserTraceInputs(OptionParserWithLogging):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003777 """Adds automatic --log handling."""
3778 def __init__(self, **kwargs):
maruel@chromium.orge5322512013-08-19 20:17:57 +00003779 OptionParserWithLogging.__init__(self, **kwargs)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003780 self.add_option(
3781 '-l', '--log', help='Log file to generate or read, required')
3782
3783 def parse_args(self, *args, **kwargs):
3784 """Makes sure the paths make sense.
3785
3786 On Windows, / and \ are often mixed together in a path.
3787 """
maruel@chromium.orge5322512013-08-19 20:17:57 +00003788 options, args = OptionParserWithLogging.parse_args(
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003789 self, *args, **kwargs)
3790 if not options.log:
3791 self.error('Must supply a log file with -l')
3792 options.log = os.path.abspath(options.log)
3793 return options, args
3794
3795
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003796def main(argv):
maruel@chromium.orge5322512013-08-19 20:17:57 +00003797 dispatcher = subcommand.CommandDispatcher(__name__)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003798 try:
maruel@chromium.orge5322512013-08-19 20:17:57 +00003799 return dispatcher.execute(OptionParserTraceInputs(), argv)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003800 except TracingFailure, e:
3801 sys.stderr.write('\nError: ')
3802 sys.stderr.write(str(e))
3803 sys.stderr.write('\n')
3804 return 1
3805
3806
3807if __name__ == '__main__':
maruel@chromium.orge5322512013-08-19 20:17:57 +00003808 fix_encoding.fix_encoding()
3809 disable_buffering()
3810 colorama.init()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003811 sys.exit(main(sys.argv[1:]))