blob: dd9c8045dbea0ae14a45525d54ca6b971462420a [file] [log] [blame]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001#!/usr/bin/env python
2# coding=utf-8
3# Copyright (c) 2012 The Chromium Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""Traces an executable and its child processes and extract the files accessed
8by them.
9
10The implementation uses OS-specific API. The native Kernel logger and the ETL
11interface is used on Windows. Dtrace is used on OSX. Strace is used otherwise.
12The OS-specific implementation is hidden in an 'API' interface.
13
14The results are embedded in a Results instance. The tracing is done in two
15phases, the first is to do the actual trace and generate an
16implementation-specific log file. Then the log file is parsed to extract the
17information, including the individual child processes and the files accessed
18from the log.
19"""
20
21import codecs
22import csv
maruel@chromium.org9e56b592013-05-21 16:56:05 +000023import errno
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000024import getpass
25import glob
26import json
27import logging
28import optparse
29import os
30import re
maruel@chromium.org9b2112a2013-04-05 18:01:02 +000031import stat
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000032import subprocess
33import sys
34import tempfile
35import threading
36import time
maruel@chromium.org9b2112a2013-04-05 18:01:02 +000037import unicodedata
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000038import weakref
39
40## OS-specific imports
41
42if sys.platform == 'win32':
43 from ctypes.wintypes import byref, create_unicode_buffer, c_int, c_wchar_p
44 from ctypes.wintypes import windll, FormatError # pylint: disable=E0611
45 from ctypes.wintypes import GetLastError # pylint: disable=E0611
46elif sys.platform == 'darwin':
47 import Carbon.File # pylint: disable=F0401
48 import MacOS # pylint: disable=F0401
49
50
51BASE_DIR = os.path.dirname(os.path.abspath(__file__))
52ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR))
53
54
55class TracingFailure(Exception):
56 """An exception occured during tracing."""
57 def __init__(self, description, pid, line_number, line, *args):
58 super(TracingFailure, self).__init__(
59 description, pid, line_number, line, *args)
60 self.description = description
61 self.pid = pid
62 self.line_number = line_number
63 self.line = line
64 self.extra = args
65
66 def __str__(self):
67 out = self.description
68 if self.pid:
69 out += '\npid: %d' % self.pid
70 if self.line_number:
71 out += '\nline: %d' % self.line_number
72 if self.line:
73 out += '\n%s' % self.line
74 if self.extra:
75 out += '\n' + ', '.join(map(str, filter(None, self.extra)))
76 return out
77
78
79## OS-specific functions
80
81if sys.platform == 'win32':
82 def QueryDosDevice(drive_letter):
83 """Returns the Windows 'native' path for a DOS drive letter."""
84 assert re.match(r'^[a-zA-Z]:$', drive_letter), drive_letter
maruel@chromium.org306e0e72012-11-02 18:22:03 +000085 assert isinstance(drive_letter, unicode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000086 # Guesswork. QueryDosDeviceW never returns the required number of bytes.
87 chars = 1024
maruel@chromium.org306e0e72012-11-02 18:22:03 +000088 drive_letter = drive_letter
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000089 p = create_unicode_buffer(chars)
90 if 0 == windll.kernel32.QueryDosDeviceW(drive_letter, p, chars):
91 err = GetLastError()
92 if err:
93 # pylint: disable=E0602
maruel@chromium.orgf37c2472012-11-02 20:06:13 +000094 msg = u'QueryDosDevice(%s): %s (%d)' % (
95 drive_letter, FormatError(err), err)
96 raise WindowsError(err, msg.encode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000097 return p.value
98
99
100 def GetShortPathName(long_path):
101 """Returns the Windows short path equivalent for a 'long' path."""
maruel@chromium.org68bb1482013-02-15 22:36:24 +0000102 assert isinstance(long_path, unicode), repr(long_path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000103 # Adds '\\\\?\\' when given an absolute path so the MAX_PATH (260) limit is
104 # not enforced.
105 if os.path.isabs(long_path) and not long_path.startswith('\\\\?\\'):
106 long_path = '\\\\?\\' + long_path
107 chars = windll.kernel32.GetShortPathNameW(long_path, None, 0)
108 if chars:
109 p = create_unicode_buffer(chars)
110 if windll.kernel32.GetShortPathNameW(long_path, p, chars):
111 return p.value
112
113 err = GetLastError()
114 if err:
115 # pylint: disable=E0602
maruel@chromium.orgf37c2472012-11-02 20:06:13 +0000116 msg = u'GetShortPathName(%s): %s (%d)' % (
117 long_path, FormatError(err), err)
118 raise WindowsError(err, msg.encode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000119
120
121 def GetLongPathName(short_path):
122 """Returns the Windows long path equivalent for a 'short' path."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000123 assert isinstance(short_path, unicode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000124 # Adds '\\\\?\\' when given an absolute path so the MAX_PATH (260) limit is
125 # not enforced.
126 if os.path.isabs(short_path) and not short_path.startswith('\\\\?\\'):
127 short_path = '\\\\?\\' + short_path
128 chars = windll.kernel32.GetLongPathNameW(short_path, None, 0)
129 if chars:
130 p = create_unicode_buffer(chars)
131 if windll.kernel32.GetLongPathNameW(short_path, p, chars):
132 return p.value
133
134 err = GetLastError()
135 if err:
136 # pylint: disable=E0602
maruel@chromium.orgf37c2472012-11-02 20:06:13 +0000137 msg = u'GetLongPathName(%s): %s (%d)' % (
138 short_path, FormatError(err), err)
139 raise WindowsError(err, msg.encode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000140
141
142 def get_current_encoding():
143 """Returns the 'ANSI' code page associated to the process."""
144 return 'cp%d' % int(windll.kernel32.GetACP())
145
146
147 class DosDriveMap(object):
148 """Maps \Device\HarddiskVolumeN to N: on Windows."""
149 # Keep one global cache.
150 _MAPPING = {}
151
152 def __init__(self):
153 """Lazy loads the cache."""
154 if not self._MAPPING:
155 # This is related to UNC resolver on windows. Ignore that.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000156 self._MAPPING[u'\\Device\\Mup'] = None
157 self._MAPPING[u'\\SystemRoot'] = os.environ[u'SystemRoot']
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000158
159 for letter in (chr(l) for l in xrange(ord('C'), ord('Z')+1)):
160 try:
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000161 letter = u'%s:' % letter
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000162 mapped = QueryDosDevice(letter)
163 if mapped in self._MAPPING:
164 logging.warn(
165 ('Two drives: \'%s\' and \'%s\', are mapped to the same disk'
166 '. Drive letters are a user-mode concept and the kernel '
167 'traces only have NT path, so all accesses will be '
168 'associated with the first drive letter, independent of the '
169 'actual letter used by the code') % (
170 self._MAPPING[mapped], letter))
171 else:
172 self._MAPPING[mapped] = letter
173 except WindowsError: # pylint: disable=E0602
174 pass
175
176 def to_win32(self, path):
177 """Converts a native NT path to Win32/DOS compatible path."""
178 match = re.match(r'(^\\Device\\[a-zA-Z0-9]+)(\\.*)?$', path)
179 if not match:
180 raise ValueError(
181 'Can\'t convert %s into a Win32 compatible path' % path,
182 path)
183 if not match.group(1) in self._MAPPING:
184 # Unmapped partitions may be accessed by windows for the
185 # fun of it while the test is running. Discard these.
186 return None
187 drive = self._MAPPING[match.group(1)]
188 if not drive or not match.group(2):
189 return drive
190 return drive + match.group(2)
191
192
193 def isabs(path):
194 """Accepts X: as an absolute path, unlike python's os.path.isabs()."""
195 return os.path.isabs(path) or len(path) == 2 and path[1] == ':'
196
197
csharp@chromium.orgf2eacff2013-04-04 14:20:20 +0000198 def find_item_native_case(root, item):
199 """Gets the native path case of a single item based at root_path."""
200 if item == '..':
201 return item
202
203 root = get_native_path_case(root)
204 return os.path.basename(get_native_path_case(os.path.join(root, item)))
205
206
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000207 def get_native_path_case(p):
208 """Returns the native path case for an existing file.
209
210 On Windows, removes any leading '\\?\'.
211 """
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000212 assert isinstance(p, unicode), repr(p)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000213 if not isabs(p):
214 raise ValueError(
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +0000215 'get_native_path_case(%r): Require an absolute path' % p, p)
216
maruel@chromium.org037758d2012-12-10 17:59:46 +0000217 # Make sure it is normalized to os.path.sep. Do not do it here to keep the
218 # function fast
219 assert '/' not in p, p
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +0000220 suffix = ''
221 count = p.count(':')
222 if count > 1:
223 # This means it has an alternate-data stream. There could be 3 ':', since
224 # it could be the $DATA datastream of an ADS. Split the whole ADS suffix
225 # off and add it back afterward. There is no way to know the native path
226 # case of an alternate data stream.
227 items = p.split(':')
228 p = ':'.join(items[0:2])
229 suffix = ''.join(':' + i for i in items[2:])
230
maruel@chromium.org42b091d2013-04-02 22:27:02 +0000231 # TODO(maruel): Use os.path.normpath?
232 if p.endswith('.\\'):
233 p = p[:-2]
234
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000235 # Windows used to have an option to turn on case sensitivity on non Win32
236 # subsystem but that's out of scope here and isn't supported anymore.
237 # Go figure why GetShortPathName() is needed.
238 try:
239 out = GetLongPathName(GetShortPathName(p))
240 except OSError, e:
241 if e.args[0] in (2, 3, 5):
242 # The path does not exist. Try to recurse and reconstruct the path.
243 base = os.path.dirname(p)
244 rest = os.path.basename(p)
245 return os.path.join(get_native_path_case(base), rest)
246 raise
247 if out.startswith('\\\\?\\'):
248 out = out[4:]
249 # Always upper case the first letter since GetLongPathName() will return the
250 # drive letter in the case it was given.
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +0000251 return out[0].upper() + out[1:] + suffix
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000252
253
254 def CommandLineToArgvW(command_line):
255 """Splits a commandline into argv using CommandLineToArgvW()."""
256 # http://msdn.microsoft.com/library/windows/desktop/bb776391.aspx
257 size = c_int()
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000258 assert isinstance(command_line, unicode)
259 ptr = windll.shell32.CommandLineToArgvW(command_line, byref(size))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000260 try:
261 return [arg for arg in (c_wchar_p * size.value).from_address(ptr)]
262 finally:
263 windll.kernel32.LocalFree(ptr)
264
265
266elif sys.platform == 'darwin':
267
268
269 # On non-windows, keep the stdlib behavior.
270 isabs = os.path.isabs
271
272
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000273 def _native_case(p):
274 """Gets the native path case. Warning: this function resolves symlinks."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000275 try:
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000276 rel_ref, _ = Carbon.File.FSPathMakeRef(p.encode('utf-8'))
277 # The OSX underlying code uses NFD but python strings are in NFC. This
278 # will cause issues with os.listdir() for example. Since the dtrace log
279 # *is* in NFC, normalize it here.
csharp@chromium.org5a2c3732013-03-07 14:44:56 +0000280 out = unicodedata.normalize(
281 'NFC', rel_ref.FSRefMakePath().decode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000282 if p.endswith(os.path.sep) and not out.endswith(os.path.sep):
283 return out + os.path.sep
284 return out
285 except MacOS.Error, e:
286 if e.args[0] in (-43, -120):
287 # The path does not exist. Try to recurse and reconstruct the path.
288 # -43 means file not found.
289 # -120 means directory not found.
290 base = os.path.dirname(p)
291 rest = os.path.basename(p)
csharp@chromium.org5a2c3732013-03-07 14:44:56 +0000292 return os.path.join(_native_case(base), rest)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000293 raise OSError(
294 e.args[0], 'Failed to get native path for %s' % p, p, e.args[1])
295
296
297 def _split_at_symlink_native(base_path, rest):
298 """Returns the native path for a symlink."""
299 base, symlink, rest = split_at_symlink(base_path, rest)
300 if symlink:
301 if not base_path:
302 base_path = base
303 else:
304 base_path = safe_join(base_path, base)
csharp@chromium.orgf2eacff2013-04-04 14:20:20 +0000305 symlink = find_item_native_case(base_path, symlink)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000306 return base, symlink, rest
307
308
csharp@chromium.orgf2eacff2013-04-04 14:20:20 +0000309 def find_item_native_case(root_path, item):
310 """Gets the native path case of a single item based at root_path.
311
312 There is no API to get the native path case of symlinks on OSX. So it
313 needs to be done the slow way.
314 """
315 if item == '..':
316 return item
317
318 item = item.lower()
319 for element in os.listdir(root_path):
320 if element.lower() == item:
321 return element
322
323
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000324 def get_native_path_case(path):
325 """Returns the native path case for an existing file.
326
327 Technically, it's only HFS+ on OSX that is case preserving and
328 insensitive. It's the default setting on HFS+ but can be changed.
329 """
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000330 assert isinstance(path, unicode), repr(path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000331 if not isabs(path):
332 raise ValueError(
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000333 'get_native_path_case(%r): Require an absolute path' % path, path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000334 if path.startswith('/dev'):
335 # /dev is not visible from Carbon, causing an exception.
336 return path
337
338 # Starts assuming there is no symlink along the path.
339 resolved = _native_case(path)
csharp@chromium.orgf2eacff2013-04-04 14:20:20 +0000340 if path.lower() in (resolved.lower(), resolved.lower() + './'):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000341 # This code path is incredibly faster.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000342 logging.debug('get_native_path_case(%s) = %s' % (path, resolved))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000343 return resolved
344
345 # There was a symlink, process it.
346 base, symlink, rest = _split_at_symlink_native(None, path)
347 assert symlink, (path, base, symlink, rest, resolved)
348 prev = base
349 base = safe_join(_native_case(base), symlink)
350 assert len(base) > len(prev)
351 while rest:
352 prev = base
353 relbase, symlink, rest = _split_at_symlink_native(base, rest)
354 base = safe_join(base, relbase)
355 assert len(base) > len(prev), (prev, base, symlink)
356 if symlink:
357 base = safe_join(base, symlink)
358 assert len(base) > len(prev), (prev, base, symlink)
359 # Make sure no symlink was resolved.
360 assert base.lower() == path.lower(), (base, path)
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000361 logging.debug('get_native_path_case(%s) = %s' % (path, base))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000362 return base
363
364
365else: # OSes other than Windows and OSX.
366
367
368 # On non-windows, keep the stdlib behavior.
369 isabs = os.path.isabs
370
371
csharp@chromium.orgf2eacff2013-04-04 14:20:20 +0000372 def find_item_native_case(root, item):
373 """Gets the native path case of a single item based at root_path."""
374 if item == '..':
375 return item
376
377 root = get_native_path_case(root)
378 return os.path.basename(get_native_path_case(os.path.join(root, item)))
379
380
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000381 def get_native_path_case(path):
382 """Returns the native path case for an existing file.
383
384 On OSes other than OSX and Windows, assume the file system is
385 case-sensitive.
386
387 TODO(maruel): This is not strictly true. Implement if necessary.
388 """
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000389 assert isinstance(path, unicode), repr(path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000390 if not isabs(path):
391 raise ValueError(
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000392 'get_native_path_case(%r): Require an absolute path' % path, path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000393 # Give up on cygwin, as GetLongPathName() can't be called.
394 # Linux traces tends to not be normalized so use this occasion to normalize
395 # it. This function implementation already normalizes the path on the other
396 # OS so this needs to be done here to be coherent between OSes.
397 out = os.path.normpath(path)
398 if path.endswith(os.path.sep) and not out.endswith(os.path.sep):
399 return out + os.path.sep
400 return out
401
402
403if sys.platform != 'win32': # All non-Windows OSes.
404
405
406 def safe_join(*args):
407 """Joins path elements like os.path.join() but doesn't abort on absolute
408 path.
409
410 os.path.join('foo', '/bar') == '/bar'
411 but safe_join('foo', '/bar') == 'foo/bar'.
412 """
413 out = ''
414 for element in args:
415 if element.startswith(os.path.sep):
416 if out.endswith(os.path.sep):
417 out += element[1:]
418 else:
419 out += element
420 else:
421 if out.endswith(os.path.sep):
422 out += element
423 else:
424 out += os.path.sep + element
425 return out
426
427
428 def split_at_symlink(base_dir, relfile):
429 """Scans each component of relfile and cut the string at the symlink if
430 there is any.
431
432 Returns a tuple (base_path, symlink, rest), with symlink == rest == None if
433 not symlink was found.
434 """
435 if base_dir:
436 assert relfile
437 assert os.path.isabs(base_dir)
438 index = 0
439 else:
440 assert os.path.isabs(relfile)
441 index = 1
442
443 def at_root(rest):
444 if base_dir:
445 return safe_join(base_dir, rest)
446 return rest
447
448 while True:
449 try:
450 index = relfile.index(os.path.sep, index)
451 except ValueError:
452 index = len(relfile)
453 full = at_root(relfile[:index])
454 if os.path.islink(full):
455 # A symlink!
456 base = os.path.dirname(relfile[:index])
457 symlink = os.path.basename(relfile[:index])
458 rest = relfile[index:]
459 logging.debug(
460 'split_at_symlink(%s, %s) -> (%s, %s, %s)' %
461 (base_dir, relfile, base, symlink, rest))
462 return base, symlink, rest
463 if index == len(relfile):
464 break
465 index += 1
466 return relfile, None, None
467
468
maruel@chromium.orgac36fb72013-05-21 14:50:53 +0000469class Unbuffered(object):
470 """Disable buffering on a file object."""
471 def __init__(self, stream):
472 self.stream = stream
473
474 def write(self, data):
475 self.stream.write(data)
476 if '\n' in data:
477 self.stream.flush()
478
479 def __getattr__(self, attr):
480 return getattr(self.stream, attr)
481
482
483def disable_buffering():
484 """Makes this process and child processes stdout unbuffered."""
485 if not os.environ.get('PYTHONUNBUFFERED'):
486 # Since sys.stdout is a C++ object, it's impossible to do
487 # sys.stdout.write = lambda...
488 sys.stdout = Unbuffered(sys.stdout)
489 os.environ['PYTHONUNBUFFERED'] = 'x'
490
491
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000492def fix_python_path(cmd):
493 """Returns the fixed command line to call the right python executable."""
494 out = cmd[:]
495 if out[0] == 'python':
496 out[0] = sys.executable
497 elif out[0].endswith('.py'):
498 out.insert(0, sys.executable)
499 return out
500
501
maruel@chromium.org3683afe2013-07-27 00:09:27 +0000502def gen_blacklist(regexes):
503 """Returns a lambda to be used as a blacklist."""
504 compiled = [re.compile(i) for i in regexes]
505 def match(f):
506 return any(j.match(f) for j in compiled)
507 return match
508
509
maruel@chromium.org2909b7c2013-05-29 18:52:59 +0000510def create_subprocess_thunk():
511 """Creates a small temporary script to start the child process.
512
513 This thunk doesn't block, its unique name is used to identify it as the
514 parent.
515 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000516 handle, name = tempfile.mkstemp(prefix='trace_inputs_thunk', suffix='.py')
maruel@chromium.org2909b7c2013-05-29 18:52:59 +0000517 try:
518 os.write(
519 handle,
520 (
521 'import subprocess, sys\n'
522 'sys.exit(subprocess.call(sys.argv[2:]))\n'
523 ))
524 finally:
525 os.close(handle)
526 return name
527
528
529def create_exec_thunk():
530 """Creates a small temporary script to start the child executable.
531
532 Reads from the file handle provided as the fisrt argument to block, then
533 execv() the command to be traced.
534 """
535 handle, name = tempfile.mkstemp(prefix='trace_inputs_thunk', suffix='.py')
536 try:
537 os.write(
538 handle,
539 (
540 'import os, sys\n'
541 'fd = int(sys.argv[1])\n'
542 # This will block until the controlling process writes a byte on the
543 # pipe. It will do so once the tracing tool, e.g. strace, is ready to
544 # trace.
545 'os.read(fd, 1)\n'
546 'os.close(fd)\n'
547 'os.execv(sys.argv[2], sys.argv[2:])\n'
548 ))
549 finally:
550 os.close(handle)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000551 return name
552
553
554def strace_process_quoted_arguments(text):
555 """Extracts quoted arguments on a string and return the arguments as a list.
556
557 Implemented as an automaton. Supports incomplete strings in the form
558 '"foo"...'.
559
560 Example:
561 With text = '"foo", "bar"', the function will return ['foo', 'bar']
562
563 TODO(maruel): Implement escaping.
564 """
565 # All the possible states of the DFA.
566 ( NEED_QUOTE, # Begining of a new arguments.
567 INSIDE_STRING, # Inside an argument.
568 ESCAPED, # Found a '\' inside a quote. Treat the next char as-is.
569 NEED_COMMA_OR_DOT, # Right after the closing quote of an argument. Could be
570 # a serie of 3 dots or a comma.
571 NEED_SPACE, # Right after a comma
572 NEED_DOT_2, # Found a dot, need a second one.
573 NEED_DOT_3, # Found second dot, need a third one.
574 NEED_COMMA, # Found third dot, need a comma.
575 ) = range(8)
576
577 state = NEED_QUOTE
578 out = []
579 for index, char in enumerate(text):
580 if char == '"':
581 if state == NEED_QUOTE:
582 state = INSIDE_STRING
583 # A new argument was found.
584 out.append('')
585 elif state == INSIDE_STRING:
586 # The argument is now closed.
587 state = NEED_COMMA_OR_DOT
588 elif state == ESCAPED:
589 out[-1] += char
590 state = INSIDE_STRING
591 else:
592 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000593 'Can\'t process char \'%s\' at column %d for: %r' % (
594 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000595 index,
596 text)
597 elif char == ',':
598 if state in (NEED_COMMA_OR_DOT, NEED_COMMA):
599 state = NEED_SPACE
600 elif state == INSIDE_STRING:
601 out[-1] += char
602 elif state == ESCAPED:
603 out[-1] += char
604 state = INSIDE_STRING
605 else:
606 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000607 'Can\'t process char \'%s\' at column %d for: %r' % (
608 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000609 index,
610 text)
611 elif char == ' ':
612 if state == NEED_SPACE:
613 state = NEED_QUOTE
614 elif state == INSIDE_STRING:
615 out[-1] += char
616 elif state == ESCAPED:
617 out[-1] += char
618 state = INSIDE_STRING
619 else:
620 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000621 'Can\'t process char \'%s\' at column %d for: %r' % (
622 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000623 index,
624 text)
625 elif char == '.':
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000626 if state in (NEED_QUOTE, NEED_COMMA_OR_DOT):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000627 # The string is incomplete, this mean the strace -s flag should be
628 # increased.
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000629 # For NEED_QUOTE, the input string would look like '"foo", ...'.
630 # For NEED_COMMA_OR_DOT, the input string would look like '"foo"...'
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000631 state = NEED_DOT_2
632 elif state == NEED_DOT_2:
633 state = NEED_DOT_3
634 elif state == NEED_DOT_3:
635 state = NEED_COMMA
636 elif state == INSIDE_STRING:
637 out[-1] += char
638 elif state == ESCAPED:
639 out[-1] += char
640 state = INSIDE_STRING
641 else:
642 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000643 'Can\'t process char \'%s\' at column %d for: %r' % (
644 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000645 index,
646 text)
647 elif char == '\\':
648 if state == ESCAPED:
649 out[-1] += char
650 state = INSIDE_STRING
651 elif state == INSIDE_STRING:
652 state = ESCAPED
653 else:
654 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000655 'Can\'t process char \'%s\' at column %d for: %r' % (
656 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000657 index,
658 text)
659 else:
660 if state == INSIDE_STRING:
661 out[-1] += char
662 else:
663 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000664 'Can\'t process char \'%s\' at column %d for: %r' % (
665 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000666 index,
667 text)
668 if state not in (NEED_COMMA, NEED_COMMA_OR_DOT):
669 raise ValueError(
670 'String is incorrectly terminated: %r' % text,
671 text)
672 return out
673
674
675def read_json(filepath):
676 with open(filepath, 'r') as f:
677 return json.load(f)
678
679
680def write_json(filepath_or_handle, data, dense):
681 """Writes data into filepath or file handle encoded as json.
682
683 If dense is True, the json is packed. Otherwise, it is human readable.
684 """
685 if hasattr(filepath_or_handle, 'write'):
686 if dense:
687 filepath_or_handle.write(
688 json.dumps(data, sort_keys=True, separators=(',',':')))
689 else:
690 filepath_or_handle.write(json.dumps(data, sort_keys=True, indent=2))
691 else:
692 with open(filepath_or_handle, 'wb') as f:
693 if dense:
694 json.dump(data, f, sort_keys=True, separators=(',',':'))
695 else:
696 json.dump(data, f, sort_keys=True, indent=2)
697
698
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000699def assert_is_renderable(pseudo_string):
700 """Asserts the input is a valid object to be processed by render()."""
701 assert (
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000702 pseudo_string is None or
703 isinstance(pseudo_string, unicode) or
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000704 hasattr(pseudo_string, 'render')), repr(pseudo_string)
705
706
707def render(pseudo_string):
708 """Converts the pseudo-string to an unicode string."""
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000709 if pseudo_string is None or isinstance(pseudo_string, unicode):
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000710 return pseudo_string
711 return pseudo_string.render()
712
713
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000714class Results(object):
715 """Results of a trace session."""
716
717 class _TouchedObject(object):
718 """Something, a file or a directory, that was accessed."""
719 def __init__(self, root, path, tainted, size, nb_files):
720 logging.debug(
721 '%s(%s, %s, %s, %s, %s)' %
722 (self.__class__.__name__, root, path, tainted, size, nb_files))
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000723 assert_is_renderable(root)
724 assert_is_renderable(path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000725 self.root = root
726 self.path = path
727 self.tainted = tainted
728 self.nb_files = nb_files
maruel@chromium.org538141b2013-06-03 20:57:17 +0000729 # Can be used as a cache or a default value, depending on context. In
730 # particular, once self.tainted is True, because the path was replaced
731 # with a variable, it is not possible to look up the file size.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000732 self._size = size
733 # These are cache only.
734 self._real_path = None
735
736 # Check internal consistency.
737 assert path, path
738 assert tainted or bool(root) != bool(isabs(path)), (root, path)
739 assert tainted or (
740 not os.path.exists(self.full_path) or
741 (self.full_path == get_native_path_case(self.full_path))), (
742 tainted, self.full_path, get_native_path_case(self.full_path))
743
744 @property
745 def existent(self):
746 return self.size != -1
747
748 @property
749 def full_path(self):
750 if self.root:
751 return os.path.join(self.root, self.path)
752 return self.path
753
754 @property
755 def real_path(self):
756 """Returns the path with symlinks resolved."""
757 if not self._real_path:
758 self._real_path = os.path.realpath(self.full_path)
759 return self._real_path
760
761 @property
762 def size(self):
maruel@chromium.org538141b2013-06-03 20:57:17 +0000763 """File's size. -1 is not existent.
764
765 Once tainted, it is not possible the retrieve the file size anymore since
766 the path is composed of variables.
767 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000768 if self._size is None and not self.tainted:
769 try:
770 self._size = os.stat(self.full_path).st_size
771 except OSError:
772 self._size = -1
773 return self._size
774
775 def flatten(self):
776 """Returns a dict representing this object.
777
778 A 'size' of 0 means the file was only touched and not read.
779 """
780 return {
781 'path': self.path,
782 'size': self.size,
783 }
784
785 def replace_variables(self, variables):
786 """Replaces the root of this File with one of the variables if it matches.
787
788 If a variable replacement occurs, the cloned object becomes tainted.
789 """
790 for variable, root_path in variables.iteritems():
791 if self.path.startswith(root_path):
792 return self._clone(
793 self.root, variable + self.path[len(root_path):], True)
794 # No need to clone, returns ourself.
795 return self
796
797 def strip_root(self, root):
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000798 """Returns a clone of itself with 'root' stripped off.
799
800 Note that the file is kept if it is either accessible from a symlinked
801 path that was used to access the file or through the real path.
802 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000803 # Check internal consistency.
804 assert self.tainted or (isabs(root) and root.endswith(os.path.sep)), root
805 if not self.full_path.startswith(root):
806 # Now try to resolve the symlinks to see if it can be reached this way.
807 # Only try *after* trying without resolving symlink.
808 if not self.real_path.startswith(root):
809 return None
810 path = self.real_path
811 else:
812 path = self.full_path
813 return self._clone(root, path[len(root):], self.tainted)
814
815 def _clone(self, new_root, new_path, tainted):
816 raise NotImplementedError(self.__class__.__name__)
817
818 class File(_TouchedObject):
819 """A file that was accessed. May not be present anymore.
820
821 If tainted is true, it means it is not a real path anymore as a variable
822 replacement occured.
823
maruel@chromium.org538141b2013-06-03 20:57:17 +0000824 |mode| can be one of None, TOUCHED, READ or WRITE.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000825 """
maruel@chromium.org538141b2013-06-03 20:57:17 +0000826 # Was probed for existence, and it is existent, but was never _opened_.
827 TOUCHED = 't'
828 # Opened for read only and guaranteed to not have been written to.
829 READ = 'r'
830 # Opened for write.
831 WRITE = 'w'
832
833 # They are listed in order of priority. E.g. if a file is traced as TOUCHED
834 # then as WRITE, only keep WRITE. None means no idea, which is a problem on
835 # Windows.
836 ACCEPTABLE_MODES = (None, TOUCHED, READ, WRITE)
837
838 def __init__(self, root, path, tainted, size, mode):
839 assert mode in self.ACCEPTABLE_MODES
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000840 super(Results.File, self).__init__(root, path, tainted, size, 1)
maruel@chromium.org538141b2013-06-03 20:57:17 +0000841 self.mode = mode
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000842
843 def _clone(self, new_root, new_path, tainted):
844 """Clones itself keeping meta-data."""
845 # Keep the self.size and self._real_path caches for performance reason. It
846 # is also important when the file becomes tainted (with a variable instead
847 # of the real path) since self.path is not an on-disk path anymore so
848 # out._size cannot be updated.
maruel@chromium.org538141b2013-06-03 20:57:17 +0000849 out = self.__class__(new_root, new_path, tainted, self.size, self.mode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000850 out._real_path = self._real_path
851 return out
852
maruel@chromium.org538141b2013-06-03 20:57:17 +0000853 def flatten(self):
854 out = super(Results.File, self).flatten()
855 out['mode'] = self.mode
856 return out
857
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000858 class Directory(_TouchedObject):
maruel@chromium.org538141b2013-06-03 20:57:17 +0000859 """A directory of files. Must exist.
860
861 For a Directory instance, self.size is not a cache, it's an actual value
862 that is never modified and represents the total size of the files contained
863 in this directory. It is possible that the directory is empty so that
864 size==0; this happens if there's only an invalid symlink in it.
865 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000866 def __init__(self, root, path, tainted, size, nb_files):
867 """path='.' is a valid value and must be handled appropriately."""
868 assert not path.endswith(os.path.sep), path
869 super(Results.Directory, self).__init__(
870 root, path + os.path.sep, tainted, size, nb_files)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000871
872 def flatten(self):
873 out = super(Results.Directory, self).flatten()
874 out['nb_files'] = self.nb_files
875 return out
876
877 def _clone(self, new_root, new_path, tainted):
878 """Clones itself keeping meta-data."""
879 out = self.__class__(
880 new_root,
881 new_path.rstrip(os.path.sep),
882 tainted,
883 self.size,
884 self.nb_files)
885 out._real_path = self._real_path
886 return out
887
888 class Process(object):
889 """A process that was traced.
890
891 Contains references to the files accessed by this process and its children.
892 """
893 def __init__(self, pid, files, executable, command, initial_cwd, children):
894 logging.debug('Process(%s, %d, ...)' % (pid, len(files)))
895 self.pid = pid
896 self.files = sorted(files, key=lambda x: x.path)
897 self.children = children
898 self.executable = executable
899 self.command = command
900 self.initial_cwd = initial_cwd
901
902 # Check internal consistency.
903 assert len(set(f.path for f in self.files)) == len(self.files), sorted(
904 f.path for f in self.files)
905 assert isinstance(self.children, list)
906 assert isinstance(self.files, list)
907
908 @property
909 def all(self):
910 for child in self.children:
911 for i in child.all:
912 yield i
913 yield self
914
915 def flatten(self):
916 return {
917 'children': [c.flatten() for c in self.children],
918 'command': self.command,
919 'executable': self.executable,
920 'files': [f.flatten() for f in self.files],
921 'initial_cwd': self.initial_cwd,
922 'pid': self.pid,
923 }
924
925 def strip_root(self, root):
926 assert isabs(root) and root.endswith(os.path.sep), root
927 # Loads the files after since they are constructed as objects.
928 out = self.__class__(
929 self.pid,
930 filter(None, (f.strip_root(root) for f in self.files)),
931 self.executable,
932 self.command,
933 self.initial_cwd,
934 [c.strip_root(root) for c in self.children])
935 logging.debug(
936 'strip_root(%s) %d -> %d' % (root, len(self.files), len(out.files)))
937 return out
938
939 def __init__(self, process):
940 self.process = process
941 # Cache.
942 self._files = None
943
944 def flatten(self):
945 return {
946 'root': self.process.flatten(),
947 }
948
949 @property
950 def files(self):
951 if self._files is None:
952 self._files = sorted(
953 sum((p.files for p in self.process.all), []),
954 key=lambda x: x.path)
955 return self._files
956
957 @property
958 def existent(self):
959 return [f for f in self.files if f.existent]
960
961 @property
962 def non_existent(self):
963 return [f for f in self.files if not f.existent]
964
965 def strip_root(self, root):
966 """Returns a clone with all the files outside the directory |root| removed
967 and converts all the path to be relative paths.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000968
969 It keeps files accessible through the |root| directory or that have been
970 accessed through any symlink which points to the same directory.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000971 """
972 # Resolve any symlink
973 root = os.path.realpath(root)
974 root = get_native_path_case(root).rstrip(os.path.sep) + os.path.sep
975 logging.debug('strip_root(%s)' % root)
976 return Results(self.process.strip_root(root))
977
978
979class ApiBase(object):
980 """OS-agnostic API to trace a process and its children."""
981 class Context(object):
982 """Processes one log line at a time and keeps the list of traced processes.
983
984 The parsing is complicated by the fact that logs are traced out of order for
985 strace but in-order for dtrace and logman. In addition, on Windows it is
986 very frequent that processids are reused so a flat list cannot be used. But
987 at the same time, it is impossible to faithfully construct a graph when the
988 logs are processed out of order. So both a tree and a flat mapping are used,
989 the tree is the real process tree, while the flat mapping stores the last
990 valid process for the corresponding processid. For the strace case, the
991 tree's head is guessed at the last moment.
992 """
993 class Process(object):
994 """Keeps context for one traced child process.
995
996 Logs all the files this process touched. Ignores directories.
997 """
998 def __init__(self, blacklist, pid, initial_cwd):
999 # Check internal consistency.
1000 assert isinstance(pid, int), repr(pid)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001001 assert_is_renderable(initial_cwd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001002 self.pid = pid
1003 # children are Process instances.
1004 self.children = []
1005 self.initial_cwd = initial_cwd
1006 self.cwd = None
maruel@chromium.org538141b2013-06-03 20:57:17 +00001007 self.files = {}
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001008 self.executable = None
1009 self.command = None
1010 self._blacklist = blacklist
1011
1012 def to_results_process(self):
1013 """Resolves file case sensitivity and or late-bound strings."""
1014 # When resolving files, it's normal to get dupe because a file could be
1015 # opened multiple times with different case. Resolve the deduplication
1016 # here.
1017 def fix_path(x):
1018 """Returns the native file path case.
1019
1020 Converts late-bound strings.
1021 """
1022 if not x:
1023 # Do not convert None instance to 'None'.
1024 return x
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001025 x = render(x)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001026 if os.path.isabs(x):
1027 # If the path is not absolute, which tends to happen occasionally on
1028 # Windows, it is not possible to get the native path case so ignore
1029 # that trace. It mostly happens for 'executable' value.
1030 x = get_native_path_case(x)
1031 return x
1032
maruel@chromium.org538141b2013-06-03 20:57:17 +00001033 def fix_and_blacklist_path(x, m):
1034 """Receives a tuple (filepath, mode) and processes filepath."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001035 x = fix_path(x)
1036 if not x:
1037 return
1038 # The blacklist needs to be reapplied, since path casing could
1039 # influence blacklisting.
1040 if self._blacklist(x):
1041 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00001042 # Filters out directories. Some may have passed through.
1043 if os.path.isdir(x):
1044 return
1045 return x, m
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001046
maruel@chromium.org538141b2013-06-03 20:57:17 +00001047 # Renders all the files as strings, as some could be RelativePath
1048 # instances. It is important to do it first since there could still be
1049 # multiple entries with the same path but different modes.
1050 rendered = (
1051 fix_and_blacklist_path(f, m) for f, m in self.files.iteritems())
1052 files = sorted(
1053 (f for f in rendered if f),
1054 key=lambda x: (x[0], Results.File.ACCEPTABLE_MODES.index(x[1])))
1055 # Then converting into a dict will automatically clean up lesser
1056 # important values.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001057 files = [
maruel@chromium.org538141b2013-06-03 20:57:17 +00001058 Results.File(None, f, False, None, m)
1059 for f, m in dict(files).iteritems()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001060 ]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001061 return Results.Process(
1062 self.pid,
1063 files,
1064 fix_path(self.executable),
1065 self.command,
1066 fix_path(self.initial_cwd),
1067 [c.to_results_process() for c in self.children])
1068
maruel@chromium.org538141b2013-06-03 20:57:17 +00001069 def add_file(self, filepath, mode):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001070 """Adds a file if it passes the blacklist."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001071 if self._blacklist(render(filepath)):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001072 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00001073 logging.debug('add_file(%d, %s, %s)', self.pid, filepath, mode)
1074 # Note that filepath and not render(filepath) is added. It is because
1075 # filepath could be something else than a string, like a RelativePath
1076 # instance for dtrace logs.
1077 modes = Results.File.ACCEPTABLE_MODES
1078 old_mode = self.files.setdefault(filepath, mode)
1079 if old_mode != mode and modes.index(old_mode) < modes.index(mode):
1080 # Take the highest value.
1081 self.files[filepath] = mode
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001082
1083 def __init__(self, blacklist):
1084 self.blacklist = blacklist
1085 # Initial process.
1086 self.root_process = None
1087 # dict to accelerate process lookup, to not have to lookup the whole graph
1088 # each time.
1089 self._process_lookup = {}
1090
1091 class Tracer(object):
1092 """During it's lifetime, the tracing subsystem is enabled."""
1093 def __init__(self, logname):
1094 self._logname = logname
maruel@chromium.org808e3f22013-05-16 16:42:54 +00001095 self._lock = threading.RLock()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001096 self._traces = []
1097 self._initialized = True
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00001098 self._scripts_to_cleanup = []
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001099
1100 def trace(self, cmd, cwd, tracename, output):
1101 """Runs the OS-specific trace program on an executable.
1102
1103 Arguments:
1104 - cmd: The command (a list) to run.
1105 - cwd: Current directory to start the child process in.
1106 - tracename: Name of the trace in the logname file.
1107 - output: If False, redirects output to PIPEs.
1108
1109 Returns a tuple (resultcode, output) and updates the internal trace
1110 entries.
1111 """
1112 # The implementation adds an item to self._traces.
1113 raise NotImplementedError(self.__class__.__name__)
1114
1115 def close(self, _timeout=None):
1116 """Saves the meta-data in the logname file.
1117
1118 For kernel-based tracing, stops the tracing subsystem.
1119
1120 Must not be used manually when using 'with' construct.
1121 """
1122 with self._lock:
maruel@chromium.org808e3f22013-05-16 16:42:54 +00001123 if not self._initialized:
1124 raise TracingFailure(
1125 'Called %s.close() on an unitialized object' %
1126 self.__class__.__name__,
1127 None, None, None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001128 try:
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00001129 while self._scripts_to_cleanup:
1130 try:
1131 os.remove(self._scripts_to_cleanup.pop())
1132 except OSError as e:
1133 logging.error('Failed to delete a temporary script: %s', e)
maruel@chromium.org808e3f22013-05-16 16:42:54 +00001134 write_json(self._logname, self._gen_logdata(), False)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001135 finally:
1136 self._initialized = False
1137
1138 def post_process_log(self):
1139 """Post-processes the log so it becomes faster to load afterward.
1140
1141 Must not be used manually when using 'with' construct.
1142 """
1143 assert not self._initialized, 'Must stop tracing first.'
1144
maruel@chromium.org808e3f22013-05-16 16:42:54 +00001145 def _gen_logdata(self):
1146 """Returns the data to be saved in the trace file."""
1147 return {
1148 'traces': self._traces,
1149 }
1150
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001151 def __enter__(self):
1152 """Enables 'with' statement."""
1153 return self
1154
1155 def __exit__(self, exc_type, exc_value, traceback):
1156 """Enables 'with' statement."""
1157 self.close()
1158 # If an exception was thrown, do not process logs.
1159 if not exc_type:
1160 self.post_process_log()
1161
1162 def get_tracer(self, logname):
1163 """Returns an ApiBase.Tracer instance.
1164
1165 Initializes the tracing subsystem, which is a requirement for kernel-based
1166 tracers. Only one tracer instance should be live at a time!
1167
1168 logname is the filepath to the json file that will contain the meta-data
1169 about the logs.
1170 """
1171 return self.Tracer(logname)
1172
1173 @staticmethod
1174 def clean_trace(logname):
1175 """Deletes an old log."""
1176 raise NotImplementedError()
1177
1178 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001179 def parse_log(cls, logname, blacklist, trace_name):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001180 """Processes trace logs and returns the files opened and the files that do
1181 not exist.
1182
1183 It does not track directories.
1184
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001185 Arguments:
1186 - logname: must be an absolute path.
1187 - blacklist: must be a lambda.
1188 - trace_name: optional trace to read, defaults to reading all traces.
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00001189
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001190 Most of the time, files that do not exist are temporary test files that
1191 should be put in /tmp instead. See http://crbug.com/116251.
1192
1193 Returns a list of dict with keys:
1194 - results: A Results instance.
1195 - trace: The corresponding tracename parameter provided to
1196 get_tracer().trace().
1197 - output: Output gathered during execution, if get_tracer().trace(...,
1198 output=False) was used.
1199 """
1200 raise NotImplementedError(cls.__class__.__name__)
1201
1202
1203class Strace(ApiBase):
1204 """strace implies linux."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001205 @staticmethod
1206 def load_filename(filename):
1207 """Parses a filename in a log."""
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001208 # TODO(maruel): Be compatible with strace -x.
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001209 assert isinstance(filename, str)
1210 out = ''
1211 i = 0
1212 while i < len(filename):
1213 c = filename[i]
1214 if c == '\\':
1215 out += chr(int(filename[i+1:i+4], 8))
1216 i += 4
1217 else:
1218 out += c
1219 i += 1
1220 # TODO(maruel): That's not necessarily true that the current code page is
1221 # utf-8.
1222 return out.decode('utf-8')
1223
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001224 class Context(ApiBase.Context):
1225 """Processes a strace log line and keeps the list of existent and non
1226 existent files accessed.
1227
1228 Ignores directories.
1229
1230 Uses late-binding to processes the cwd of each process. The problem is that
1231 strace generates one log file per process it traced but doesn't give any
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001232 information about which process was started when and by who. So process the
1233 logs out of order and use late binding with RelativePath to be able to
1234 deduce the initial directory of each process once all the logs are parsed.
1235
1236 TODO(maruel): Use the script even in the non-sudo case, so log parsing can
1237 be done in two phase: first find the root process, then process the child
1238 processes in order. With that, it should be possible to not use RelativePath
1239 anymore. This would significantly simplify the code!
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001240 """
1241 class Process(ApiBase.Context.Process):
1242 """Represents the state of a process.
1243
1244 Contains all the information retrieved from the pid-specific log.
1245 """
1246 # Function names are using ([a-z_0-9]+)
1247 # This is the most common format. function(args) = result
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001248 RE_HEADER = re.compile(r'^([a-z_0-9]+)\((.*?)\)\s+= (.+)$')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001249 # An interrupted function call, only grab the minimal header.
1250 RE_UNFINISHED = re.compile(r'^([^\(]+)(.*) \<unfinished \.\.\.\>$')
1251 # A resumed function call.
1252 RE_RESUMED = re.compile(r'^<\.\.\. ([^ ]+) resumed> (.+)$')
1253 # A process received a signal.
1254 RE_SIGNAL = re.compile(r'^--- SIG[A-Z]+ .+ ---')
1255 # A process didn't handle a signal. Ignore any junk appearing before,
1256 # because the process was forcibly killed so it won't open any new file.
1257 RE_KILLED = re.compile(
1258 r'^.*\+\+\+ killed by ([A-Z]+)( \(core dumped\))? \+\+\+$')
1259 # The process has exited.
1260 RE_PROCESS_EXITED = re.compile(r'^\+\+\+ exited with (\d+) \+\+\+')
1261 # A call was canceled. Ignore any prefix.
1262 RE_UNAVAILABLE = re.compile(r'^.*\)\s*= \? <unavailable>$')
1263 # Happens when strace fails to even get the function name.
1264 UNNAMED_FUNCTION = '????'
1265
1266 # Corner-case in python, a class member function decorator must not be
1267 # @staticmethod.
1268 def parse_args(regexp, expect_zero): # pylint: disable=E0213
1269 """Automatically convert the str 'args' into a list of processed
1270 arguments.
1271
1272 Arguments:
1273 - regexp is used to parse args.
1274 - expect_zero: one of True, False or None.
1275 - True: will check for result.startswith('0') first and will ignore
1276 the trace line completely otherwise. This is important because for
1277 many functions, the regexp will not process if the call failed.
1278 - False: will check for not result.startswith(('?', '-1')) for the
1279 same reason than with True.
1280 - None: ignore result.
1281 """
1282 def meta_hook(function):
1283 assert function.__name__.startswith('handle_')
1284 def hook(self, args, result):
1285 if expect_zero is True and not result.startswith('0'):
1286 return
1287 if expect_zero is False and result.startswith(('?', '-1')):
1288 return
1289 match = re.match(regexp, args)
1290 if not match:
1291 raise TracingFailure(
1292 'Failed to parse %s(%s) = %s' %
1293 (function.__name__[len('handle_'):], args, result),
1294 None, None, None)
1295 return function(self, match.groups(), result)
1296 return hook
1297 return meta_hook
1298
1299 class RelativePath(object):
1300 """A late-bound relative path."""
1301 def __init__(self, parent, value):
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001302 assert_is_renderable(parent)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001303 self.parent = parent
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001304 assert (
1305 value is None or
1306 (isinstance(value, unicode) and not os.path.isabs(value)))
1307 self.value = value
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001308 if self.value:
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001309 # TODO(maruel): On POSIX, '\\' is a valid character so remove this
1310 # assert.
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001311 assert '\\' not in self.value, value
1312 assert '\\' not in self.value, (repr(value), repr(self.value))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001313
1314 def render(self):
1315 """Returns the current directory this instance is representing.
1316
1317 This function is used to return the late-bound value.
1318 """
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001319 assert self.parent is not None
1320 parent = render(self.parent)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001321 if self.value:
1322 return os.path.normpath(os.path.join(parent, self.value))
1323 return parent
1324
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001325 def __init__(self, root, pid):
1326 """Keeps enough information to be able to guess the original process
1327 root.
1328
1329 strace doesn't store which process was the initial process. So more
1330 information needs to be kept so the graph can be reconstructed from the
1331 flat map.
1332 """
1333 logging.info('%s(%d)' % (self.__class__.__name__, pid))
1334 super(Strace.Context.Process, self).__init__(root.blacklist, pid, None)
1335 assert isinstance(root, ApiBase.Context)
1336 self._root = weakref.ref(root)
1337 # The dict key is the function name of the pending call, like 'open'
1338 # or 'execve'.
1339 self._pending_calls = {}
1340 self._line_number = 0
1341 # Current directory when the process started.
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001342 if isinstance(self._root(), unicode):
1343 self.initial_cwd = self._root()
1344 else:
1345 self.initial_cwd = self.RelativePath(self._root(), None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001346 self.parentid = None
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001347 self._done = False
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001348
1349 def get_cwd(self):
1350 """Returns the best known value of cwd."""
1351 return self.cwd or self.initial_cwd
1352
1353 def render(self):
1354 """Returns the string value of the RelativePath() object.
1355
1356 Used by RelativePath. Returns the initial directory and not the
1357 current one since the current directory 'cwd' validity is time-limited.
1358
1359 The validity is only guaranteed once all the logs are processed.
1360 """
1361 return self.initial_cwd.render()
1362
1363 def on_line(self, line):
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001364 assert isinstance(line, str)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001365 self._line_number += 1
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001366 try:
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001367 if self._done:
1368 raise TracingFailure(
1369 'Found a trace for a terminated process or corrupted log',
1370 None, None, None)
1371
1372 if self.RE_SIGNAL.match(line):
1373 # Ignore signals.
1374 return
1375
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001376 match = self.RE_KILLED.match(line)
1377 if match:
1378 # Converts a '+++ killed by Foo +++' trace into an exit_group().
1379 self.handle_exit_group(match.group(1), None)
1380 return
1381
1382 match = self.RE_PROCESS_EXITED.match(line)
1383 if match:
1384 # Converts a '+++ exited with 1 +++' trace into an exit_group()
1385 self.handle_exit_group(match.group(1), None)
1386 return
1387
1388 match = self.RE_UNFINISHED.match(line)
1389 if match:
1390 if match.group(1) in self._pending_calls:
1391 raise TracingFailure(
1392 'Found two unfinished calls for the same function',
1393 None, None, None,
1394 self._pending_calls)
1395 self._pending_calls[match.group(1)] = (
1396 match.group(1) + match.group(2))
1397 return
1398
1399 match = self.RE_UNAVAILABLE.match(line)
1400 if match:
1401 # This usually means a process was killed and a pending call was
1402 # canceled.
1403 # TODO(maruel): Look up the last exit_group() trace just above and
1404 # make sure any self._pending_calls[anything] is properly flushed.
1405 return
1406
1407 match = self.RE_RESUMED.match(line)
1408 if match:
1409 if match.group(1) not in self._pending_calls:
1410 raise TracingFailure(
1411 'Found a resumed call that was not logged as unfinished',
1412 None, None, None,
1413 self._pending_calls)
1414 pending = self._pending_calls.pop(match.group(1))
1415 # Reconstruct the line.
1416 line = pending + match.group(2)
1417
1418 match = self.RE_HEADER.match(line)
1419 if not match:
maruel@chromium.orgbcfc9e12013-03-19 18:47:17 +00001420 # The line is corrupted. It happens occasionally when a process is
1421 # killed forcibly with activity going on. Assume the process died.
1422 # No other line can be processed afterward.
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001423 logging.debug('%d is done: %s', self.pid, line)
maruel@chromium.orgbcfc9e12013-03-19 18:47:17 +00001424 self._done = True
1425 return
1426
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001427 if match.group(1) == self.UNNAMED_FUNCTION:
1428 return
1429
1430 # It's a valid line, handle it.
1431 handler = getattr(self, 'handle_%s' % match.group(1), None)
1432 if not handler:
1433 self._handle_unknown(match.group(1), match.group(2), match.group(3))
1434 return handler(match.group(2), match.group(3))
1435 except TracingFailure, e:
1436 # Hack in the values since the handler could be a static function.
1437 e.pid = self.pid
1438 e.line = line
1439 e.line_number = self._line_number
1440 # Re-raise the modified exception.
1441 raise
1442 except (KeyError, NotImplementedError, ValueError), e:
1443 raise TracingFailure(
1444 'Trace generated a %s exception: %s' % (
1445 e.__class__.__name__, str(e)),
1446 self.pid,
1447 self._line_number,
1448 line,
1449 e)
1450
1451 @parse_args(r'^\"(.+?)\", [FKORWX_|]+$', True)
1452 def handle_access(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001453 self._handle_file(args[0], Results.File.TOUCHED)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001454
1455 @parse_args(r'^\"(.+?)\"$', True)
1456 def handle_chdir(self, args, _result):
1457 """Updates cwd."""
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001458 self.cwd = self._mangle(args[0])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001459 logging.debug('handle_chdir(%d, %s)' % (self.pid, self.cwd))
1460
maruel@chromium.org538141b2013-06-03 20:57:17 +00001461 @parse_args(r'^\"(.+?)\", (\d+), (\d+)$', False)
1462 def handle_chown(self, args, _result):
1463 # TODO(maruel): Look at result?
1464 self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org55550722013-05-28 14:43:14 +00001465
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001466 def handle_clone(self, _args, result):
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001467 self._handling_forking('clone', result)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001468
1469 def handle_close(self, _args, _result):
1470 pass
1471
maruel@chromium.org538141b2013-06-03 20:57:17 +00001472 @parse_args(r'^\"(.+?)\", (\d+)$', False)
1473 def handle_chmod(self, args, _result):
1474 self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001475
maruel@chromium.org538141b2013-06-03 20:57:17 +00001476 @parse_args(r'^\"(.+?)\", (\d+)$', False)
1477 def handle_creat(self, args, _result):
1478 self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001479
1480 @parse_args(r'^\"(.+?)\", \[(.+)\], \[\/\* \d+ vars? \*\/\]$', True)
1481 def handle_execve(self, args, _result):
1482 # Even if in practice execve() doesn't returns when it succeeds, strace
1483 # still prints '0' as the result.
1484 filepath = args[0]
maruel@chromium.org538141b2013-06-03 20:57:17 +00001485 self._handle_file(filepath, Results.File.READ)
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001486 self.executable = self._mangle(filepath)
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001487 try:
1488 self.command = strace_process_quoted_arguments(args[1])
1489 except ValueError as e:
1490 raise TracingFailure(
1491 'Failed to process command line argument:\n%s' % e.args[0],
1492 None, None, None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001493
1494 def handle_exit_group(self, _args, _result):
1495 """Removes cwd."""
1496 self.cwd = None
1497
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001498 @parse_args(r'^(\d+|AT_FDCWD), \"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', True)
1499 def handle_faccessat(self, args, _results):
1500 if args[0] == 'AT_FDCWD':
maruel@chromium.org538141b2013-06-03 20:57:17 +00001501 self._handle_file(args[1], Results.File.TOUCHED)
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001502 else:
1503 raise Exception('Relative faccess not implemented.')
1504
maruel@chromium.org0781f322013-05-28 19:45:49 +00001505 def handle_fallocate(self, _args, result):
1506 pass
1507
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001508 def handle_fork(self, args, result):
1509 self._handle_unknown('fork', args, result)
1510
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001511 def handle_futex(self, _args, _result):
1512 pass
1513
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001514 @parse_args(r'^\"(.+?)\", (\d+)$', False)
1515 def handle_getcwd(self, args, _result):
1516 if os.path.isabs(args[0]):
1517 logging.debug('handle_chdir(%d, %s)' % (self.pid, self.cwd))
1518 if not isinstance(self.cwd, unicode):
1519 # Take the occasion to reset the path.
1520 self.cwd = self._mangle(args[0])
1521 else:
1522 # It should always match.
1523 assert self.cwd == Strace.load_filename(args[0]), (
1524 self.cwd, args[0])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001525
1526 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1527 def handle_link(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001528 self._handle_file(args[0], Results.File.READ)
1529 self._handle_file(args[1], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001530
1531 @parse_args(r'\"(.+?)\", \{.+?, \.\.\.\}', True)
1532 def handle_lstat(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001533 self._handle_file(args[0], Results.File.TOUCHED)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001534
1535 def handle_mkdir(self, _args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001536 # We track content, not directories.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001537 pass
1538
1539 @parse_args(r'^\"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', False)
1540 def handle_open(self, args, _result):
1541 if 'O_DIRECTORY' in args[1]:
1542 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00001543 self._handle_file(
1544 args[0],
1545 Results.File.READ if 'O_RDONLY' in args[1] else Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001546
1547 @parse_args(r'^(\d+|AT_FDCWD), \"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', False)
1548 def handle_openat(self, args, _result):
1549 if 'O_DIRECTORY' in args[2]:
1550 return
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001551 if args[0] == 'AT_FDCWD':
maruel@chromium.org538141b2013-06-03 20:57:17 +00001552 self._handle_file(
1553 args[1],
1554 Results.File.READ if 'O_RDONLY' in args[2]
1555 else Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001556 else:
1557 # TODO(maruel): Implement relative open if necessary instead of the
1558 # AT_FDCWD flag, let's hope not since this means tracking all active
1559 # directory handles.
maruel@chromium.org538141b2013-06-03 20:57:17 +00001560 raise NotImplementedError('Relative open via openat not implemented.')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001561
1562 @parse_args(r'^\"(.+?)\", \".+?\"(\.\.\.)?, \d+$', False)
1563 def handle_readlink(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001564 self._handle_file(args[0], Results.File.READ)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001565
1566 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1567 def handle_rename(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001568 self._handle_file(args[0], Results.File.READ)
1569 self._handle_file(args[1], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001570
1571 def handle_rmdir(self, _args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001572 # We track content, not directories.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001573 pass
1574
1575 def handle_setxattr(self, _args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001576 # TODO(maruel): self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001577 pass
1578
1579 @parse_args(r'\"(.+?)\", \{.+?, \.\.\.\}', True)
1580 def handle_stat(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001581 self._handle_file(args[0], Results.File.TOUCHED)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001582
maruel@chromium.org538141b2013-06-03 20:57:17 +00001583 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1584 def handle_symlink(self, args, _result):
1585 self._handle_file(args[0], Results.File.TOUCHED)
1586 self._handle_file(args[1], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001587
csharp@chromium.orgfa672922012-11-02 17:43:31 +00001588 @parse_args(r'^\"(.+?)\", \d+', True)
1589 def handle_truncate(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001590 self._handle_file(args[0], Results.File.WRITE)
csharp@chromium.orgfa672922012-11-02 17:43:31 +00001591
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001592 def handle_unlink(self, _args, _result):
1593 # In theory, the file had to be created anyway.
1594 pass
1595
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001596 def handle_unlinkat(self, _args, _result):
1597 # In theory, the file had to be created anyway.
1598 pass
1599
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001600 def handle_statfs(self, _args, _result):
1601 pass
1602
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001603 def handle_utimensat(self, _args, _result):
1604 pass
1605
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001606 def handle_vfork(self, _args, result):
1607 self._handling_forking('vfork', result)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001608
1609 @staticmethod
1610 def _handle_unknown(function, args, result):
1611 raise TracingFailure(
1612 'Unexpected/unimplemented trace %s(%s)= %s' %
1613 (function, args, result),
1614 None, None, None)
1615
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001616 def _handling_forking(self, name, result):
1617 """Transfers cwd."""
1618 if result.startswith(('?', '-1')):
1619 # The call failed.
1620 return
1621 # Update the other process right away.
1622 childpid = int(result)
1623 child = self._root().get_or_set_proc(childpid)
1624 if child.parentid is not None or childpid in self.children:
1625 raise TracingFailure(
1626 'Found internal inconsitency in process lifetime detection '
1627 'during a %s() call' % name,
1628 None, None, None)
1629
1630 # Copy the cwd object.
1631 child.initial_cwd = self.get_cwd()
1632 child.parentid = self.pid
1633 # It is necessary because the logs are processed out of order.
1634 self.children.append(child)
1635
maruel@chromium.org538141b2013-06-03 20:57:17 +00001636 def _handle_file(self, filepath, mode):
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001637 filepath = self._mangle(filepath)
maruel@chromium.org538141b2013-06-03 20:57:17 +00001638 self.add_file(filepath, mode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001639
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001640 def _mangle(self, filepath):
1641 """Decodes a filepath found in the log and convert it to a late-bound
1642 path if necessary.
1643
1644 |filepath| is an strace 'encoded' string and the returned value is
1645 either an unicode string if the path was absolute or a late bound path
1646 otherwise.
1647 """
1648 filepath = Strace.load_filename(filepath)
1649 if os.path.isabs(filepath):
1650 return filepath
1651 else:
1652 if isinstance(self.get_cwd(), unicode):
1653 return os.path.normpath(os.path.join(self.get_cwd(), filepath))
1654 return self.RelativePath(self.get_cwd(), filepath)
1655
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001656 def __init__(self, blacklist, root_pid, initial_cwd):
1657 """|root_pid| may be None when the root process is not known.
1658
1659 In that case, a search is done after reading all the logs to figure out
1660 the root process.
1661 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001662 super(Strace.Context, self).__init__(blacklist)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001663 assert_is_renderable(initial_cwd)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001664 self.root_pid = root_pid
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001665 self.initial_cwd = initial_cwd
1666
1667 def render(self):
1668 """Returns the string value of the initial cwd of the root process.
1669
1670 Used by RelativePath.
1671 """
1672 return self.initial_cwd
1673
1674 def on_line(self, pid, line):
1675 """Transfers control into the Process.on_line() function."""
1676 self.get_or_set_proc(pid).on_line(line.strip())
1677
1678 def to_results(self):
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001679 """If necessary, finds back the root process and verify consistency."""
1680 if not self.root_pid:
1681 # The non-sudo case. The traced process was started by strace itself,
1682 # so the pid of the traced process is not known.
1683 root = [p for p in self._process_lookup.itervalues() if not p.parentid]
1684 if len(root) == 1:
1685 self.root_process = root[0]
1686 # Save it for later.
1687 self.root_pid = self.root_process.pid
1688 else:
1689 # The sudo case. The traced process was started manually so its pid is
1690 # known.
1691 self.root_process = self._process_lookup.get(self.root_pid)
1692 if not self.root_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001693 raise TracingFailure(
1694 'Found internal inconsitency in process lifetime detection '
1695 'while finding the root process',
1696 None,
1697 None,
1698 None,
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001699 self.root_pid,
1700 sorted(self._process_lookup))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001701 process = self.root_process.to_results_process()
1702 if sorted(self._process_lookup) != sorted(p.pid for p in process.all):
1703 raise TracingFailure(
1704 'Found internal inconsitency in process lifetime detection '
1705 'while looking for len(tree) == len(list)',
1706 None,
1707 None,
1708 None,
1709 sorted(self._process_lookup),
1710 sorted(p.pid for p in process.all))
1711 return Results(process)
1712
1713 def get_or_set_proc(self, pid):
1714 """Returns the Context.Process instance for this pid or creates a new one.
1715 """
1716 if not pid or not isinstance(pid, int):
1717 raise TracingFailure(
1718 'Unpexpected value for pid: %r' % pid,
1719 pid,
1720 None,
1721 None,
1722 pid)
1723 if pid not in self._process_lookup:
1724 self._process_lookup[pid] = self.Process(self, pid)
1725 return self._process_lookup[pid]
1726
1727 @classmethod
1728 def traces(cls):
1729 """Returns the list of all handled traces to pass this as an argument to
1730 strace.
1731 """
1732 prefix = 'handle_'
1733 return [i[len(prefix):] for i in dir(cls.Process) if i.startswith(prefix)]
1734
1735 class Tracer(ApiBase.Tracer):
1736 MAX_LEN = 256
1737
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001738 def __init__(self, logname, use_sudo):
1739 super(Strace.Tracer, self).__init__(logname)
1740 self.use_sudo = use_sudo
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001741 if use_sudo:
1742 # TODO(maruel): Use the jump script systematically to make it easy to
1743 # figure out the root process, so RelativePath is not necessary anymore.
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00001744 self._child_script = create_exec_thunk()
1745 self._scripts_to_cleanup.append(self._child_script)
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001746
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001747 def trace(self, cmd, cwd, tracename, output):
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001748 """Runs strace on an executable.
1749
1750 When use_sudo=True, it is a 3-phases process: start the thunk, start
1751 sudo strace with the pid of the thunk and then have the thunk os.execve()
1752 the process to trace.
1753 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001754 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
1755 assert os.path.isabs(cmd[0]), cmd[0]
1756 assert os.path.isabs(cwd), cwd
1757 assert os.path.normpath(cwd) == cwd, cwd
1758 with self._lock:
1759 if not self._initialized:
1760 raise TracingFailure(
1761 'Called Tracer.trace() on an unitialized object',
1762 None, None, None, tracename)
1763 assert tracename not in (i['trace'] for i in self._traces)
1764 stdout = stderr = None
1765 if output:
1766 stdout = subprocess.PIPE
1767 stderr = subprocess.STDOUT
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001768
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001769 # Ensure all file related APIs are hooked.
1770 traces = ','.join(Strace.Context.traces() + ['file'])
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001771 flags = [
maruel@chromium.orga5cfdc32013-05-28 14:46:15 +00001772 # Each child process has its own trace file. It is necessary because
1773 # strace may generate corrupted log file if multiple processes are
1774 # heavily doing syscalls simultaneously.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001775 '-ff',
maruel@chromium.orga5cfdc32013-05-28 14:46:15 +00001776 # Reduce whitespace usage.
1777 '-a1',
1778 # hex encode non-ascii strings.
1779 # TODO(maruel): '-x',
1780 # TODO(maruel): '-ttt',
1781 # Signals are unnecessary noise here. Note the parser can cope with them
1782 # but reduce the unnecessary output.
1783 '-esignal=none',
1784 # Print as much data as wanted.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001785 '-s', '%d' % self.MAX_LEN,
1786 '-e', 'trace=%s' % traces,
1787 '-o', self._logname + '.' + tracename,
1788 ]
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001789
1790 if self.use_sudo:
1791 pipe_r, pipe_w = os.pipe()
1792 # Start the child process paused.
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00001793 target_cmd = [sys.executable, self._child_script, str(pipe_r)] + cmd
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001794 logging.debug(' '.join(target_cmd))
1795 child_proc = subprocess.Popen(
1796 target_cmd,
1797 stdin=subprocess.PIPE,
1798 stdout=stdout,
1799 stderr=stderr,
1800 cwd=cwd)
1801
1802 # TODO(maruel): both processes must use the same UID for it to work
1803 # without sudo. Look why -p is failing at the moment without sudo.
1804 trace_cmd = [
1805 'sudo',
1806 'strace',
1807 '-p', str(child_proc.pid),
1808 ] + flags
1809 logging.debug(' '.join(trace_cmd))
1810 strace_proc = subprocess.Popen(
1811 trace_cmd,
1812 cwd=cwd,
1813 stdin=subprocess.PIPE,
1814 stdout=subprocess.PIPE,
1815 stderr=subprocess.PIPE)
1816
1817 line = strace_proc.stderr.readline()
1818 if not re.match(r'^Process \d+ attached \- interrupt to quit$', line):
1819 # TODO(maruel): Raise an exception.
1820 assert False, line
1821
1822 # Now fire the child process.
1823 os.write(pipe_w, 'x')
1824
1825 out = child_proc.communicate()[0]
1826 strace_out = strace_proc.communicate()[0]
1827
1828 # TODO(maruel): if strace_proc.returncode: Add an exception.
1829 saved_out = strace_out if strace_proc.returncode else out
1830 root_pid = child_proc.pid
1831 else:
1832 # Non-sudo case.
1833 trace_cmd = [
1834 'strace',
1835 ] + flags + cmd
1836 logging.debug(' '.join(trace_cmd))
1837 child_proc = subprocess.Popen(
1838 trace_cmd,
1839 cwd=cwd,
1840 stdin=subprocess.PIPE,
1841 stdout=stdout,
1842 stderr=stderr)
1843 out = child_proc.communicate()[0]
1844 # TODO(maruel): Walk the logs and figure out the root process would
1845 # simplify parsing the logs a *lot*.
1846 saved_out = out
1847 # The trace reader will have to figure out.
1848 root_pid = None
1849
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001850 with self._lock:
1851 assert tracename not in (i['trace'] for i in self._traces)
1852 self._traces.append(
1853 {
1854 'cmd': cmd,
1855 'cwd': cwd,
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001856 'output': saved_out,
1857 'pid': root_pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001858 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001859 })
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001860 return child_proc.returncode, out
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001861
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001862 def __init__(self, use_sudo=None):
1863 super(Strace, self).__init__()
1864 self.use_sudo = use_sudo
1865
1866 def get_tracer(self, logname):
1867 return self.Tracer(logname, self.use_sudo)
1868
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001869 @staticmethod
1870 def clean_trace(logname):
1871 if os.path.isfile(logname):
1872 os.remove(logname)
1873 # Also delete any pid specific file from previous traces.
1874 for i in glob.iglob(logname + '.*'):
1875 if i.rsplit('.', 1)[1].isdigit():
1876 os.remove(i)
1877
1878 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001879 def parse_log(cls, logname, blacklist, trace_name):
1880 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00001881 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001882 data = read_json(logname)
1883 out = []
1884 for item in data['traces']:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001885 if trace_name and item['trace'] != trace_name:
1886 continue
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001887 result = {
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001888 'output': item['output'],
maruel@chromium.org964ddde2012-10-29 18:13:46 +00001889 'trace': item['trace'],
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001890 }
1891 try:
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001892 context = cls.Context(blacklist, item['pid'], item['cwd'])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001893 for pidfile in glob.iglob('%s.%s.*' % (logname, item['trace'])):
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001894 logging.debug('Reading %s', pidfile)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001895 pid = pidfile.rsplit('.', 1)[1]
1896 if pid.isdigit():
1897 pid = int(pid)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001898 found_line = False
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001899 for line in open(pidfile, 'rb'):
1900 context.on_line(pid, line)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001901 found_line = True
1902 if not found_line:
1903 # Ensures that a completely empty trace still creates the
1904 # corresponding Process instance by logging a dummy line.
1905 context.on_line(pid, '')
1906 else:
1907 logging.warning('Found unexpected file %s', pidfile)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001908 result['results'] = context.to_results()
maruel@chromium.org424f5692012-10-15 15:42:38 +00001909 except TracingFailure:
1910 result['exception'] = sys.exc_info()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001911 out.append(result)
1912 return out
1913
1914
1915class Dtrace(ApiBase):
1916 """Uses DTrace framework through dtrace. Requires root access.
1917
1918 Implies Mac OSX.
1919
1920 dtruss can't be used because it has compatibility issues with python.
1921
1922 Also, the pid->cwd handling needs to be done manually since OSX has no way to
1923 get the absolute path of the 'cwd' dtrace variable from the probe.
1924
1925 Also, OSX doesn't populate curpsinfo->pr_psargs properly, see
1926 https://discussions.apple.com/thread/1980539. So resort to handling execve()
1927 manually.
1928
1929 errno is not printed in the log since this implementation currently only cares
1930 about files that were successfully opened.
1931 """
1932 class Context(ApiBase.Context):
1933 # Format: index pid function(args)
1934 RE_HEADER = re.compile(r'^\d+ (\d+) ([a-zA-Z_\-]+)\((.*?)\)$')
1935
1936 # Arguments parsing.
1937 RE_DTRACE_BEGIN = re.compile(r'^\"(.+?)\"$')
1938 RE_CHDIR = re.compile(r'^\"(.+?)\"$')
1939 RE_EXECVE = re.compile(r'^\"(.+?)\", \[(\d+), (.+)\]$')
1940 RE_OPEN = re.compile(r'^\"(.+?)\", (0x[0-9a-z]+), (0x[0-9a-z]+)$')
1941 RE_PROC_START = re.compile(r'^(\d+), \"(.+?)\", (\d+)$')
1942 RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$')
1943
maruel@chromium.orgac361162013-06-04 15:54:06 +00001944 # O_DIRECTORY is not defined on Windows and dtrace doesn't exist on Windows.
1945 O_DIRECTORY = os.O_DIRECTORY if hasattr(os, 'O_DIRECTORY') else None
maruel@chromium.org538141b2013-06-03 20:57:17 +00001946 O_RDWR = os.O_RDWR
1947 O_WRONLY = os.O_WRONLY
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001948
1949 class Process(ApiBase.Context.Process):
1950 def __init__(self, *args):
1951 super(Dtrace.Context.Process, self).__init__(*args)
1952 self.cwd = self.initial_cwd
1953
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001954 def __init__(self, blacklist, thunk_pid, initial_cwd):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001955 logging.info(
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001956 '%s(%d, %s)' % (self.__class__.__name__, thunk_pid, initial_cwd))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001957 super(Dtrace.Context, self).__init__(blacklist)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001958 assert isinstance(initial_cwd, unicode), initial_cwd
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001959 # Process ID of the temporary script created by create_subprocess_thunk().
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001960 self._thunk_pid = thunk_pid
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001961 self._initial_cwd = initial_cwd
1962 self._line_number = 0
1963
1964 def on_line(self, line):
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001965 assert isinstance(line, unicode), line
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001966 self._line_number += 1
1967 match = self.RE_HEADER.match(line)
1968 if not match:
1969 raise TracingFailure(
1970 'Found malformed line: %s' % line,
1971 None,
1972 self._line_number,
1973 line)
1974 fn = getattr(
1975 self,
1976 'handle_%s' % match.group(2).replace('-', '_'),
1977 self._handle_ignored)
1978 # It is guaranteed to succeed because of the regexp. Or at least I thought
1979 # it would.
1980 pid = int(match.group(1))
1981 try:
1982 return fn(pid, match.group(3))
1983 except TracingFailure, e:
1984 # Hack in the values since the handler could be a static function.
1985 e.pid = pid
1986 e.line = line
1987 e.line_number = self._line_number
1988 # Re-raise the modified exception.
1989 raise
1990 except (KeyError, NotImplementedError, ValueError), e:
1991 raise TracingFailure(
1992 'Trace generated a %s exception: %s' % (
1993 e.__class__.__name__, str(e)),
1994 pid,
1995 self._line_number,
1996 line,
1997 e)
1998
1999 def to_results(self):
2000 process = self.root_process.to_results_process()
2001 # Internal concistency check.
2002 if sorted(self._process_lookup) != sorted(p.pid for p in process.all):
2003 raise TracingFailure(
2004 'Found internal inconsitency in process lifetime detection '
2005 'while looking for len(tree) == len(list)',
2006 None,
2007 None,
2008 None,
2009 sorted(self._process_lookup),
2010 sorted(p.pid for p in process.all))
2011 return Results(process)
2012
2013 def handle_dtrace_BEGIN(self, _pid, args):
2014 if not self.RE_DTRACE_BEGIN.match(args):
2015 raise TracingFailure(
2016 'Found internal inconsitency in dtrace_BEGIN log line',
2017 None, None, None)
2018
2019 def handle_proc_start(self, pid, args):
2020 """Transfers cwd.
2021
2022 The dtrace script already takes care of only tracing the processes that
2023 are child of the traced processes so there is no need to verify the
2024 process hierarchy.
2025 """
2026 if pid in self._process_lookup:
2027 raise TracingFailure(
2028 'Found internal inconsitency in proc_start: %d started two times' %
2029 pid,
2030 None, None, None)
2031 match = self.RE_PROC_START.match(args)
2032 if not match:
2033 raise TracingFailure(
2034 'Failed to parse arguments: %s' % args,
2035 None, None, None)
2036 ppid = int(match.group(1))
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002037 if ppid == self._thunk_pid and not self.root_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002038 proc = self.root_process = self.Process(
2039 self.blacklist, pid, self._initial_cwd)
2040 elif ppid in self._process_lookup:
2041 proc = self.Process(self.blacklist, pid, self._process_lookup[ppid].cwd)
2042 self._process_lookup[ppid].children.append(proc)
2043 else:
2044 # Another process tree, ignore.
2045 return
2046 self._process_lookup[pid] = proc
2047 logging.debug(
2048 'New child: %s -> %d cwd:%s' %
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002049 (ppid, pid, render(proc.initial_cwd)))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002050
2051 def handle_proc_exit(self, pid, _args):
2052 """Removes cwd."""
2053 if pid in self._process_lookup:
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002054 # self._thunk_pid is not traced itself and other traces run neither.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002055 self._process_lookup[pid].cwd = None
2056
2057 def handle_execve(self, pid, args):
2058 """Sets the process' executable.
2059
2060 TODO(maruel): Read command line arguments. See
2061 https://discussions.apple.com/thread/1980539 for an example.
2062 https://gist.github.com/1242279
2063
2064 Will have to put the answer at http://stackoverflow.com/questions/7556249.
2065 :)
2066 """
2067 if not pid in self._process_lookup:
2068 # Another process tree, ignore.
2069 return
2070 match = self.RE_EXECVE.match(args)
2071 if not match:
2072 raise TracingFailure(
2073 'Failed to parse arguments: %r' % args,
2074 None, None, None)
2075 proc = self._process_lookup[pid]
2076 proc.executable = match.group(1)
maruel@chromium.org538141b2013-06-03 20:57:17 +00002077 self._handle_file(pid, proc.executable, Results.File.READ)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002078 proc.command = self.process_escaped_arguments(match.group(3))
2079 if int(match.group(2)) != len(proc.command):
2080 raise TracingFailure(
2081 'Failed to parse execve() arguments: %s' % args,
2082 None, None, None)
2083
2084 def handle_chdir(self, pid, args):
2085 """Updates cwd."""
2086 if pid not in self._process_lookup:
2087 # Another process tree, ignore.
2088 return
2089 cwd = self.RE_CHDIR.match(args).group(1)
2090 if not cwd.startswith('/'):
2091 cwd2 = os.path.join(self._process_lookup[pid].cwd, cwd)
2092 logging.debug('handle_chdir(%d, %s) -> %s' % (pid, cwd, cwd2))
2093 else:
2094 logging.debug('handle_chdir(%d, %s)' % (pid, cwd))
2095 cwd2 = cwd
2096 self._process_lookup[pid].cwd = cwd2
2097
2098 def handle_open_nocancel(self, pid, args):
2099 """Redirects to handle_open()."""
2100 return self.handle_open(pid, args)
2101
2102 def handle_open(self, pid, args):
2103 if pid not in self._process_lookup:
2104 # Another process tree, ignore.
2105 return
2106 match = self.RE_OPEN.match(args)
2107 if not match:
2108 raise TracingFailure(
2109 'Failed to parse arguments: %s' % args,
2110 None, None, None)
2111 flag = int(match.group(2), 16)
2112 if self.O_DIRECTORY & flag == self.O_DIRECTORY:
2113 # Ignore directories.
2114 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00002115 self._handle_file(
2116 pid,
2117 match.group(1),
2118 Results.File.READ if not ((self.O_RDWR | self.O_WRONLY) & flag)
2119 else Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002120
2121 def handle_rename(self, pid, args):
2122 if pid not in self._process_lookup:
2123 # Another process tree, ignore.
2124 return
2125 match = self.RE_RENAME.match(args)
2126 if not match:
2127 raise TracingFailure(
2128 'Failed to parse arguments: %s' % args,
2129 None, None, None)
maruel@chromium.org538141b2013-06-03 20:57:17 +00002130 self._handle_file(pid, match.group(1), Results.File.READ)
2131 self._handle_file(pid, match.group(2), Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002132
maruel@chromium.org538141b2013-06-03 20:57:17 +00002133 def _handle_file(self, pid, filepath, mode):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002134 if not filepath.startswith('/'):
2135 filepath = os.path.join(self._process_lookup[pid].cwd, filepath)
2136 # We can get '..' in the path.
2137 filepath = os.path.normpath(filepath)
2138 # Sadly, still need to filter out directories here;
2139 # saw open_nocancel(".", 0, 0) = 0 lines.
2140 if os.path.isdir(filepath):
2141 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00002142 self._process_lookup[pid].add_file(filepath, mode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002143
2144 def handle_ftruncate(self, pid, args):
2145 """Just used as a signal to kill dtrace, ignoring."""
2146 pass
2147
2148 @staticmethod
2149 def _handle_ignored(pid, args):
2150 """Is called for all the event traces that are not handled."""
2151 raise NotImplementedError('Please implement me')
2152
2153 @staticmethod
2154 def process_escaped_arguments(text):
2155 """Extracts escaped arguments on a string and return the arguments as a
2156 list.
2157
2158 Implemented as an automaton.
2159
2160 Example:
2161 With text = '\\001python2.7\\001-c\\001print(\\"hi\\")\\0', the
2162 function will return ['python2.7', '-c', 'print("hi")]
2163 """
2164 if not text.endswith('\\0'):
2165 raise ValueError('String is not null terminated: %r' % text, text)
2166 text = text[:-2]
2167
2168 def unescape(x):
2169 """Replaces '\\' with '\' and '\?' (where ? is anything) with ?."""
2170 out = []
2171 escaped = False
2172 for i in x:
2173 if i == '\\' and not escaped:
2174 escaped = True
2175 continue
2176 escaped = False
2177 out.append(i)
2178 return ''.join(out)
2179
2180 return [unescape(i) for i in text.split('\\001')]
2181
2182 class Tracer(ApiBase.Tracer):
2183 # pylint: disable=C0301
2184 #
2185 # To understand the following code, you'll want to take a look at:
2186 # http://developers.sun.com/solaris/articles/dtrace_quickref/dtrace_quickref.html
2187 # https://wikis.oracle.com/display/DTrace/Variables
2188 # http://docs.oracle.com/cd/E19205-01/820-4221/
2189 #
2190 # 0. Dump all the valid probes into a text file. It is important, you
2191 # want to redirect into a file and you don't want to constantly 'sudo'.
2192 # $ sudo dtrace -l > probes.txt
2193 #
2194 # 1. Count the number of probes:
2195 # $ wc -l probes.txt
2196 # 81823 # On OSX 10.7, including 1 header line.
2197 #
2198 # 2. List providers, intentionally skipping all the 'syspolicy10925' and the
2199 # likes and skipping the header with NR>1:
2200 # $ awk 'NR>1 { print $2 }' probes.txt | sort | uniq | grep -v '[[:digit:]]'
2201 # dtrace
2202 # fbt
2203 # io
2204 # ip
2205 # lockstat
2206 # mach_trap
2207 # proc
2208 # profile
2209 # sched
2210 # syscall
2211 # tcp
2212 # vminfo
2213 #
2214 # 3. List of valid probes:
2215 # $ grep syscall probes.txt | less
2216 # or use dtrace directly:
2217 # $ sudo dtrace -l -P syscall | less
2218 #
2219 # trackedpid is an associative array where its value can be 0, 1 or 2.
2220 # 0 is for untracked processes and is the default value for items not
2221 # in the associative array.
2222 # 1 is for tracked processes.
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002223 # 2 is for the script created by create_subprocess_thunk() only. It is not
2224 # tracked itself but all its decendants are.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002225 #
2226 # The script will kill itself only once waiting_to_die == 1 and
2227 # current_processes == 0, so that both getlogin() was called and that
2228 # all traced processes exited.
2229 #
2230 # TODO(maruel): Use cacheable predicates. See
2231 # https://wikis.oracle.com/display/DTrace/Performance+Considerations
2232 D_CODE = """
2233 dtrace:::BEGIN {
2234 waiting_to_die = 0;
2235 current_processes = 0;
2236 logindex = 0;
2237 printf("%d %d %s_%s(\\"%s\\")\\n",
2238 logindex, PID, probeprov, probename, SCRIPT);
2239 logindex++;
2240 }
2241
2242 proc:::start /trackedpid[ppid]/ {
2243 trackedpid[pid] = 1;
2244 current_processes += 1;
2245 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
2246 logindex, pid, probeprov, probename,
2247 ppid,
2248 execname,
2249 current_processes);
2250 logindex++;
2251 }
2252 /* Should use SCRIPT but there is no access to this variable at that
2253 * point. */
2254 proc:::start /ppid == PID && execname == "Python"/ {
2255 trackedpid[pid] = 2;
2256 current_processes += 1;
2257 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
2258 logindex, pid, probeprov, probename,
2259 ppid,
2260 execname,
2261 current_processes);
2262 logindex++;
2263 }
2264 proc:::exit /trackedpid[pid] &&
2265 current_processes == 1 &&
2266 waiting_to_die == 1/ {
2267 trackedpid[pid] = 0;
2268 current_processes -= 1;
2269 printf("%d %d %s_%s(%d)\\n",
2270 logindex, pid, probeprov, probename,
2271 current_processes);
2272 logindex++;
2273 exit(0);
2274 }
2275 proc:::exit /trackedpid[pid]/ {
2276 trackedpid[pid] = 0;
2277 current_processes -= 1;
2278 printf("%d %d %s_%s(%d)\\n",
2279 logindex, pid, probeprov, probename,
2280 current_processes);
2281 logindex++;
2282 }
2283
2284 /* Use an arcane function to detect when we need to die */
2285 syscall::ftruncate:entry /pid == PID && arg0 == FILE_ID/ {
2286 waiting_to_die = 1;
2287 printf("%d %d %s()\\n", logindex, pid, probefunc);
2288 logindex++;
2289 }
2290 syscall::ftruncate:entry /
2291 pid == PID && arg0 == FILE_ID && current_processes == 0/ {
2292 exit(0);
2293 }
2294
2295 syscall::open*:entry /trackedpid[pid] == 1/ {
2296 self->open_arg0 = arg0;
2297 self->open_arg1 = arg1;
2298 self->open_arg2 = arg2;
2299 }
2300 syscall::open*:return /trackedpid[pid] == 1 && errno == 0/ {
2301 this->open_arg0 = copyinstr(self->open_arg0);
2302 printf("%d %d %s(\\"%s\\", 0x%x, 0x%x)\\n",
2303 logindex, pid, probefunc,
2304 this->open_arg0,
2305 self->open_arg1,
2306 self->open_arg2);
2307 logindex++;
2308 this->open_arg0 = 0;
2309 }
2310 syscall::open*:return /trackedpid[pid] == 1/ {
2311 self->open_arg0 = 0;
2312 self->open_arg1 = 0;
2313 self->open_arg2 = 0;
2314 }
2315
2316 syscall::rename:entry /trackedpid[pid] == 1/ {
2317 self->rename_arg0 = arg0;
2318 self->rename_arg1 = arg1;
2319 }
2320 syscall::rename:return /trackedpid[pid] == 1 && errno == 0/ {
2321 this->rename_arg0 = copyinstr(self->rename_arg0);
2322 this->rename_arg1 = copyinstr(self->rename_arg1);
2323 printf("%d %d %s(\\"%s\\", \\"%s\\")\\n",
2324 logindex, pid, probefunc,
2325 this->rename_arg0,
2326 this->rename_arg1);
2327 logindex++;
2328 this->rename_arg0 = 0;
2329 this->rename_arg1 = 0;
2330 }
2331 syscall::rename:return /trackedpid[pid] == 1/ {
2332 self->rename_arg0 = 0;
2333 self->rename_arg1 = 0;
2334 }
2335
2336 /* Track chdir, it's painful because it is only receiving relative path.
2337 */
2338 syscall::chdir:entry /trackedpid[pid] == 1/ {
2339 self->chdir_arg0 = arg0;
2340 }
2341 syscall::chdir:return /trackedpid[pid] == 1 && errno == 0/ {
2342 this->chdir_arg0 = copyinstr(self->chdir_arg0);
2343 printf("%d %d %s(\\"%s\\")\\n",
2344 logindex, pid, probefunc,
2345 this->chdir_arg0);
2346 logindex++;
2347 this->chdir_arg0 = 0;
2348 }
2349 syscall::chdir:return /trackedpid[pid] == 1/ {
2350 self->chdir_arg0 = 0;
2351 }
2352 """
2353
2354 # execve-specific code, tends to throw a lot of exceptions.
2355 D_CODE_EXECVE = """
2356 /* Finally what we care about! */
2357 syscall::exec*:entry /trackedpid[pid]/ {
2358 self->exec_arg0 = copyinstr(arg0);
2359 /* Incrementally probe for a NULL in the argv parameter of execve() to
2360 * figure out argc. */
2361 /* TODO(maruel): Skip the remaining copyin() when a NULL pointer was
2362 * found. */
2363 self->exec_argc = 0;
2364 /* Probe for argc==1 */
2365 this->exec_argv = (user_addr_t*)copyin(
2366 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2367 self->exec_argc = this->exec_argv[self->exec_argc] ?
2368 (self->exec_argc + 1) : self->exec_argc;
2369
2370 /* Probe for argc==2 */
2371 this->exec_argv = (user_addr_t*)copyin(
2372 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2373 self->exec_argc = this->exec_argv[self->exec_argc] ?
2374 (self->exec_argc + 1) : self->exec_argc;
2375
2376 /* Probe for argc==3 */
2377 this->exec_argv = (user_addr_t*)copyin(
2378 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2379 self->exec_argc = this->exec_argv[self->exec_argc] ?
2380 (self->exec_argc + 1) : self->exec_argc;
2381
2382 /* Probe for argc==4 */
2383 this->exec_argv = (user_addr_t*)copyin(
2384 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2385 self->exec_argc = this->exec_argv[self->exec_argc] ?
2386 (self->exec_argc + 1) : self->exec_argc;
2387
2388 /* Copy the inputs strings since there is no guarantee they'll be
2389 * present after the call completed. */
2390 self->exec_argv0 = (self->exec_argc > 0) ?
2391 copyinstr(this->exec_argv[0]) : "";
2392 self->exec_argv1 = (self->exec_argc > 1) ?
2393 copyinstr(this->exec_argv[1]) : "";
2394 self->exec_argv2 = (self->exec_argc > 2) ?
2395 copyinstr(this->exec_argv[2]) : "";
2396 self->exec_argv3 = (self->exec_argc > 3) ?
2397 copyinstr(this->exec_argv[3]) : "";
2398 this->exec_argv = 0;
2399 }
2400 syscall::exec*:return /trackedpid[pid] && errno == 0/ {
2401 /* We need to join strings here, as using multiple printf() would
2402 * cause tearing when multiple threads/processes are traced.
2403 * Since it is impossible to escape a string and join it to another one,
2404 * like sprintf("%s%S", previous, more), use hackery.
2405 * Each of the elements are split with a \\1. \\0 cannot be used because
2406 * it is simply ignored. This will conflict with any program putting a
2407 * \\1 in their execve() string but this should be "rare enough" */
2408 this->args = "";
2409 /* Process exec_argv[0] */
2410 this->args = strjoin(
2411 this->args, (self->exec_argc > 0) ? self->exec_argv0 : "");
2412
2413 /* Process exec_argv[1] */
2414 this->args = strjoin(
2415 this->args, (self->exec_argc > 1) ? "\\1" : "");
2416 this->args = strjoin(
2417 this->args, (self->exec_argc > 1) ? self->exec_argv1 : "");
2418
2419 /* Process exec_argv[2] */
2420 this->args = strjoin(
2421 this->args, (self->exec_argc > 2) ? "\\1" : "");
2422 this->args = strjoin(
2423 this->args, (self->exec_argc > 2) ? self->exec_argv2 : "");
2424
2425 /* Process exec_argv[3] */
2426 this->args = strjoin(
2427 this->args, (self->exec_argc > 3) ? "\\1" : "");
2428 this->args = strjoin(
2429 this->args, (self->exec_argc > 3) ? self->exec_argv3 : "");
2430
2431 /* Prints self->exec_argc to permits verifying the internal
2432 * consistency since this code is quite fishy. */
2433 printf("%d %d %s(\\"%s\\", [%d, %S])\\n",
2434 logindex, pid, probefunc,
2435 self->exec_arg0,
2436 self->exec_argc,
2437 this->args);
2438 logindex++;
2439 this->args = 0;
2440 }
2441 syscall::exec*:return /trackedpid[pid]/ {
2442 self->exec_arg0 = 0;
2443 self->exec_argc = 0;
2444 self->exec_argv0 = 0;
2445 self->exec_argv1 = 0;
2446 self->exec_argv2 = 0;
2447 self->exec_argv3 = 0;
2448 }
2449 """
2450
2451 # Code currently not used.
2452 D_EXTRANEOUS = """
2453 /* This is a good learning experience, since it traces a lot of things
2454 * related to the process and child processes.
2455 * Warning: it generates a gigantic log. For example, tracing
2456 * "data/trace_inputs/child1.py --child" generates a 2mb log and takes
2457 * several minutes to execute.
2458 */
2459 /*
2460 mach_trap::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2461 printf("%d %d %s_%s() = %d\\n",
2462 logindex, pid, probeprov, probefunc, errno);
2463 logindex++;
2464 }
2465 proc::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2466 printf("%d %d %s_%s() = %d\\n",
2467 logindex, pid, probeprov, probefunc, errno);
2468 logindex++;
2469 }
2470 sched::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2471 printf("%d %d %s_%s() = %d\\n",
2472 logindex, pid, probeprov, probefunc, errno);
2473 logindex++;
2474 }
2475 syscall::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2476 printf("%d %d %s_%s() = %d\\n",
2477 logindex, pid, probeprov, probefunc, errno);
2478 logindex++;
2479 }
2480 vminfo::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2481 printf("%d %d %s_%s() = %d\\n",
2482 logindex, pid, probeprov, probefunc, errno);
2483 logindex++;
2484 }
2485 */
2486 /* TODO(maruel): *stat* functions and friends
2487 syscall::access:return,
2488 syscall::chdir:return,
2489 syscall::chflags:return,
2490 syscall::chown:return,
2491 syscall::chroot:return,
2492 syscall::getattrlist:return,
2493 syscall::getxattr:return,
2494 syscall::lchown:return,
2495 syscall::lstat64:return,
2496 syscall::lstat:return,
2497 syscall::mkdir:return,
2498 syscall::pathconf:return,
2499 syscall::readlink:return,
2500 syscall::removexattr:return,
2501 syscall::setxattr:return,
2502 syscall::stat64:return,
2503 syscall::stat:return,
2504 syscall::truncate:return,
2505 syscall::unlink:return,
2506 syscall::utimes:return,
2507 */
2508 """
2509
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002510 def __init__(self, logname, use_sudo):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002511 """Starts the log collection with dtrace.
2512
2513 Requires root access or chmod 4555 on dtrace. dtrace is asynchronous so
2514 this needs to wait for dtrace to be "warmed up".
2515 """
2516 super(Dtrace.Tracer, self).__init__(logname)
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002517 # This script is used as a signal to figure out the root process.
2518 self._signal_script = create_subprocess_thunk()
2519 self._scripts_to_cleanup.append(self._signal_script)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002520 # This unique dummy temp file is used to signal the dtrace script that it
2521 # should stop as soon as all the child processes are done. A bit hackish
2522 # but works fine enough.
2523 self._dummy_file_id, self._dummy_file_name = tempfile.mkstemp(
2524 prefix='trace_signal_file')
2525
maruel@chromium.org9b2112a2013-04-05 18:01:02 +00002526 dtrace_path = '/usr/sbin/dtrace'
2527 if not os.path.isfile(dtrace_path):
2528 dtrace_path = 'dtrace'
2529 elif use_sudo is None and (os.stat(dtrace_path).st_mode & stat.S_ISUID):
2530 # No need to sudo. For those following at home, don't do that.
2531 use_sudo = False
2532
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002533 # Note: do not use the -p flag. It's useless if the initial process quits
2534 # too fast, resulting in missing traces from the grand-children. The D
2535 # code manages the dtrace lifetime itself.
2536 trace_cmd = [
maruel@chromium.org9b2112a2013-04-05 18:01:02 +00002537 dtrace_path,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002538 # Use a larger buffer if getting 'out of scratch space' errors.
2539 # Ref: https://wikis.oracle.com/display/DTrace/Options+and+Tunables
2540 '-b', '10m',
2541 '-x', 'dynvarsize=10m',
2542 #'-x', 'dtrace_global_maxsize=1m',
2543 '-x', 'evaltime=exec',
2544 '-o', '/dev/stderr',
2545 '-q',
2546 '-n', self._get_dtrace_code(),
2547 ]
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002548 if use_sudo is not False:
2549 trace_cmd.insert(0, 'sudo')
2550
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002551 with open(self._logname + '.log', 'wb') as logfile:
2552 self._dtrace = subprocess.Popen(
2553 trace_cmd, stdout=logfile, stderr=subprocess.STDOUT)
2554 logging.debug('Started dtrace pid: %d' % self._dtrace.pid)
2555
2556 # Reads until one line is printed, which signifies dtrace is up and ready.
2557 with open(self._logname + '.log', 'rb') as logfile:
2558 while 'dtrace_BEGIN' not in logfile.readline():
2559 if self._dtrace.poll() is not None:
2560 # Do a busy wait. :/
2561 break
2562 logging.debug('dtrace started')
2563
2564 def _get_dtrace_code(self):
2565 """Setups the D code to implement child process tracking.
2566
2567 Injects the cookie in the script so it knows when to stop.
2568
2569 The script will detect any instance of the script created with
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002570 create_subprocess_thunk() and will start tracing it.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002571 """
maruel@chromium.orgbe75f282012-11-02 19:34:37 +00002572 out = (
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002573 'inline int PID = %d;\n'
2574 'inline string SCRIPT = "%s";\n'
2575 'inline int FILE_ID = %d;\n'
2576 '\n'
2577 '%s') % (
2578 os.getpid(),
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002579 self._signal_script,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002580 self._dummy_file_id,
maruel@chromium.orgbe75f282012-11-02 19:34:37 +00002581 self.D_CODE)
2582 if os.environ.get('TRACE_INPUTS_DTRACE_ENABLE_EXECVE') == '1':
2583 # Do not enable by default since it tends to spew dtrace: error lines
2584 # because the execve() parameters are not in valid memory at the time of
2585 # logging.
2586 # TODO(maruel): Find a way to make this reliable since it's useful but
2587 # only works in limited/trivial uses cases for now.
2588 out += self.D_CODE_EXECVE
2589 return out
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002590
2591 def trace(self, cmd, cwd, tracename, output):
2592 """Runs dtrace on an executable.
2593
2594 This dtruss is broken when it starts the process itself or when tracing
2595 child processes, this code starts a wrapper process
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002596 generated with create_subprocess_thunk() which starts the executable to
2597 trace.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002598 """
2599 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
2600 assert os.path.isabs(cmd[0]), cmd[0]
2601 assert os.path.isabs(cwd), cwd
2602 assert os.path.normpath(cwd) == cwd, cwd
2603 with self._lock:
2604 if not self._initialized:
2605 raise TracingFailure(
2606 'Called Tracer.trace() on an unitialized object',
2607 None, None, None, tracename)
2608 assert tracename not in (i['trace'] for i in self._traces)
2609
2610 # Starts the script wrapper to start the child process. This signals the
2611 # dtrace script that this process is to be traced.
2612 stdout = stderr = None
2613 if output:
2614 stdout = subprocess.PIPE
2615 stderr = subprocess.STDOUT
2616 child_cmd = [
2617 sys.executable,
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002618 self._signal_script,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002619 tracename,
2620 ]
2621 # Call a dummy function so that dtrace knows I'm about to launch a process
2622 # that needs to be traced.
2623 # Yummy.
2624 child = subprocess.Popen(
2625 child_cmd + fix_python_path(cmd),
2626 stdin=subprocess.PIPE,
2627 stdout=stdout,
2628 stderr=stderr,
2629 cwd=cwd)
2630 logging.debug('Started child pid: %d' % child.pid)
2631
2632 out = child.communicate()[0]
2633 # This doesn't mean tracing is done, one of the grand-child process may
2634 # still be alive. It will be tracked with the dtrace script.
2635
2636 with self._lock:
2637 assert tracename not in (i['trace'] for i in self._traces)
2638 self._traces.append(
2639 {
2640 'cmd': cmd,
2641 'cwd': cwd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002642 'pid': child.pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002643 'output': out,
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002644 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002645 })
2646 return child.returncode, out
2647
2648 def close(self, timeout=None):
2649 """Terminates dtrace."""
2650 logging.debug('close(%s)' % timeout)
2651 try:
2652 try:
2653 super(Dtrace.Tracer, self).close(timeout)
2654 # Signal dtrace that it should stop now.
maruel@chromium.orgf43e68b2012-10-15 20:23:10 +00002655 # ftruncate doesn't exist on Windows.
2656 os.ftruncate(self._dummy_file_id, 0) # pylint: disable=E1101
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002657 if timeout:
2658 start = time.time()
2659 # Use polling. :/
2660 while (self._dtrace.poll() is None and
2661 (time.time() - start) < timeout):
2662 time.sleep(0.1)
2663 self._dtrace.kill()
2664 self._dtrace.wait()
2665 finally:
2666 # Make sure to kill it in any case.
2667 if self._dtrace.poll() is None:
2668 try:
2669 self._dtrace.kill()
2670 self._dtrace.wait()
2671 except OSError:
2672 pass
2673
2674 if self._dtrace.returncode != 0:
2675 # Warn about any dtrace failure but basically ignore it.
2676 print 'dtrace failure: %s' % self._dtrace.returncode
2677 finally:
2678 os.close(self._dummy_file_id)
2679 os.remove(self._dummy_file_name)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002680
2681 def post_process_log(self):
2682 """Sorts the log back in order when each call occured.
2683
2684 dtrace doesn't save the buffer in strict order since it keeps one buffer
2685 per CPU.
2686 """
2687 super(Dtrace.Tracer, self).post_process_log()
2688 logname = self._logname + '.log'
2689 with open(logname, 'rb') as logfile:
2690 lines = [l for l in logfile if l.strip()]
2691 errors = [l for l in lines if l.startswith('dtrace:')]
2692 if errors:
2693 raise TracingFailure(
2694 'Found errors in the trace: %s' % '\n'.join(errors),
2695 None, None, None, logname)
2696 try:
2697 lines = sorted(lines, key=lambda l: int(l.split(' ', 1)[0]))
2698 except ValueError:
2699 raise TracingFailure(
2700 'Found errors in the trace: %s' % '\n'.join(
2701 l for l in lines if l.split(' ', 1)[0].isdigit()),
2702 None, None, None, logname)
2703 with open(logname, 'wb') as logfile:
2704 logfile.write(''.join(lines))
2705
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002706 def __init__(self, use_sudo=None):
2707 super(Dtrace, self).__init__()
2708 self.use_sudo = use_sudo
2709
2710 def get_tracer(self, logname):
2711 return self.Tracer(logname, self.use_sudo)
2712
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002713 @staticmethod
2714 def clean_trace(logname):
2715 for ext in ('', '.log'):
2716 if os.path.isfile(logname + ext):
2717 os.remove(logname + ext)
2718
2719 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00002720 def parse_log(cls, logname, blacklist, trace_name):
2721 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00002722 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002723
2724 def blacklist_more(filepath):
2725 # All the HFS metadata is in the form /.vol/...
2726 return blacklist(filepath) or re.match(r'^\/\.vol\/.+$', filepath)
2727
2728 data = read_json(logname)
2729 out = []
2730 for item in data['traces']:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00002731 if trace_name and item['trace'] != trace_name:
2732 continue
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002733 result = {
2734 'output': item['output'],
2735 'trace': item['trace'],
2736 }
2737 try:
2738 context = cls.Context(blacklist_more, item['pid'], item['cwd'])
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002739 # It's fine to assume the file as UTF-8: OSX enforces the file names to
2740 # be valid UTF-8 and we control the log output.
2741 for line in codecs.open(logname + '.log', 'rb', encoding='utf-8'):
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002742 context.on_line(line)
2743 result['results'] = context.to_results()
2744 except TracingFailure:
2745 result['exception'] = sys.exc_info()
2746 out.append(result)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002747 return out
2748
2749
2750class LogmanTrace(ApiBase):
2751 """Uses the native Windows ETW based tracing functionality to trace a child
2752 process.
2753
2754 Caveat: this implementations doesn't track cwd or initial_cwd. It is because
2755 the Windows Kernel doesn't have a concept of 'current working directory' at
2756 all. A Win32 process has a map of current directories, one per drive letter
2757 and it is managed by the user mode kernel32.dll. In kernel, a file is always
2758 opened relative to another file_object or as an absolute path. All the current
2759 working directory logic is done in user mode.
2760 """
2761 class Context(ApiBase.Context):
2762 """Processes a ETW log line and keeps the list of existent and non
2763 existent files accessed.
2764
2765 Ignores directories.
2766 """
2767 # These indexes are for the stripped version in json.
2768 EVENT_NAME = 0
2769 TYPE = 1
2770 PID = 2
2771 TID = 3
2772 PROCESSOR_ID = 4
2773 TIMESTAMP = 5
2774 USER_DATA = 6
2775
2776 class Process(ApiBase.Context.Process):
2777 def __init__(self, *args):
2778 super(LogmanTrace.Context.Process, self).__init__(*args)
2779 # Handle file objects that succeeded.
2780 self.file_objects = {}
2781
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002782 def __init__(self, blacklist, thunk_pid, trace_name, thunk_cmd):
2783 logging.info(
2784 '%s(%d, %s, %s)', self.__class__.__name__, thunk_pid, trace_name,
2785 thunk_cmd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002786 super(LogmanTrace.Context, self).__init__(blacklist)
2787 self._drive_map = DosDriveMap()
2788 # Threads mapping to the corresponding process id.
2789 self._threads_active = {}
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002790 # Process ID of the tracer, e.g. the temporary script created by
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002791 # create_subprocess_thunk(). This is tricky because the process id may
2792 # have been reused.
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002793 self._thunk_pid = thunk_pid
2794 self._thunk_cmd = thunk_cmd
2795 self._trace_name = trace_name
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002796 self._line_number = 0
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002797 self._thunk_process = None
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002798
2799 def on_line(self, line):
2800 """Processes a json Event line."""
2801 self._line_number += 1
2802 try:
2803 # By Opcode
2804 handler = getattr(
2805 self,
2806 'handle_%s_%s' % (line[self.EVENT_NAME], line[self.TYPE]),
2807 None)
2808 if not handler:
2809 raise TracingFailure(
2810 'Unexpected event %s_%s' % (
2811 line[self.EVENT_NAME], line[self.TYPE]),
2812 None, None, None)
2813 handler(line)
2814 except TracingFailure, e:
2815 # Hack in the values since the handler could be a static function.
2816 e.pid = line[self.PID]
2817 e.line = line
2818 e.line_number = self._line_number
2819 # Re-raise the modified exception.
2820 raise
2821 except (KeyError, NotImplementedError, ValueError), e:
2822 raise TracingFailure(
2823 'Trace generated a %s exception: %s' % (
2824 e.__class__.__name__, str(e)),
2825 line[self.PID],
2826 self._line_number,
2827 line,
2828 e)
2829
2830 def to_results(self):
2831 if not self.root_process:
2832 raise TracingFailure(
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002833 'Failed to detect the initial process %d' % self._thunk_pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002834 None, None, None)
2835 process = self.root_process.to_results_process()
2836 return Results(process)
2837
2838 def _thread_to_process(self, tid):
2839 """Finds the process from the thread id."""
2840 tid = int(tid, 16)
2841 pid = self._threads_active.get(tid)
2842 if not pid or not self._process_lookup.get(pid):
2843 return
2844 return self._process_lookup[pid]
2845
2846 @classmethod
2847 def handle_EventTrace_Header(cls, line):
2848 """Verifies no event was dropped, e.g. no buffer overrun occured."""
2849 BUFFER_SIZE = cls.USER_DATA
2850 #VERSION = cls.USER_DATA + 1
2851 #PROVIDER_VERSION = cls.USER_DATA + 2
2852 #NUMBER_OF_PROCESSORS = cls.USER_DATA + 3
2853 #END_TIME = cls.USER_DATA + 4
2854 #TIMER_RESOLUTION = cls.USER_DATA + 5
2855 #MAX_FILE_SIZE = cls.USER_DATA + 6
2856 #LOG_FILE_MODE = cls.USER_DATA + 7
2857 #BUFFERS_WRITTEN = cls.USER_DATA + 8
2858 #START_BUFFERS = cls.USER_DATA + 9
2859 #POINTER_SIZE = cls.USER_DATA + 10
2860 EVENTS_LOST = cls.USER_DATA + 11
2861 #CPU_SPEED = cls.USER_DATA + 12
2862 #LOGGER_NAME = cls.USER_DATA + 13
2863 #LOG_FILE_NAME = cls.USER_DATA + 14
2864 #BOOT_TIME = cls.USER_DATA + 15
2865 #PERF_FREQ = cls.USER_DATA + 16
2866 #START_TIME = cls.USER_DATA + 17
2867 #RESERVED_FLAGS = cls.USER_DATA + 18
2868 #BUFFERS_LOST = cls.USER_DATA + 19
2869 #SESSION_NAME_STRING = cls.USER_DATA + 20
2870 #LOG_FILE_NAME_STRING = cls.USER_DATA + 21
2871 if line[EVENTS_LOST] != '0':
2872 raise TracingFailure(
2873 ( '%s events were lost during trace, please increase the buffer '
2874 'size from %s') % (line[EVENTS_LOST], line[BUFFER_SIZE]),
2875 None, None, None)
2876
2877 def handle_FileIo_Cleanup(self, line):
2878 """General wisdom: if a file is closed, it's because it was opened.
2879
2880 Note that FileIo_Close is not used since if a file was opened properly but
2881 not closed before the process exits, only Cleanup will be logged.
2882 """
2883 #IRP = self.USER_DATA
2884 TTID = self.USER_DATA + 1 # Thread ID, that's what we want.
2885 FILE_OBJECT = self.USER_DATA + 2
2886 #FILE_KEY = self.USER_DATA + 3
2887 proc = self._thread_to_process(line[TTID])
2888 if not proc:
2889 # Not a process we care about.
2890 return
2891 file_object = line[FILE_OBJECT]
2892 if file_object in proc.file_objects:
maruel@chromium.org538141b2013-06-03 20:57:17 +00002893 filepath, access_type = proc.file_objects.pop(file_object)
2894 proc.add_file(filepath, access_type)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002895
2896 def handle_FileIo_Create(self, line):
2897 """Handles a file open.
2898
2899 All FileIo events are described at
2900 http://msdn.microsoft.com/library/windows/desktop/aa363884.aspx
2901 for some value of 'description'.
2902
2903 " (..) process and thread id values of the IO events (..) are not valid "
2904 http://msdn.microsoft.com/magazine/ee358703.aspx
2905
2906 The FileIo.Create event doesn't return if the CreateFile() call
2907 succeeded, so keep track of the file_object and check that it is
2908 eventually closed with FileIo_Cleanup.
2909 """
2910 #IRP = self.USER_DATA
2911 TTID = self.USER_DATA + 1 # Thread ID, that's what we want.
2912 FILE_OBJECT = self.USER_DATA + 2
2913 #CREATE_OPTIONS = self.USER_DATA + 3
2914 #FILE_ATTRIBUTES = self.USER_DATA + 4
maruel@chromium.org538141b2013-06-03 20:57:17 +00002915 #SHARE_ACCESS = self.USER_DATA + 5
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002916 OPEN_PATH = self.USER_DATA + 6
2917
2918 proc = self._thread_to_process(line[TTID])
2919 if not proc:
2920 # Not a process we care about.
2921 return
2922
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002923 raw_path = line[OPEN_PATH]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002924 # Ignore directories and bare drive right away.
2925 if raw_path.endswith(os.path.sep):
2926 return
2927 filepath = self._drive_map.to_win32(raw_path)
2928 # Ignore bare drive right away. Some may still fall through with format
2929 # like '\\?\X:'
2930 if len(filepath) == 2:
2931 return
2932 file_object = line[FILE_OBJECT]
2933 if os.path.isdir(filepath):
2934 # There is no O_DIRECTORY equivalent on Windows. The closed is
2935 # FILE_FLAG_BACKUP_SEMANTICS but it's not exactly right either. So
2936 # simply discard directories are they are found.
2937 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00002938 # Override any stale file object.
2939 # TODO(maruel): Figure out a way to detect if the file was opened for
2940 # reading or writting. Sadly CREATE_OPTIONS doesn't seem to be of any help
2941 # here. For now mark as None to make it clear we have no idea what it is
2942 # about.
2943 proc.file_objects[file_object] = (filepath, None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002944
2945 def handle_FileIo_Rename(self, line):
2946 # TODO(maruel): Handle?
2947 pass
2948
2949 def handle_Process_End(self, line):
2950 pid = line[self.PID]
2951 if self._process_lookup.get(pid):
2952 logging.info('Terminated: %d' % pid)
2953 self._process_lookup[pid] = None
2954 else:
2955 logging.debug('Terminated: %d' % pid)
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002956 if self._thunk_process and self._thunk_process.pid == pid:
2957 self._thunk_process = None
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002958
2959 def handle_Process_Start(self, line):
2960 """Handles a new child process started by PID."""
2961 #UNIQUE_PROCESS_KEY = self.USER_DATA
2962 PROCESS_ID = self.USER_DATA + 1
2963 #PARENT_PID = self.USER_DATA + 2
2964 #SESSION_ID = self.USER_DATA + 3
2965 #EXIT_STATUS = self.USER_DATA + 4
2966 #DIRECTORY_TABLE_BASE = self.USER_DATA + 5
2967 #USER_SID = self.USER_DATA + 6
2968 IMAGE_FILE_NAME = self.USER_DATA + 7
2969 COMMAND_LINE = self.USER_DATA + 8
2970
2971 ppid = line[self.PID]
2972 pid = int(line[PROCESS_ID], 16)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002973 command_line = CommandLineToArgvW(line[COMMAND_LINE])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002974 logging.debug(
2975 'New process %d->%d (%s) %s' %
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002976 (ppid, pid, line[IMAGE_FILE_NAME], command_line))
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002977
2978 if pid == self._thunk_pid:
2979 # Need to ignore processes we don't know about because the log is
2980 # system-wide. self._thunk_pid shall start only one process.
2981 # This is tricky though because Windows *loves* to reuse process id and
2982 # it happens often that the process ID of the thunk script created by
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002983 # create_subprocess_thunk() is reused. So just detecting the pid here is
2984 # not sufficient, we must confirm the command line.
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002985 if command_line[:len(self._thunk_cmd)] != self._thunk_cmd:
2986 logging.info(
2987 'Ignoring duplicate pid %d for %s: %s while searching for %s',
2988 pid, self._trace_name, command_line, self._thunk_cmd)
2989 return
2990
2991 # TODO(maruel): The check is quite weak. Add the thunk path.
2992 if self._thunk_process:
2993 raise TracingFailure(
2994 ( 'Parent process is _thunk_pid(%d) but thunk_process(%d) is '
2995 'already set') % (self._thunk_pid, self._thunk_process.pid),
2996 None, None, None)
2997 proc = self.Process(self.blacklist, pid, None)
2998 self._thunk_process = proc
2999 return
3000 elif ppid == self._thunk_pid and self._thunk_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003001 proc = self.Process(self.blacklist, pid, None)
3002 self.root_process = proc
3003 ppid = None
3004 elif self._process_lookup.get(ppid):
3005 proc = self.Process(self.blacklist, pid, None)
3006 self._process_lookup[ppid].children.append(proc)
3007 else:
3008 # Ignore
3009 return
3010 self._process_lookup[pid] = proc
3011
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003012 proc.command = command_line
3013 proc.executable = line[IMAGE_FILE_NAME]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003014 # proc.command[0] may be the absolute path of 'executable' but it may be
3015 # anything else too. If it happens that command[0] ends with executable,
3016 # use it, otherwise defaults to the base name.
3017 cmd0 = proc.command[0].lower()
3018 if not cmd0.endswith('.exe'):
3019 # TODO(maruel): That's not strictly true either.
3020 cmd0 += '.exe'
3021 if cmd0.endswith(proc.executable) and os.path.isfile(cmd0):
3022 # Fix the path.
3023 cmd0 = cmd0.replace('/', os.path.sep)
3024 cmd0 = os.path.normpath(cmd0)
3025 proc.executable = get_native_path_case(cmd0)
3026 logging.info(
3027 'New child: %s -> %d %s' % (ppid, pid, proc.executable))
3028
3029 def handle_Thread_End(self, line):
3030 """Has the same parameters as Thread_Start."""
3031 tid = int(line[self.TID], 16)
3032 self._threads_active.pop(tid, None)
3033
3034 def handle_Thread_Start(self, line):
3035 """Handles a new thread created.
3036
3037 Do not use self.PID here since a process' initial thread is created by
3038 the parent process.
3039 """
3040 PROCESS_ID = self.USER_DATA
3041 TTHREAD_ID = self.USER_DATA + 1
3042 #STACK_BASE = self.USER_DATA + 2
3043 #STACK_LIMIT = self.USER_DATA + 3
3044 #USER_STACK_BASE = self.USER_DATA + 4
3045 #USER_STACK_LIMIT = self.USER_DATA + 5
3046 #AFFINITY = self.USER_DATA + 6
3047 #WIN32_START_ADDR = self.USER_DATA + 7
3048 #TEB_BASE = self.USER_DATA + 8
3049 #SUB_PROCESS_TAG = self.USER_DATA + 9
3050 #BASE_PRIORITY = self.USER_DATA + 10
3051 #PAGE_PRIORITY = self.USER_DATA + 11
3052 #IO_PRIORITY = self.USER_DATA + 12
3053 #THREAD_FLAGS = self.USER_DATA + 13
3054 # Do not use self.PID here since a process' initial thread is created by
3055 # the parent process.
3056 pid = int(line[PROCESS_ID], 16)
3057 tid = int(line[TTHREAD_ID], 16)
3058 logging.debug('New thread pid:%d, tid:%d' % (pid, tid))
3059 self._threads_active[tid] = pid
3060
3061 @classmethod
3062 def supported_events(cls):
3063 """Returns all the procesed events."""
3064 out = []
3065 for member in dir(cls):
3066 match = re.match(r'^handle_([A-Za-z]+)_([A-Za-z]+)$', member)
3067 if match:
3068 out.append(match.groups())
3069 return out
3070
3071 class Tracer(ApiBase.Tracer):
3072 # The basic headers.
3073 EXPECTED_HEADER = [
3074 u'Event Name',
3075 u'Type',
3076 u'Event ID',
3077 u'Version',
3078 u'Channel',
3079 u'Level', # 5
3080 u'Opcode',
3081 u'Task',
3082 u'Keyword',
3083 u'PID',
3084 u'TID', # 10
3085 u'Processor Number',
3086 u'Instance ID',
3087 u'Parent Instance ID',
3088 u'Activity ID',
3089 u'Related Activity ID', # 15
3090 u'Clock-Time',
3091 u'Kernel(ms)', # Both have a resolution of ~15ms which makes them
3092 u'User(ms)', # pretty much useless.
3093 u'User Data', # Extra arguments that are event-specific.
3094 ]
3095 # Only the useful headers common to all entries are listed there. Any column
3096 # at 19 or higher is dependent on the specific event.
3097 EVENT_NAME = 0
3098 TYPE = 1
3099 PID = 9
3100 TID = 10
3101 PROCESSOR_ID = 11
3102 TIMESTAMP = 16
3103 NULL_GUID = '{00000000-0000-0000-0000-000000000000}'
3104 USER_DATA = 19
3105
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003106 class CsvReader(object):
3107 """CSV reader that reads files generated by tracerpt.exe.
3108
3109 csv.reader() fails to read them properly, it mangles file names quoted
3110 with "" with a comma in it.
3111 """
3112 # 0. Had a ',' or one of the following ' ' after a comma, next should
3113 # be ' ', '"' or string or ',' for an empty field.
3114 ( HAD_DELIMITER,
3115 # 1. Processing an unquoted field up to ','.
3116 IN_STR,
3117 # 2. Processing a new field starting with '"'.
3118 STARTING_STR_QUOTED,
3119 # 3. Second quote in a row at the start of a field. It could be either
3120 # '""foo""' or '""'. Who the hell thought it was a great idea to use
3121 # the same character for delimiting and escaping?
3122 STARTING_SECOND_QUOTE,
3123 # 4. A quote inside a quoted string where the previous character was
3124 # not a quote, so the string is not empty. Can be either: end of a
3125 # quoted string (a delimiter) or a quote escape. The next char must be
3126 # either '"' or ','.
3127 HAD_QUOTE_IN_QUOTED,
3128 # 5. Second quote inside a quoted string.
3129 HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED,
3130 # 6. Processing a field that started with '"'.
3131 IN_STR_QUOTED) = range(7)
3132
3133 def __init__(self, f):
3134 self.f = f
3135
3136 def __iter__(self):
3137 return self
3138
3139 def next(self):
3140 """Splits the line in fields."""
3141 line = self.f.readline()
3142 if not line:
3143 raise StopIteration()
3144 line = line.strip()
3145 fields = []
3146 state = self.HAD_DELIMITER
3147 for i, c in enumerate(line):
3148 if state == self.HAD_DELIMITER:
3149 if c == ',':
3150 # Empty field.
3151 fields.append('')
3152 elif c == ' ':
3153 # Ignore initial whitespaces
3154 pass
3155 elif c == '"':
3156 state = self.STARTING_STR_QUOTED
3157 fields.append('')
3158 else:
3159 # Start of a new field.
3160 state = self.IN_STR
3161 fields.append(c)
3162
3163 elif state == self.IN_STR:
3164 # Do not accept quote inside unquoted field.
3165 assert c != '"', (i, c, line, fields)
3166 if c == ',':
3167 fields[-1] = fields[-1].strip()
3168 state = self.HAD_DELIMITER
3169 else:
3170 fields[-1] = fields[-1] + c
3171
3172 elif state == self.STARTING_STR_QUOTED:
3173 if c == '"':
3174 # Do not store the character yet.
3175 state = self.STARTING_SECOND_QUOTE
3176 else:
3177 state = self.IN_STR_QUOTED
3178 fields[-1] = fields[-1] + c
3179
3180 elif state == self.STARTING_SECOND_QUOTE:
3181 if c == ',':
3182 # It was an empty field. '""' == ''.
3183 state = self.HAD_DELIMITER
3184 else:
3185 fields[-1] = fields[-1] + '"' + c
3186 state = self.IN_STR_QUOTED
3187
3188 elif state == self.HAD_QUOTE_IN_QUOTED:
3189 if c == ',':
3190 # End of the string.
3191 state = self.HAD_DELIMITER
3192 elif c == '"':
3193 state = self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED
3194 else:
3195 # The previous double-quote was just an unescaped quote.
3196 fields[-1] = fields[-1] + '"' + c
3197 state = self.IN_STR_QUOTED
3198
3199 elif state == self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED:
3200 if c == ',':
3201 # End of the string.
3202 state = self.HAD_DELIMITER
3203 fields[-1] = fields[-1] + '"'
3204 else:
maruel@chromium.org261d1352013-02-25 19:23:20 +00003205 # That's just how the logger rolls. Revert back to appending the
3206 # char and "guess" it was a quote in a double-quoted string.
3207 state = self.IN_STR_QUOTED
3208 fields[-1] = fields[-1] + '"' + c
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003209
3210 elif state == self.IN_STR_QUOTED:
3211 if c == '"':
3212 # Could be a delimiter or an escape.
3213 state = self.HAD_QUOTE_IN_QUOTED
3214 else:
3215 fields[-1] = fields[-1] + c
3216
3217 if state == self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED:
3218 fields[-1] = fields[-1] + '"'
3219 else:
3220 assert state in (
3221 # Terminated with a normal field.
3222 self.IN_STR,
3223 # Terminated with an empty field.
3224 self.STARTING_SECOND_QUOTE,
3225 # Terminated with a normal quoted field.
3226 self.HAD_QUOTE_IN_QUOTED), (
3227 line, state, fields)
3228 return fields
3229
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003230 def __init__(self, logname):
3231 """Starts the log collection.
3232
3233 Requires administrative access. logman.exe is synchronous so no need for a
3234 "warmup" call. 'Windows Kernel Trace' is *localized* so use its GUID
3235 instead. The GUID constant name is SystemTraceControlGuid. Lovely.
3236
3237 One can get the list of potentially interesting providers with:
3238 "logman query providers | findstr /i file"
3239 """
3240 super(LogmanTrace.Tracer, self).__init__(logname)
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00003241 self._signal_script = create_subprocess_thunk()
3242 self._scripts_to_cleanup.append(self._signal_script)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003243 cmd_start = [
3244 'logman.exe',
3245 'start',
3246 'NT Kernel Logger',
3247 '-p', '{9e814aad-3204-11d2-9a82-006008a86939}',
3248 # splitio,fileiocompletion,syscall,file,cswitch,img
3249 '(process,fileio,thread)',
3250 '-o', self._logname + '.etl',
3251 '-ets', # Send directly to kernel
3252 # Values extracted out of thin air.
3253 # Event Trace Session buffer size in kb.
3254 '-bs', '10240',
3255 # Number of Event Trace Session buffers.
3256 '-nb', '16', '256',
3257 ]
3258 logging.debug('Running: %s' % cmd_start)
3259 try:
3260 subprocess.check_call(
3261 cmd_start,
3262 stdin=subprocess.PIPE,
3263 stdout=subprocess.PIPE,
3264 stderr=subprocess.STDOUT)
3265 except subprocess.CalledProcessError, e:
3266 if e.returncode == -2147024891:
3267 print >> sys.stderr, 'Please restart with an elevated admin prompt'
3268 elif e.returncode == -2144337737:
3269 print >> sys.stderr, (
3270 'A kernel trace was already running, stop it and try again')
3271 raise
3272
3273 def trace(self, cmd, cwd, tracename, output):
3274 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
3275 assert os.path.isabs(cmd[0]), cmd[0]
3276 assert os.path.isabs(cwd), cwd
3277 assert os.path.normpath(cwd) == cwd, cwd
3278 with self._lock:
3279 if not self._initialized:
3280 raise TracingFailure(
3281 'Called Tracer.trace() on an unitialized object',
3282 None, None, None, tracename)
3283 assert tracename not in (i['trace'] for i in self._traces)
3284
3285 # Use "logman -?" for help.
3286
3287 stdout = stderr = None
3288 if output:
3289 stdout = subprocess.PIPE
3290 stderr = subprocess.STDOUT
3291
3292 # Run the child process.
3293 logging.debug('Running: %s' % cmd)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00003294 # Use the temporary script generated with create_subprocess_thunk() so we
3295 # have a clear pid owner. Since trace_inputs.py can be used as a library
3296 # and could trace multiple processes simultaneously, it makes it more
3297 # complex if the executable to be traced is executed directly here. It
3298 # also solves issues related to logman.exe that needs to be executed to
3299 # control the kernel trace.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003300 child_cmd = [
3301 sys.executable,
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00003302 self._signal_script,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003303 tracename,
3304 ]
3305 child = subprocess.Popen(
3306 child_cmd + fix_python_path(cmd),
3307 cwd=cwd,
3308 stdin=subprocess.PIPE,
3309 stdout=stdout,
3310 stderr=stderr)
3311 logging.debug('Started child pid: %d' % child.pid)
3312 out = child.communicate()[0]
3313 # This doesn't mean all the grand-children are done. Sadly, we don't have
3314 # a good way to determine that.
3315
3316 with self._lock:
3317 assert tracename not in (i['trace'] for i in self._traces)
3318 self._traces.append({
maruel@chromium.orgf43e68b2012-10-15 20:23:10 +00003319 'cmd': cmd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003320 'cwd': cwd,
maruel@chromium.org964ddde2012-10-29 18:13:46 +00003321 'output': out,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003322 'pid': child.pid,
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00003323 # Used to figure out the real process when process ids are reused.
3324 'thunk_cmd': child_cmd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003325 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003326 })
3327
3328 return child.returncode, out
3329
3330 def close(self, _timeout=None):
3331 """Stops the kernel log collection and converts the traces to text
3332 representation.
3333 """
3334 with self._lock:
maruel@chromium.org808e3f22013-05-16 16:42:54 +00003335 try:
3336 super(LogmanTrace.Tracer, self).close()
3337 finally:
3338 cmd_stop = [
3339 'logman.exe',
3340 'stop',
3341 'NT Kernel Logger',
3342 '-ets', # Sends the command directly to the kernel.
3343 ]
3344 logging.debug('Running: %s' % cmd_stop)
3345 subprocess.check_call(
3346 cmd_stop,
3347 stdin=subprocess.PIPE,
3348 stdout=subprocess.PIPE,
3349 stderr=subprocess.STDOUT)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003350
3351 def post_process_log(self):
3352 """Converts the .etl file into .csv then into .json."""
3353 super(LogmanTrace.Tracer, self).post_process_log()
3354 logformat = 'csv'
3355 self._convert_log(logformat)
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003356 self._trim_log(logformat)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003357
maruel@chromium.org808e3f22013-05-16 16:42:54 +00003358 def _gen_logdata(self):
3359 return {
3360 'format': 'csv',
3361 'traces': self._traces,
3362 }
3363
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003364 def _trim_log(self, logformat):
3365 """Reduces the amount of data in original log by generating a 'reduced'
3366 log.
3367 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003368 if logformat == 'csv_utf16':
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003369 file_handle = codecs.open(
3370 self._logname + '.' + logformat, 'r', encoding='utf-16')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003371
3372 elif logformat == 'csv':
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003373 assert sys.getfilesystemencoding() == 'mbcs'
3374 file_handle = codecs.open(
3375 self._logname + '.' + logformat, 'r',
3376 encoding=get_current_encoding())
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003377
3378 supported_events = LogmanTrace.Context.supported_events()
3379
3380 def trim(generator):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003381 """Loads items from the generator and returns the interesting data.
3382
3383 It filters out any uninteresting line and reduce the amount of data in
3384 the trace.
3385 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003386 for index, line in enumerate(generator):
3387 if not index:
3388 if line != self.EXPECTED_HEADER:
3389 raise TracingFailure(
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003390 'Found malformed header: %s' % line,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003391 None, None, None)
3392 continue
3393 # As you can see, the CSV is full of useful non-redundant information:
3394 if (line[2] != '0' or # Event ID
3395 line[3] not in ('2', '3') or # Version
3396 line[4] != '0' or # Channel
3397 line[5] != '0' or # Level
3398 line[7] != '0' or # Task
3399 line[8] != '0x0000000000000000' or # Keyword
3400 line[12] != '' or # Instance ID
3401 line[13] != '' or # Parent Instance ID
3402 line[14] != self.NULL_GUID or # Activity ID
3403 line[15] != ''): # Related Activity ID
3404 raise TracingFailure(
3405 'Found unexpected values in line: %s' % ' '.join(line),
3406 None, None, None)
3407
3408 if (line[self.EVENT_NAME], line[self.TYPE]) not in supported_events:
3409 continue
3410
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003411 yield [
3412 line[self.EVENT_NAME],
3413 line[self.TYPE],
3414 line[self.PID],
3415 line[self.TID],
3416 line[self.PROCESSOR_ID],
3417 line[self.TIMESTAMP],
3418 ] + line[self.USER_DATA:]
3419
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003420 # must not convert the trim() call into a list, since it will use too much
3421 # memory for large trace. use a csv file as a workaround since the json
3422 # parser requires a complete in-memory file.
3423 with open('%s.preprocessed' % self._logname, 'wb') as f:
3424 # $ and * can't be used in file name on windows, reducing the likelihood
3425 # of having to escape a string.
3426 out = csv.writer(
3427 f, delimiter='$', quotechar='*', quoting=csv.QUOTE_MINIMAL)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003428 for line in trim(self.CsvReader(file_handle)):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003429 out.writerow([s.encode('utf-8') for s in line])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003430
3431 def _convert_log(self, logformat):
3432 """Converts the ETL trace to text representation.
3433
3434 Normally, 'csv' is sufficient. If complex scripts are used (like eastern
3435 languages), use 'csv_utf16'. If localization gets in the way, use 'xml'.
3436
3437 Arguments:
3438 - logformat: Text format to be generated, csv, csv_utf16 or xml.
3439
3440 Use "tracerpt -?" for help.
3441 """
3442 LOCALE_INVARIANT = 0x7F
3443 windll.kernel32.SetThreadLocale(LOCALE_INVARIANT)
3444 cmd_convert = [
3445 'tracerpt.exe',
3446 '-l', self._logname + '.etl',
3447 '-o', self._logname + '.' + logformat,
3448 '-gmt', # Use UTC
3449 '-y', # No prompt
3450 # Use -of XML to get the header of each items after column 19, e.g. all
3451 # the actual headers of 'User Data'.
3452 ]
3453
3454 if logformat == 'csv':
3455 # tracerpt localizes the 'Type' column, for major brainfuck
3456 # entertainment. I can't imagine any sane reason to do that.
3457 cmd_convert.extend(['-of', 'CSV'])
3458 elif logformat == 'csv_utf16':
3459 # This causes it to use UTF-16, which doubles the log size but ensures
3460 # the log is readable for non-ASCII characters.
3461 cmd_convert.extend(['-of', 'CSV', '-en', 'Unicode'])
3462 elif logformat == 'xml':
3463 cmd_convert.extend(['-of', 'XML'])
3464 else:
3465 raise ValueError('Unexpected log format \'%s\'' % logformat)
3466 logging.debug('Running: %s' % cmd_convert)
3467 # This can takes tens of minutes for large logs.
3468 # Redirects all output to stderr.
3469 subprocess.check_call(
3470 cmd_convert,
3471 stdin=subprocess.PIPE,
3472 stdout=sys.stderr,
3473 stderr=sys.stderr)
3474
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003475 def __init__(self, use_sudo=False): # pylint: disable=W0613
3476 super(LogmanTrace, self).__init__()
3477 # Ignore use_sudo. It's irrelevant on Windows but kept to simplify the API.
3478
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003479 @staticmethod
3480 def clean_trace(logname):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003481 for ext in ('', '.csv', '.etl', '.json', '.xml', '.preprocessed'):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003482 if os.path.isfile(logname + ext):
3483 os.remove(logname + ext)
3484
3485 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003486 def parse_log(cls, logname, blacklist, trace_name):
3487 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00003488 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003489
3490 def blacklist_more(filepath):
3491 # All the NTFS metadata is in the form x:\$EXTEND or stuff like that.
3492 return blacklist(filepath) or re.match(r'[A-Z]\:\\\$EXTEND', filepath)
3493
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003494 # Create a list of (Context, result_dict) tuples. This is necessary because
3495 # the csv file may be larger than the amount of available memory.
3496 contexes = [
3497 (
3498 cls.Context(
3499 blacklist_more, item['pid'], item['trace'], item['thunk_cmd']),
3500 {
3501 'output': item['output'],
3502 'trace': item['trace'],
3503 },
3504 )
3505 for item in read_json(logname)['traces']
3506 if not trace_name or item['trace'] == trace_name
3507 ]
3508
3509 # The log may be too large to fit in memory and it is not efficient to read
3510 # it multiple times, so multiplex the contexes instead, which is slightly
3511 # more awkward.
3512 with open('%s.preprocessed' % logname, 'rb') as f:
3513 lines = csv.reader(
3514 f, delimiter='$', quotechar='*', quoting=csv.QUOTE_MINIMAL)
3515 for encoded in lines:
3516 line = [s.decode('utf-8') for s in encoded]
3517 # Convert the PID in-place from hex.
3518 line[cls.Context.PID] = int(line[cls.Context.PID], 16)
3519 for context in contexes:
3520 if 'exception' in context[1]:
3521 continue
3522 try:
3523 context[0].on_line(line)
3524 except TracingFailure:
3525 context[1]['exception'] = sys.exc_info()
3526
3527 for context in contexes:
3528 if 'exception' in context[1]:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003529 continue
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003530 context[1]['results'] = context[0].to_results()
3531
3532 return [context[1] for context in contexes]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003533
3534
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003535def get_api(**kwargs):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003536 """Returns the correct implementation for the current OS."""
3537 if sys.platform == 'cygwin':
3538 raise NotImplementedError(
3539 'Not implemented for cygwin, start the script from Win32 python')
3540 flavors = {
3541 'win32': LogmanTrace,
3542 'darwin': Dtrace,
3543 'sunos5': Dtrace,
3544 'freebsd7': Dtrace,
3545 'freebsd8': Dtrace,
3546 }
3547 # Defaults to strace.
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003548 return flavors.get(sys.platform, Strace)(**kwargs)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003549
3550
3551def extract_directories(root_dir, files, blacklist):
3552 """Detects if all the files in a directory are in |files| and if so, replace
3553 the individual files by a Results.Directory instance.
3554
3555 Takes a list of Results.File instances and returns a shorter list of
3556 Results.File and Results.Directory instances.
3557
3558 Arguments:
3559 - root_dir: Optional base directory that shouldn't be search further.
3560 - files: list of Results.File instances.
maruel@chromium.org3683afe2013-07-27 00:09:27 +00003561 - blacklist: lambda to reject unneeded files, for example r'.+\.pyc'.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003562 """
3563 logging.info(
3564 'extract_directories(%s, %d files, ...)' % (root_dir, len(files)))
3565 assert not (root_dir or '').endswith(os.path.sep), root_dir
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003566 # It is important for root_dir to not be a symlinked path, make sure to call
3567 # os.path.realpath() as needed.
3568 assert not root_dir or (
3569 os.path.realpath(get_native_path_case(root_dir)) == root_dir)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003570 assert not any(isinstance(f, Results.Directory) for f in files)
3571 # Remove non existent files.
3572 files = [f for f in files if f.existent]
3573 if not files:
3574 return files
3575 # All files must share the same root, which can be None.
3576 assert len(set(f.root for f in files)) == 1, set(f.root for f in files)
3577
3578 # Creates a {directory: {filename: File}} mapping, up to root.
3579 buckets = {}
3580 if root_dir:
3581 buckets[root_dir] = {}
3582 for fileobj in files:
3583 path = fileobj.full_path
3584 directory = os.path.dirname(path)
3585 assert directory
3586 # Do not use os.path.basename() so trailing os.path.sep is kept.
3587 basename = path[len(directory)+1:]
3588 files_in_directory = buckets.setdefault(directory, {})
3589 files_in_directory[basename] = fileobj
3590 # Add all the directories recursively up to root.
3591 while True:
3592 old_d = directory
3593 directory = os.path.dirname(directory)
3594 if directory + os.path.sep == root_dir or directory == old_d:
3595 break
3596 buckets.setdefault(directory, {})
3597
3598 root_prefix = len(root_dir) + 1 if root_dir else 0
3599 for directory in sorted(buckets, reverse=True):
maruel@chromium.orgb1678e62013-03-19 15:55:17 +00003600 if not os.path.isdir(directory):
3601 logging.debug(
3602 '%s was a directory but doesn\'t exist anymore; ignoring', directory)
3603 continue
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003604 actual = set(f for f in os.listdir(directory) if not blacklist(f))
3605 expected = set(buckets[directory])
3606 if not (actual - expected):
3607 parent = os.path.dirname(directory)
3608 buckets[parent][os.path.basename(directory)] = Results.Directory(
3609 root_dir,
3610 directory[root_prefix:],
3611 False,
3612 sum(f.size for f in buckets[directory].itervalues()),
3613 sum(f.nb_files for f in buckets[directory].itervalues()))
3614 # Remove the whole bucket.
3615 del buckets[directory]
3616
3617 # Reverse the mapping with what remains. The original instances are returned,
3618 # so the cached meta data is kept.
3619 files = sum((x.values() for x in buckets.itervalues()), [])
3620 return sorted(files, key=lambda x: x.path)
3621
3622
3623def trace(logfile, cmd, cwd, api, output):
3624 """Traces an executable. Returns (returncode, output) from api.
3625
3626 Arguments:
3627 - logfile: file to write to.
3628 - cmd: command to run.
3629 - cwd: current directory to start the process in.
3630 - api: a tracing api instance.
3631 - output: if True, returns output, otherwise prints it at the console.
3632 """
3633 cmd = fix_python_path(cmd)
3634 api.clean_trace(logfile)
3635 with api.get_tracer(logfile) as tracer:
3636 return tracer.trace(cmd, cwd, 'default', output)
3637
3638
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003639def CMDclean(args):
3640 """Cleans up traces."""
3641 parser = OptionParserTraceInputs(command='clean')
3642 options, args = parser.parse_args(args)
3643 api = get_api()
3644 api.clean_trace(options.log)
3645 return 0
3646
3647
3648def CMDtrace(args):
3649 """Traces an executable."""
3650 parser = OptionParserTraceInputs(command='trace')
3651 parser.allow_interspersed_args = False
3652 parser.add_option(
3653 '-q', '--quiet', action='store_true',
3654 help='Redirects traced executable output to /dev/null')
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003655 parser.add_option(
3656 '-s', '--sudo', action='store_true',
3657 help='Use sudo when shelling out the tracer tool (ignored on Windows)')
3658 parser.add_option(
3659 '-n', '--no-sudo', action='store_false',
3660 help='Don\'t use sudo')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003661 options, args = parser.parse_args(args)
3662
3663 if not args:
3664 parser.error('Please provide a command to run')
3665
3666 if not os.path.isabs(args[0]) and os.access(args[0], os.X_OK):
3667 args[0] = os.path.abspath(args[0])
3668
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003669 # options.sudo default value is None, which is to do whatever tracer defaults
3670 # do.
3671 api = get_api(use_sudo=options.sudo)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003672 return trace(options.log, args, os.getcwd(), api, options.quiet)[0]
3673
3674
3675def CMDread(args):
3676 """Reads the logs and prints the result."""
3677 parser = OptionParserTraceInputs(command='read')
3678 parser.add_option(
3679 '-V', '--variable',
3680 nargs=2,
3681 action='append',
3682 dest='variables',
3683 metavar='VAR_NAME directory',
3684 default=[],
3685 help=('Variables to replace relative directories against. Example: '
3686 '"-v \'$HOME\' \'/home/%s\'" will replace all occurence of your '
3687 'home dir with $HOME') % getpass.getuser())
3688 parser.add_option(
3689 '--root-dir',
3690 help='Root directory to base everything off it. Anything outside of this '
3691 'this directory will not be reported')
3692 parser.add_option(
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003693 '--trace-name',
3694 help='Only reads one of the trace. Defaults to reading all traces')
3695 parser.add_option(
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003696 '-j', '--json', action='store_true',
3697 help='Outputs raw result data as json')
3698 parser.add_option(
maruel@chromium.org3683afe2013-07-27 00:09:27 +00003699 '--trace-blacklist', action='append', default=[],
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003700 help='List of regexp to use as blacklist filter')
3701 options, args = parser.parse_args(args)
3702
3703 if options.root_dir:
maruel@chromium.org7dc17b32013-04-02 22:27:05 +00003704 options.root_dir = get_native_path_case(
3705 unicode(os.path.abspath(options.root_dir)))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003706
3707 variables = dict(options.variables)
3708 api = get_api()
maruel@chromium.org3683afe2013-07-27 00:09:27 +00003709 blacklist = gen_blacklist(options.trace_blacklist)
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003710 data = api.parse_log(options.log, blacklist, options.trace_name)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003711 # Process each trace.
3712 output_as_json = []
maruel@chromium.org9e56b592013-05-21 16:56:05 +00003713 try:
3714 for item in data:
3715 if 'exception' in item:
3716 # Do not abort the other traces.
3717 print >> sys.stderr, (
3718 'Trace %s: Got an exception: %s' % (
3719 item['trace'], item['exception'][1]))
3720 continue
3721 results = item['results']
3722 if options.root_dir:
3723 results = results.strip_root(options.root_dir)
3724
3725 if options.json:
3726 output_as_json.append(results.flatten())
3727 else:
3728 simplified = extract_directories(
3729 options.root_dir, results.files, blacklist)
3730 simplified = [f.replace_variables(variables) for f in simplified]
3731 if len(data) > 1:
3732 print('Trace: %s' % item['trace'])
3733 print('Total: %d' % len(results.files))
3734 print('Non existent: %d' % len(results.non_existent))
3735 for f in results.non_existent:
3736 print(' %s' % f.path)
3737 print(
3738 'Interesting: %d reduced to %d' % (
3739 len(results.existent), len(simplified)))
3740 for f in simplified:
3741 print(' %s' % f.path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003742
3743 if options.json:
maruel@chromium.org9e56b592013-05-21 16:56:05 +00003744 write_json(sys.stdout, output_as_json, False)
3745 except KeyboardInterrupt:
3746 return 1
3747 except IOError as e:
3748 if e.errno == errno.EPIPE:
3749 # Do not print a stack trace when the output is piped to less and the user
3750 # quits before the whole output was written.
3751 return 1
3752 raise
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003753 return 0
3754
3755
3756class OptionParserWithLogging(optparse.OptionParser):
3757 """Adds --verbose option."""
3758 def __init__(self, verbose=0, **kwargs):
3759 optparse.OptionParser.__init__(self, **kwargs)
3760 self.add_option(
3761 '-v', '--verbose',
3762 action='count',
3763 default=verbose,
3764 help='Use multiple times to increase verbosity')
3765
3766 def parse_args(self, *args, **kwargs):
3767 options, args = optparse.OptionParser.parse_args(self, *args, **kwargs)
3768 levels = [logging.ERROR, logging.INFO, logging.DEBUG]
3769 logging.basicConfig(
3770 level=levels[min(len(levels)-1, options.verbose)],
3771 format='%(levelname)5s %(module)15s(%(lineno)3d): %(message)s')
3772 return options, args
3773
3774
3775class OptionParserWithNiceDescription(OptionParserWithLogging):
3776 """Generates the description with the command's docstring."""
3777 def __init__(self, **kwargs):
3778 """Sets 'description' and 'usage' if not already specified."""
3779 command = kwargs.pop('command', 'help')
3780 kwargs.setdefault(
3781 'description',
3782 re.sub('[\r\n ]{2,}', ' ', get_command_handler(command).__doc__))
3783 kwargs.setdefault('usage', '%%prog %s [options]' % command)
3784 OptionParserWithLogging.__init__(self, **kwargs)
3785
3786
3787class OptionParserTraceInputs(OptionParserWithNiceDescription):
3788 """Adds automatic --log handling."""
3789 def __init__(self, **kwargs):
3790 OptionParserWithNiceDescription.__init__(self, **kwargs)
3791 self.add_option(
3792 '-l', '--log', help='Log file to generate or read, required')
3793
3794 def parse_args(self, *args, **kwargs):
3795 """Makes sure the paths make sense.
3796
3797 On Windows, / and \ are often mixed together in a path.
3798 """
3799 options, args = OptionParserWithNiceDescription.parse_args(
3800 self, *args, **kwargs)
3801 if not options.log:
3802 self.error('Must supply a log file with -l')
3803 options.log = os.path.abspath(options.log)
3804 return options, args
3805
3806
3807def extract_documentation():
3808 """Returns a dict {command: description} for each of documented command."""
3809 commands = (
3810 fn[3:]
3811 for fn in dir(sys.modules['__main__'])
3812 if fn.startswith('CMD') and get_command_handler(fn[3:]).__doc__)
3813 return dict((fn, get_command_handler(fn).__doc__) for fn in commands)
3814
3815
3816def CMDhelp(args):
3817 """Prints list of commands or help for a specific command."""
3818 doc = extract_documentation()
3819 # Calculates the optimal offset.
3820 offset = max(len(cmd) for cmd in doc)
3821 format_str = ' %-' + str(offset + 2) + 's %s'
3822 # Generate a one-liner documentation of each commands.
3823 commands_description = '\n'.join(
3824 format_str % (cmd, doc[cmd].split('\n')[0]) for cmd in sorted(doc))
3825
3826 parser = OptionParserWithNiceDescription(
3827 usage='%prog <command> [options]',
3828 description='Commands are:\n%s\n' % commands_description)
3829 parser.format_description = lambda _: parser.description
3830
3831 # Strip out any -h or --help argument.
3832 _, args = parser.parse_args([i for i in args if not i in ('-h', '--help')])
3833 if len(args) == 1:
3834 if not get_command_handler(args[0]):
3835 parser.error('Unknown command %s' % args[0])
3836 # The command was "%prog help command", replaces ourself with
3837 # "%prog command --help" so help is correctly printed out.
3838 return main(args + ['--help'])
3839 elif args:
3840 parser.error('Unknown argument "%s"' % ' '.join(args))
3841 parser.print_help()
3842 return 0
3843
3844
3845def get_command_handler(name):
3846 """Returns the command handler or CMDhelp if it doesn't exist."""
3847 return getattr(sys.modules['__main__'], 'CMD%s' % name, None)
3848
3849
3850def main_impl(argv):
3851 command = get_command_handler(argv[0] if argv else 'help')
3852 if not command:
3853 return CMDhelp(argv)
3854 return command(argv[1:])
3855
3856def main(argv):
maruel@chromium.orgac36fb72013-05-21 14:50:53 +00003857 disable_buffering()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003858 try:
3859 main_impl(argv)
3860 except TracingFailure, e:
3861 sys.stderr.write('\nError: ')
3862 sys.stderr.write(str(e))
3863 sys.stderr.write('\n')
3864 return 1
3865
3866
3867if __name__ == '__main__':
3868 sys.exit(main(sys.argv[1:]))