blob: f21c37e742e36356c5b9bf3c433bf63ab8fc4c2a [file] [log] [blame]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001#!/usr/bin/env python
2# coding=utf-8
3# Copyright (c) 2012 The Chromium Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""Traces an executable and its child processes and extract the files accessed
8by them.
9
10The implementation uses OS-specific API. The native Kernel logger and the ETL
11interface is used on Windows. Dtrace is used on OSX. Strace is used otherwise.
12The OS-specific implementation is hidden in an 'API' interface.
13
14The results are embedded in a Results instance. The tracing is done in two
15phases, the first is to do the actual trace and generate an
16implementation-specific log file. Then the log file is parsed to extract the
17information, including the individual child processes and the files accessed
18from the log.
19"""
20
21import codecs
22import csv
maruel@chromium.org9e56b592013-05-21 16:56:05 +000023import errno
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000024import getpass
25import glob
26import json
27import logging
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000028import os
29import re
maruel@chromium.org9b2112a2013-04-05 18:01:02 +000030import stat
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000031import subprocess
32import sys
33import tempfile
34import threading
35import time
maruel@chromium.org9b2112a2013-04-05 18:01:02 +000036import unicodedata
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000037import weakref
38
maruel@chromium.orge5322512013-08-19 20:17:57 +000039from third_party import colorama
40from third_party.depot_tools import fix_encoding
41from third_party.depot_tools import subcommand
42
vadimsh@chromium.orga4326472013-08-24 02:05:41 +000043from utils import tools
44
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000045## OS-specific imports
46
47if sys.platform == 'win32':
48 from ctypes.wintypes import byref, create_unicode_buffer, c_int, c_wchar_p
49 from ctypes.wintypes import windll, FormatError # pylint: disable=E0611
50 from ctypes.wintypes import GetLastError # pylint: disable=E0611
51elif sys.platform == 'darwin':
52 import Carbon.File # pylint: disable=F0401
53 import MacOS # pylint: disable=F0401
54
55
maruel@chromium.org3d671992013-08-20 00:38:27 +000056__version__ = '0.1'
57
58
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000059BASE_DIR = os.path.dirname(os.path.abspath(__file__))
60ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR))
61
62
63class TracingFailure(Exception):
64 """An exception occured during tracing."""
65 def __init__(self, description, pid, line_number, line, *args):
66 super(TracingFailure, self).__init__(
67 description, pid, line_number, line, *args)
68 self.description = description
69 self.pid = pid
70 self.line_number = line_number
71 self.line = line
72 self.extra = args
73
74 def __str__(self):
75 out = self.description
76 if self.pid:
77 out += '\npid: %d' % self.pid
78 if self.line_number:
79 out += '\nline: %d' % self.line_number
80 if self.line:
81 out += '\n%s' % self.line
82 if self.extra:
83 out += '\n' + ', '.join(map(str, filter(None, self.extra)))
84 return out
85
86
87## OS-specific functions
88
89if sys.platform == 'win32':
90 def QueryDosDevice(drive_letter):
91 """Returns the Windows 'native' path for a DOS drive letter."""
92 assert re.match(r'^[a-zA-Z]:$', drive_letter), drive_letter
maruel@chromium.org306e0e72012-11-02 18:22:03 +000093 assert isinstance(drive_letter, unicode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000094 # Guesswork. QueryDosDeviceW never returns the required number of bytes.
95 chars = 1024
maruel@chromium.org306e0e72012-11-02 18:22:03 +000096 drive_letter = drive_letter
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000097 p = create_unicode_buffer(chars)
98 if 0 == windll.kernel32.QueryDosDeviceW(drive_letter, p, chars):
99 err = GetLastError()
100 if err:
101 # pylint: disable=E0602
maruel@chromium.orgf37c2472012-11-02 20:06:13 +0000102 msg = u'QueryDosDevice(%s): %s (%d)' % (
103 drive_letter, FormatError(err), err)
104 raise WindowsError(err, msg.encode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000105 return p.value
106
107
108 def GetShortPathName(long_path):
109 """Returns the Windows short path equivalent for a 'long' path."""
maruel@chromium.org68bb1482013-02-15 22:36:24 +0000110 assert isinstance(long_path, unicode), repr(long_path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000111 # Adds '\\\\?\\' when given an absolute path so the MAX_PATH (260) limit is
112 # not enforced.
113 if os.path.isabs(long_path) and not long_path.startswith('\\\\?\\'):
114 long_path = '\\\\?\\' + long_path
115 chars = windll.kernel32.GetShortPathNameW(long_path, None, 0)
116 if chars:
117 p = create_unicode_buffer(chars)
118 if windll.kernel32.GetShortPathNameW(long_path, p, chars):
119 return p.value
120
121 err = GetLastError()
122 if err:
123 # pylint: disable=E0602
maruel@chromium.orgf37c2472012-11-02 20:06:13 +0000124 msg = u'GetShortPathName(%s): %s (%d)' % (
125 long_path, FormatError(err), err)
126 raise WindowsError(err, msg.encode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000127
128
129 def GetLongPathName(short_path):
130 """Returns the Windows long path equivalent for a 'short' path."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000131 assert isinstance(short_path, unicode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000132 # Adds '\\\\?\\' when given an absolute path so the MAX_PATH (260) limit is
133 # not enforced.
134 if os.path.isabs(short_path) and not short_path.startswith('\\\\?\\'):
135 short_path = '\\\\?\\' + short_path
136 chars = windll.kernel32.GetLongPathNameW(short_path, None, 0)
137 if chars:
138 p = create_unicode_buffer(chars)
139 if windll.kernel32.GetLongPathNameW(short_path, p, chars):
140 return p.value
141
142 err = GetLastError()
143 if err:
144 # pylint: disable=E0602
maruel@chromium.orgf37c2472012-11-02 20:06:13 +0000145 msg = u'GetLongPathName(%s): %s (%d)' % (
146 short_path, FormatError(err), err)
147 raise WindowsError(err, msg.encode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000148
149
150 def get_current_encoding():
151 """Returns the 'ANSI' code page associated to the process."""
152 return 'cp%d' % int(windll.kernel32.GetACP())
153
154
155 class DosDriveMap(object):
156 """Maps \Device\HarddiskVolumeN to N: on Windows."""
157 # Keep one global cache.
158 _MAPPING = {}
159
160 def __init__(self):
161 """Lazy loads the cache."""
162 if not self._MAPPING:
163 # This is related to UNC resolver on windows. Ignore that.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000164 self._MAPPING[u'\\Device\\Mup'] = None
165 self._MAPPING[u'\\SystemRoot'] = os.environ[u'SystemRoot']
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000166
167 for letter in (chr(l) for l in xrange(ord('C'), ord('Z')+1)):
168 try:
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000169 letter = u'%s:' % letter
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000170 mapped = QueryDosDevice(letter)
171 if mapped in self._MAPPING:
172 logging.warn(
173 ('Two drives: \'%s\' and \'%s\', are mapped to the same disk'
174 '. Drive letters are a user-mode concept and the kernel '
175 'traces only have NT path, so all accesses will be '
176 'associated with the first drive letter, independent of the '
177 'actual letter used by the code') % (
178 self._MAPPING[mapped], letter))
179 else:
180 self._MAPPING[mapped] = letter
181 except WindowsError: # pylint: disable=E0602
182 pass
183
184 def to_win32(self, path):
185 """Converts a native NT path to Win32/DOS compatible path."""
186 match = re.match(r'(^\\Device\\[a-zA-Z0-9]+)(\\.*)?$', path)
187 if not match:
188 raise ValueError(
189 'Can\'t convert %s into a Win32 compatible path' % path,
190 path)
191 if not match.group(1) in self._MAPPING:
192 # Unmapped partitions may be accessed by windows for the
193 # fun of it while the test is running. Discard these.
194 return None
195 drive = self._MAPPING[match.group(1)]
196 if not drive or not match.group(2):
197 return drive
198 return drive + match.group(2)
199
200
201 def isabs(path):
202 """Accepts X: as an absolute path, unlike python's os.path.isabs()."""
203 return os.path.isabs(path) or len(path) == 2 and path[1] == ':'
204
205
csharp@chromium.orgf2eacff2013-04-04 14:20:20 +0000206 def find_item_native_case(root, item):
207 """Gets the native path case of a single item based at root_path."""
208 if item == '..':
209 return item
210
211 root = get_native_path_case(root)
212 return os.path.basename(get_native_path_case(os.path.join(root, item)))
213
214
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000215 def get_native_path_case(p):
216 """Returns the native path case for an existing file.
217
218 On Windows, removes any leading '\\?\'.
219 """
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000220 assert isinstance(p, unicode), repr(p)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000221 if not isabs(p):
222 raise ValueError(
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +0000223 'get_native_path_case(%r): Require an absolute path' % p, p)
224
maruel@chromium.org037758d2012-12-10 17:59:46 +0000225 # Make sure it is normalized to os.path.sep. Do not do it here to keep the
226 # function fast
227 assert '/' not in p, p
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +0000228 suffix = ''
229 count = p.count(':')
230 if count > 1:
231 # This means it has an alternate-data stream. There could be 3 ':', since
232 # it could be the $DATA datastream of an ADS. Split the whole ADS suffix
233 # off and add it back afterward. There is no way to know the native path
234 # case of an alternate data stream.
235 items = p.split(':')
236 p = ':'.join(items[0:2])
237 suffix = ''.join(':' + i for i in items[2:])
238
maruel@chromium.org42b091d2013-04-02 22:27:02 +0000239 # TODO(maruel): Use os.path.normpath?
240 if p.endswith('.\\'):
241 p = p[:-2]
242
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000243 # Windows used to have an option to turn on case sensitivity on non Win32
244 # subsystem but that's out of scope here and isn't supported anymore.
245 # Go figure why GetShortPathName() is needed.
246 try:
247 out = GetLongPathName(GetShortPathName(p))
248 except OSError, e:
249 if e.args[0] in (2, 3, 5):
250 # The path does not exist. Try to recurse and reconstruct the path.
251 base = os.path.dirname(p)
252 rest = os.path.basename(p)
253 return os.path.join(get_native_path_case(base), rest)
254 raise
255 if out.startswith('\\\\?\\'):
256 out = out[4:]
257 # Always upper case the first letter since GetLongPathName() will return the
258 # drive letter in the case it was given.
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +0000259 return out[0].upper() + out[1:] + suffix
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000260
261
262 def CommandLineToArgvW(command_line):
263 """Splits a commandline into argv using CommandLineToArgvW()."""
264 # http://msdn.microsoft.com/library/windows/desktop/bb776391.aspx
265 size = c_int()
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000266 assert isinstance(command_line, unicode)
267 ptr = windll.shell32.CommandLineToArgvW(command_line, byref(size))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000268 try:
269 return [arg for arg in (c_wchar_p * size.value).from_address(ptr)]
270 finally:
271 windll.kernel32.LocalFree(ptr)
272
273
274elif sys.platform == 'darwin':
275
276
277 # On non-windows, keep the stdlib behavior.
278 isabs = os.path.isabs
279
280
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000281 def _native_case(p):
282 """Gets the native path case. Warning: this function resolves symlinks."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000283 try:
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000284 rel_ref, _ = Carbon.File.FSPathMakeRef(p.encode('utf-8'))
285 # The OSX underlying code uses NFD but python strings are in NFC. This
286 # will cause issues with os.listdir() for example. Since the dtrace log
287 # *is* in NFC, normalize it here.
csharp@chromium.org5a2c3732013-03-07 14:44:56 +0000288 out = unicodedata.normalize(
289 'NFC', rel_ref.FSRefMakePath().decode('utf-8'))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000290 if p.endswith(os.path.sep) and not out.endswith(os.path.sep):
291 return out + os.path.sep
292 return out
293 except MacOS.Error, e:
294 if e.args[0] in (-43, -120):
295 # The path does not exist. Try to recurse and reconstruct the path.
296 # -43 means file not found.
297 # -120 means directory not found.
298 base = os.path.dirname(p)
299 rest = os.path.basename(p)
csharp@chromium.org5a2c3732013-03-07 14:44:56 +0000300 return os.path.join(_native_case(base), rest)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000301 raise OSError(
302 e.args[0], 'Failed to get native path for %s' % p, p, e.args[1])
303
304
305 def _split_at_symlink_native(base_path, rest):
306 """Returns the native path for a symlink."""
307 base, symlink, rest = split_at_symlink(base_path, rest)
308 if symlink:
309 if not base_path:
310 base_path = base
311 else:
312 base_path = safe_join(base_path, base)
csharp@chromium.orgf2eacff2013-04-04 14:20:20 +0000313 symlink = find_item_native_case(base_path, symlink)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000314 return base, symlink, rest
315
316
csharp@chromium.orgf2eacff2013-04-04 14:20:20 +0000317 def find_item_native_case(root_path, item):
318 """Gets the native path case of a single item based at root_path.
319
320 There is no API to get the native path case of symlinks on OSX. So it
321 needs to be done the slow way.
322 """
323 if item == '..':
324 return item
325
326 item = item.lower()
327 for element in os.listdir(root_path):
328 if element.lower() == item:
329 return element
330
331
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000332 def get_native_path_case(path):
333 """Returns the native path case for an existing file.
334
335 Technically, it's only HFS+ on OSX that is case preserving and
336 insensitive. It's the default setting on HFS+ but can be changed.
337 """
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000338 assert isinstance(path, unicode), repr(path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000339 if not isabs(path):
340 raise ValueError(
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000341 'get_native_path_case(%r): Require an absolute path' % path, path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000342 if path.startswith('/dev'):
343 # /dev is not visible from Carbon, causing an exception.
344 return path
345
346 # Starts assuming there is no symlink along the path.
347 resolved = _native_case(path)
csharp@chromium.orgf2eacff2013-04-04 14:20:20 +0000348 if path.lower() in (resolved.lower(), resolved.lower() + './'):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000349 # This code path is incredibly faster.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000350 logging.debug('get_native_path_case(%s) = %s' % (path, resolved))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000351 return resolved
352
353 # There was a symlink, process it.
354 base, symlink, rest = _split_at_symlink_native(None, path)
355 assert symlink, (path, base, symlink, rest, resolved)
356 prev = base
357 base = safe_join(_native_case(base), symlink)
358 assert len(base) > len(prev)
359 while rest:
360 prev = base
361 relbase, symlink, rest = _split_at_symlink_native(base, rest)
362 base = safe_join(base, relbase)
363 assert len(base) > len(prev), (prev, base, symlink)
364 if symlink:
365 base = safe_join(base, symlink)
366 assert len(base) > len(prev), (prev, base, symlink)
367 # Make sure no symlink was resolved.
368 assert base.lower() == path.lower(), (base, path)
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000369 logging.debug('get_native_path_case(%s) = %s' % (path, base))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000370 return base
371
372
373else: # OSes other than Windows and OSX.
374
375
376 # On non-windows, keep the stdlib behavior.
377 isabs = os.path.isabs
378
379
csharp@chromium.orgf2eacff2013-04-04 14:20:20 +0000380 def find_item_native_case(root, item):
381 """Gets the native path case of a single item based at root_path."""
382 if item == '..':
383 return item
384
385 root = get_native_path_case(root)
386 return os.path.basename(get_native_path_case(os.path.join(root, item)))
387
388
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000389 def get_native_path_case(path):
390 """Returns the native path case for an existing file.
391
392 On OSes other than OSX and Windows, assume the file system is
393 case-sensitive.
394
395 TODO(maruel): This is not strictly true. Implement if necessary.
396 """
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000397 assert isinstance(path, unicode), repr(path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000398 if not isabs(path):
399 raise ValueError(
maruel@chromium.orgd1e9fbd2013-02-26 14:42:24 +0000400 'get_native_path_case(%r): Require an absolute path' % path, path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000401 # Give up on cygwin, as GetLongPathName() can't be called.
402 # Linux traces tends to not be normalized so use this occasion to normalize
403 # it. This function implementation already normalizes the path on the other
404 # OS so this needs to be done here to be coherent between OSes.
405 out = os.path.normpath(path)
406 if path.endswith(os.path.sep) and not out.endswith(os.path.sep):
407 return out + os.path.sep
408 return out
409
410
411if sys.platform != 'win32': # All non-Windows OSes.
412
413
414 def safe_join(*args):
415 """Joins path elements like os.path.join() but doesn't abort on absolute
416 path.
417
418 os.path.join('foo', '/bar') == '/bar'
419 but safe_join('foo', '/bar') == 'foo/bar'.
420 """
421 out = ''
422 for element in args:
423 if element.startswith(os.path.sep):
424 if out.endswith(os.path.sep):
425 out += element[1:]
426 else:
427 out += element
428 else:
429 if out.endswith(os.path.sep):
430 out += element
431 else:
432 out += os.path.sep + element
433 return out
434
435
436 def split_at_symlink(base_dir, relfile):
437 """Scans each component of relfile and cut the string at the symlink if
438 there is any.
439
440 Returns a tuple (base_path, symlink, rest), with symlink == rest == None if
441 not symlink was found.
442 """
443 if base_dir:
444 assert relfile
445 assert os.path.isabs(base_dir)
446 index = 0
447 else:
448 assert os.path.isabs(relfile)
449 index = 1
450
451 def at_root(rest):
452 if base_dir:
453 return safe_join(base_dir, rest)
454 return rest
455
456 while True:
457 try:
458 index = relfile.index(os.path.sep, index)
459 except ValueError:
460 index = len(relfile)
461 full = at_root(relfile[:index])
462 if os.path.islink(full):
463 # A symlink!
464 base = os.path.dirname(relfile[:index])
465 symlink = os.path.basename(relfile[:index])
466 rest = relfile[index:]
467 logging.debug(
468 'split_at_symlink(%s, %s) -> (%s, %s, %s)' %
469 (base_dir, relfile, base, symlink, rest))
470 return base, symlink, rest
471 if index == len(relfile):
472 break
473 index += 1
474 return relfile, None, None
475
476
maruel@chromium.org3683afe2013-07-27 00:09:27 +0000477def gen_blacklist(regexes):
478 """Returns a lambda to be used as a blacklist."""
479 compiled = [re.compile(i) for i in regexes]
480 def match(f):
481 return any(j.match(f) for j in compiled)
482 return match
483
484
maruel@chromium.org2909b7c2013-05-29 18:52:59 +0000485def create_subprocess_thunk():
486 """Creates a small temporary script to start the child process.
487
488 This thunk doesn't block, its unique name is used to identify it as the
489 parent.
490 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000491 handle, name = tempfile.mkstemp(prefix='trace_inputs_thunk', suffix='.py')
maruel@chromium.org2909b7c2013-05-29 18:52:59 +0000492 try:
493 os.write(
494 handle,
495 (
496 'import subprocess, sys\n'
497 'sys.exit(subprocess.call(sys.argv[2:]))\n'
498 ))
499 finally:
500 os.close(handle)
501 return name
502
503
504def create_exec_thunk():
505 """Creates a small temporary script to start the child executable.
506
507 Reads from the file handle provided as the fisrt argument to block, then
508 execv() the command to be traced.
509 """
510 handle, name = tempfile.mkstemp(prefix='trace_inputs_thunk', suffix='.py')
511 try:
512 os.write(
513 handle,
514 (
515 'import os, sys\n'
516 'fd = int(sys.argv[1])\n'
517 # This will block until the controlling process writes a byte on the
518 # pipe. It will do so once the tracing tool, e.g. strace, is ready to
519 # trace.
520 'os.read(fd, 1)\n'
521 'os.close(fd)\n'
522 'os.execv(sys.argv[2], sys.argv[2:])\n'
523 ))
524 finally:
525 os.close(handle)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000526 return name
527
528
529def strace_process_quoted_arguments(text):
530 """Extracts quoted arguments on a string and return the arguments as a list.
531
532 Implemented as an automaton. Supports incomplete strings in the form
533 '"foo"...'.
534
535 Example:
536 With text = '"foo", "bar"', the function will return ['foo', 'bar']
537
538 TODO(maruel): Implement escaping.
539 """
540 # All the possible states of the DFA.
541 ( NEED_QUOTE, # Begining of a new arguments.
542 INSIDE_STRING, # Inside an argument.
543 ESCAPED, # Found a '\' inside a quote. Treat the next char as-is.
544 NEED_COMMA_OR_DOT, # Right after the closing quote of an argument. Could be
545 # a serie of 3 dots or a comma.
546 NEED_SPACE, # Right after a comma
547 NEED_DOT_2, # Found a dot, need a second one.
548 NEED_DOT_3, # Found second dot, need a third one.
549 NEED_COMMA, # Found third dot, need a comma.
550 ) = range(8)
551
552 state = NEED_QUOTE
553 out = []
554 for index, char in enumerate(text):
555 if char == '"':
556 if state == NEED_QUOTE:
557 state = INSIDE_STRING
558 # A new argument was found.
559 out.append('')
560 elif state == INSIDE_STRING:
561 # The argument is now closed.
562 state = NEED_COMMA_OR_DOT
563 elif state == ESCAPED:
564 out[-1] += char
565 state = INSIDE_STRING
566 else:
567 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000568 'Can\'t process char \'%s\' at column %d for: %r' % (
569 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000570 index,
571 text)
572 elif char == ',':
573 if state in (NEED_COMMA_OR_DOT, NEED_COMMA):
574 state = NEED_SPACE
575 elif state == INSIDE_STRING:
576 out[-1] += char
577 elif state == ESCAPED:
578 out[-1] += char
579 state = INSIDE_STRING
580 else:
581 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000582 'Can\'t process char \'%s\' at column %d for: %r' % (
583 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000584 index,
585 text)
586 elif char == ' ':
587 if state == NEED_SPACE:
588 state = NEED_QUOTE
589 elif state == INSIDE_STRING:
590 out[-1] += char
591 elif state == ESCAPED:
592 out[-1] += char
593 state = INSIDE_STRING
594 else:
595 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000596 'Can\'t process char \'%s\' at column %d for: %r' % (
597 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000598 index,
599 text)
600 elif char == '.':
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000601 if state in (NEED_QUOTE, NEED_COMMA_OR_DOT):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000602 # The string is incomplete, this mean the strace -s flag should be
603 # increased.
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000604 # For NEED_QUOTE, the input string would look like '"foo", ...'.
605 # For NEED_COMMA_OR_DOT, the input string would look like '"foo"...'
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000606 state = NEED_DOT_2
607 elif state == NEED_DOT_2:
608 state = NEED_DOT_3
609 elif state == NEED_DOT_3:
610 state = NEED_COMMA
611 elif state == INSIDE_STRING:
612 out[-1] += char
613 elif state == ESCAPED:
614 out[-1] += char
615 state = INSIDE_STRING
616 else:
617 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000618 'Can\'t process char \'%s\' at column %d for: %r' % (
619 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000620 index,
621 text)
622 elif char == '\\':
623 if state == ESCAPED:
624 out[-1] += char
625 state = INSIDE_STRING
626 elif state == INSIDE_STRING:
627 state = ESCAPED
628 else:
629 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000630 'Can\'t process char \'%s\' at column %d for: %r' % (
631 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000632 index,
633 text)
634 else:
635 if state == INSIDE_STRING:
636 out[-1] += char
637 else:
638 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000639 'Can\'t process char \'%s\' at column %d for: %r' % (
640 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000641 index,
642 text)
643 if state not in (NEED_COMMA, NEED_COMMA_OR_DOT):
644 raise ValueError(
645 'String is incorrectly terminated: %r' % text,
646 text)
647 return out
648
649
650def read_json(filepath):
651 with open(filepath, 'r') as f:
652 return json.load(f)
653
654
655def write_json(filepath_or_handle, data, dense):
656 """Writes data into filepath or file handle encoded as json.
657
658 If dense is True, the json is packed. Otherwise, it is human readable.
659 """
660 if hasattr(filepath_or_handle, 'write'):
661 if dense:
662 filepath_or_handle.write(
663 json.dumps(data, sort_keys=True, separators=(',',':')))
664 else:
665 filepath_or_handle.write(json.dumps(data, sort_keys=True, indent=2))
666 else:
667 with open(filepath_or_handle, 'wb') as f:
668 if dense:
669 json.dump(data, f, sort_keys=True, separators=(',',':'))
670 else:
671 json.dump(data, f, sort_keys=True, indent=2)
672
673
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000674def assert_is_renderable(pseudo_string):
675 """Asserts the input is a valid object to be processed by render()."""
676 assert (
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000677 pseudo_string is None or
678 isinstance(pseudo_string, unicode) or
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000679 hasattr(pseudo_string, 'render')), repr(pseudo_string)
680
681
682def render(pseudo_string):
683 """Converts the pseudo-string to an unicode string."""
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000684 if pseudo_string is None or isinstance(pseudo_string, unicode):
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000685 return pseudo_string
686 return pseudo_string.render()
687
688
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000689class Results(object):
690 """Results of a trace session."""
691
692 class _TouchedObject(object):
693 """Something, a file or a directory, that was accessed."""
694 def __init__(self, root, path, tainted, size, nb_files):
695 logging.debug(
696 '%s(%s, %s, %s, %s, %s)' %
697 (self.__class__.__name__, root, path, tainted, size, nb_files))
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000698 assert_is_renderable(root)
699 assert_is_renderable(path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000700 self.root = root
701 self.path = path
702 self.tainted = tainted
703 self.nb_files = nb_files
maruel@chromium.org538141b2013-06-03 20:57:17 +0000704 # Can be used as a cache or a default value, depending on context. In
705 # particular, once self.tainted is True, because the path was replaced
706 # with a variable, it is not possible to look up the file size.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000707 self._size = size
708 # These are cache only.
709 self._real_path = None
710
711 # Check internal consistency.
712 assert path, path
713 assert tainted or bool(root) != bool(isabs(path)), (root, path)
714 assert tainted or (
715 not os.path.exists(self.full_path) or
716 (self.full_path == get_native_path_case(self.full_path))), (
717 tainted, self.full_path, get_native_path_case(self.full_path))
718
719 @property
720 def existent(self):
721 return self.size != -1
722
723 @property
724 def full_path(self):
725 if self.root:
726 return os.path.join(self.root, self.path)
727 return self.path
728
729 @property
730 def real_path(self):
731 """Returns the path with symlinks resolved."""
732 if not self._real_path:
733 self._real_path = os.path.realpath(self.full_path)
734 return self._real_path
735
736 @property
737 def size(self):
maruel@chromium.org538141b2013-06-03 20:57:17 +0000738 """File's size. -1 is not existent.
739
740 Once tainted, it is not possible the retrieve the file size anymore since
741 the path is composed of variables.
742 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000743 if self._size is None and not self.tainted:
744 try:
745 self._size = os.stat(self.full_path).st_size
746 except OSError:
747 self._size = -1
748 return self._size
749
750 def flatten(self):
751 """Returns a dict representing this object.
752
753 A 'size' of 0 means the file was only touched and not read.
754 """
755 return {
756 'path': self.path,
757 'size': self.size,
758 }
759
760 def replace_variables(self, variables):
761 """Replaces the root of this File with one of the variables if it matches.
762
763 If a variable replacement occurs, the cloned object becomes tainted.
764 """
765 for variable, root_path in variables.iteritems():
766 if self.path.startswith(root_path):
767 return self._clone(
768 self.root, variable + self.path[len(root_path):], True)
769 # No need to clone, returns ourself.
770 return self
771
772 def strip_root(self, root):
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000773 """Returns a clone of itself with 'root' stripped off.
774
775 Note that the file is kept if it is either accessible from a symlinked
776 path that was used to access the file or through the real path.
777 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000778 # Check internal consistency.
779 assert self.tainted or (isabs(root) and root.endswith(os.path.sep)), root
780 if not self.full_path.startswith(root):
781 # Now try to resolve the symlinks to see if it can be reached this way.
782 # Only try *after* trying without resolving symlink.
783 if not self.real_path.startswith(root):
784 return None
785 path = self.real_path
786 else:
787 path = self.full_path
788 return self._clone(root, path[len(root):], self.tainted)
789
790 def _clone(self, new_root, new_path, tainted):
791 raise NotImplementedError(self.__class__.__name__)
792
793 class File(_TouchedObject):
794 """A file that was accessed. May not be present anymore.
795
796 If tainted is true, it means it is not a real path anymore as a variable
797 replacement occured.
798
maruel@chromium.org538141b2013-06-03 20:57:17 +0000799 |mode| can be one of None, TOUCHED, READ or WRITE.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000800 """
maruel@chromium.org538141b2013-06-03 20:57:17 +0000801 # Was probed for existence, and it is existent, but was never _opened_.
802 TOUCHED = 't'
803 # Opened for read only and guaranteed to not have been written to.
804 READ = 'r'
805 # Opened for write.
806 WRITE = 'w'
807
808 # They are listed in order of priority. E.g. if a file is traced as TOUCHED
809 # then as WRITE, only keep WRITE. None means no idea, which is a problem on
810 # Windows.
811 ACCEPTABLE_MODES = (None, TOUCHED, READ, WRITE)
812
813 def __init__(self, root, path, tainted, size, mode):
814 assert mode in self.ACCEPTABLE_MODES
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000815 super(Results.File, self).__init__(root, path, tainted, size, 1)
maruel@chromium.org538141b2013-06-03 20:57:17 +0000816 self.mode = mode
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000817
818 def _clone(self, new_root, new_path, tainted):
819 """Clones itself keeping meta-data."""
820 # Keep the self.size and self._real_path caches for performance reason. It
821 # is also important when the file becomes tainted (with a variable instead
822 # of the real path) since self.path is not an on-disk path anymore so
823 # out._size cannot be updated.
maruel@chromium.org538141b2013-06-03 20:57:17 +0000824 out = self.__class__(new_root, new_path, tainted, self.size, self.mode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000825 out._real_path = self._real_path
826 return out
827
maruel@chromium.org538141b2013-06-03 20:57:17 +0000828 def flatten(self):
829 out = super(Results.File, self).flatten()
830 out['mode'] = self.mode
831 return out
832
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000833 class Directory(_TouchedObject):
maruel@chromium.org538141b2013-06-03 20:57:17 +0000834 """A directory of files. Must exist.
835
836 For a Directory instance, self.size is not a cache, it's an actual value
837 that is never modified and represents the total size of the files contained
838 in this directory. It is possible that the directory is empty so that
839 size==0; this happens if there's only an invalid symlink in it.
840 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000841 def __init__(self, root, path, tainted, size, nb_files):
842 """path='.' is a valid value and must be handled appropriately."""
843 assert not path.endswith(os.path.sep), path
844 super(Results.Directory, self).__init__(
845 root, path + os.path.sep, tainted, size, nb_files)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000846
847 def flatten(self):
848 out = super(Results.Directory, self).flatten()
849 out['nb_files'] = self.nb_files
850 return out
851
852 def _clone(self, new_root, new_path, tainted):
853 """Clones itself keeping meta-data."""
854 out = self.__class__(
855 new_root,
856 new_path.rstrip(os.path.sep),
857 tainted,
858 self.size,
859 self.nb_files)
860 out._real_path = self._real_path
861 return out
862
863 class Process(object):
864 """A process that was traced.
865
866 Contains references to the files accessed by this process and its children.
867 """
868 def __init__(self, pid, files, executable, command, initial_cwd, children):
869 logging.debug('Process(%s, %d, ...)' % (pid, len(files)))
870 self.pid = pid
871 self.files = sorted(files, key=lambda x: x.path)
872 self.children = children
873 self.executable = executable
874 self.command = command
875 self.initial_cwd = initial_cwd
876
877 # Check internal consistency.
878 assert len(set(f.path for f in self.files)) == len(self.files), sorted(
879 f.path for f in self.files)
880 assert isinstance(self.children, list)
881 assert isinstance(self.files, list)
882
883 @property
884 def all(self):
885 for child in self.children:
886 for i in child.all:
887 yield i
888 yield self
889
890 def flatten(self):
891 return {
892 'children': [c.flatten() for c in self.children],
893 'command': self.command,
894 'executable': self.executable,
895 'files': [f.flatten() for f in self.files],
896 'initial_cwd': self.initial_cwd,
897 'pid': self.pid,
898 }
899
900 def strip_root(self, root):
901 assert isabs(root) and root.endswith(os.path.sep), root
902 # Loads the files after since they are constructed as objects.
903 out = self.__class__(
904 self.pid,
905 filter(None, (f.strip_root(root) for f in self.files)),
906 self.executable,
907 self.command,
908 self.initial_cwd,
909 [c.strip_root(root) for c in self.children])
910 logging.debug(
911 'strip_root(%s) %d -> %d' % (root, len(self.files), len(out.files)))
912 return out
913
914 def __init__(self, process):
915 self.process = process
916 # Cache.
917 self._files = None
918
919 def flatten(self):
920 return {
921 'root': self.process.flatten(),
922 }
923
924 @property
925 def files(self):
926 if self._files is None:
927 self._files = sorted(
928 sum((p.files for p in self.process.all), []),
929 key=lambda x: x.path)
930 return self._files
931
932 @property
933 def existent(self):
934 return [f for f in self.files if f.existent]
935
936 @property
937 def non_existent(self):
938 return [f for f in self.files if not f.existent]
939
940 def strip_root(self, root):
941 """Returns a clone with all the files outside the directory |root| removed
942 and converts all the path to be relative paths.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000943
944 It keeps files accessible through the |root| directory or that have been
945 accessed through any symlink which points to the same directory.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000946 """
947 # Resolve any symlink
948 root = os.path.realpath(root)
949 root = get_native_path_case(root).rstrip(os.path.sep) + os.path.sep
950 logging.debug('strip_root(%s)' % root)
951 return Results(self.process.strip_root(root))
952
953
954class ApiBase(object):
955 """OS-agnostic API to trace a process and its children."""
956 class Context(object):
957 """Processes one log line at a time and keeps the list of traced processes.
958
959 The parsing is complicated by the fact that logs are traced out of order for
960 strace but in-order for dtrace and logman. In addition, on Windows it is
961 very frequent that processids are reused so a flat list cannot be used. But
962 at the same time, it is impossible to faithfully construct a graph when the
963 logs are processed out of order. So both a tree and a flat mapping are used,
964 the tree is the real process tree, while the flat mapping stores the last
965 valid process for the corresponding processid. For the strace case, the
966 tree's head is guessed at the last moment.
967 """
968 class Process(object):
969 """Keeps context for one traced child process.
970
971 Logs all the files this process touched. Ignores directories.
972 """
973 def __init__(self, blacklist, pid, initial_cwd):
974 # Check internal consistency.
975 assert isinstance(pid, int), repr(pid)
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000976 assert_is_renderable(initial_cwd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000977 self.pid = pid
978 # children are Process instances.
979 self.children = []
980 self.initial_cwd = initial_cwd
981 self.cwd = None
maruel@chromium.org538141b2013-06-03 20:57:17 +0000982 self.files = {}
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000983 self.executable = None
984 self.command = None
985 self._blacklist = blacklist
986
987 def to_results_process(self):
988 """Resolves file case sensitivity and or late-bound strings."""
989 # When resolving files, it's normal to get dupe because a file could be
990 # opened multiple times with different case. Resolve the deduplication
991 # here.
992 def fix_path(x):
993 """Returns the native file path case.
994
995 Converts late-bound strings.
996 """
997 if not x:
998 # Do not convert None instance to 'None'.
999 return x
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001000 x = render(x)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001001 if os.path.isabs(x):
1002 # If the path is not absolute, which tends to happen occasionally on
1003 # Windows, it is not possible to get the native path case so ignore
1004 # that trace. It mostly happens for 'executable' value.
1005 x = get_native_path_case(x)
1006 return x
1007
maruel@chromium.org538141b2013-06-03 20:57:17 +00001008 def fix_and_blacklist_path(x, m):
1009 """Receives a tuple (filepath, mode) and processes filepath."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001010 x = fix_path(x)
1011 if not x:
1012 return
1013 # The blacklist needs to be reapplied, since path casing could
1014 # influence blacklisting.
1015 if self._blacklist(x):
1016 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00001017 # Filters out directories. Some may have passed through.
1018 if os.path.isdir(x):
1019 return
1020 return x, m
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001021
maruel@chromium.org538141b2013-06-03 20:57:17 +00001022 # Renders all the files as strings, as some could be RelativePath
1023 # instances. It is important to do it first since there could still be
1024 # multiple entries with the same path but different modes.
1025 rendered = (
1026 fix_and_blacklist_path(f, m) for f, m in self.files.iteritems())
1027 files = sorted(
1028 (f for f in rendered if f),
1029 key=lambda x: (x[0], Results.File.ACCEPTABLE_MODES.index(x[1])))
1030 # Then converting into a dict will automatically clean up lesser
1031 # important values.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001032 files = [
maruel@chromium.org538141b2013-06-03 20:57:17 +00001033 Results.File(None, f, False, None, m)
1034 for f, m in dict(files).iteritems()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001035 ]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001036 return Results.Process(
1037 self.pid,
1038 files,
1039 fix_path(self.executable),
1040 self.command,
1041 fix_path(self.initial_cwd),
1042 [c.to_results_process() for c in self.children])
1043
maruel@chromium.org538141b2013-06-03 20:57:17 +00001044 def add_file(self, filepath, mode):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001045 """Adds a file if it passes the blacklist."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001046 if self._blacklist(render(filepath)):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001047 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00001048 logging.debug('add_file(%d, %s, %s)', self.pid, filepath, mode)
1049 # Note that filepath and not render(filepath) is added. It is because
1050 # filepath could be something else than a string, like a RelativePath
1051 # instance for dtrace logs.
1052 modes = Results.File.ACCEPTABLE_MODES
1053 old_mode = self.files.setdefault(filepath, mode)
1054 if old_mode != mode and modes.index(old_mode) < modes.index(mode):
1055 # Take the highest value.
1056 self.files[filepath] = mode
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001057
1058 def __init__(self, blacklist):
1059 self.blacklist = blacklist
1060 # Initial process.
1061 self.root_process = None
1062 # dict to accelerate process lookup, to not have to lookup the whole graph
1063 # each time.
1064 self._process_lookup = {}
1065
1066 class Tracer(object):
1067 """During it's lifetime, the tracing subsystem is enabled."""
1068 def __init__(self, logname):
1069 self._logname = logname
maruel@chromium.org808e3f22013-05-16 16:42:54 +00001070 self._lock = threading.RLock()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001071 self._traces = []
1072 self._initialized = True
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00001073 self._scripts_to_cleanup = []
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001074
1075 def trace(self, cmd, cwd, tracename, output):
1076 """Runs the OS-specific trace program on an executable.
1077
1078 Arguments:
1079 - cmd: The command (a list) to run.
1080 - cwd: Current directory to start the child process in.
1081 - tracename: Name of the trace in the logname file.
1082 - output: If False, redirects output to PIPEs.
1083
1084 Returns a tuple (resultcode, output) and updates the internal trace
1085 entries.
1086 """
1087 # The implementation adds an item to self._traces.
1088 raise NotImplementedError(self.__class__.__name__)
1089
1090 def close(self, _timeout=None):
1091 """Saves the meta-data in the logname file.
1092
1093 For kernel-based tracing, stops the tracing subsystem.
1094
1095 Must not be used manually when using 'with' construct.
1096 """
1097 with self._lock:
maruel@chromium.org808e3f22013-05-16 16:42:54 +00001098 if not self._initialized:
1099 raise TracingFailure(
1100 'Called %s.close() on an unitialized object' %
1101 self.__class__.__name__,
1102 None, None, None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001103 try:
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00001104 while self._scripts_to_cleanup:
1105 try:
1106 os.remove(self._scripts_to_cleanup.pop())
1107 except OSError as e:
1108 logging.error('Failed to delete a temporary script: %s', e)
maruel@chromium.org808e3f22013-05-16 16:42:54 +00001109 write_json(self._logname, self._gen_logdata(), False)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001110 finally:
1111 self._initialized = False
1112
1113 def post_process_log(self):
1114 """Post-processes the log so it becomes faster to load afterward.
1115
1116 Must not be used manually when using 'with' construct.
1117 """
1118 assert not self._initialized, 'Must stop tracing first.'
1119
maruel@chromium.org808e3f22013-05-16 16:42:54 +00001120 def _gen_logdata(self):
1121 """Returns the data to be saved in the trace file."""
1122 return {
1123 'traces': self._traces,
1124 }
1125
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001126 def __enter__(self):
1127 """Enables 'with' statement."""
1128 return self
1129
1130 def __exit__(self, exc_type, exc_value, traceback):
1131 """Enables 'with' statement."""
1132 self.close()
1133 # If an exception was thrown, do not process logs.
1134 if not exc_type:
1135 self.post_process_log()
1136
1137 def get_tracer(self, logname):
1138 """Returns an ApiBase.Tracer instance.
1139
1140 Initializes the tracing subsystem, which is a requirement for kernel-based
1141 tracers. Only one tracer instance should be live at a time!
1142
1143 logname is the filepath to the json file that will contain the meta-data
1144 about the logs.
1145 """
1146 return self.Tracer(logname)
1147
1148 @staticmethod
1149 def clean_trace(logname):
1150 """Deletes an old log."""
1151 raise NotImplementedError()
1152
1153 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001154 def parse_log(cls, logname, blacklist, trace_name):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001155 """Processes trace logs and returns the files opened and the files that do
1156 not exist.
1157
1158 It does not track directories.
1159
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001160 Arguments:
1161 - logname: must be an absolute path.
1162 - blacklist: must be a lambda.
1163 - trace_name: optional trace to read, defaults to reading all traces.
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00001164
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001165 Most of the time, files that do not exist are temporary test files that
1166 should be put in /tmp instead. See http://crbug.com/116251.
1167
1168 Returns a list of dict with keys:
1169 - results: A Results instance.
1170 - trace: The corresponding tracename parameter provided to
1171 get_tracer().trace().
1172 - output: Output gathered during execution, if get_tracer().trace(...,
1173 output=False) was used.
1174 """
1175 raise NotImplementedError(cls.__class__.__name__)
1176
1177
1178class Strace(ApiBase):
1179 """strace implies linux."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001180 @staticmethod
1181 def load_filename(filename):
1182 """Parses a filename in a log."""
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001183 # TODO(maruel): Be compatible with strace -x.
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001184 assert isinstance(filename, str)
1185 out = ''
1186 i = 0
1187 while i < len(filename):
1188 c = filename[i]
1189 if c == '\\':
1190 out += chr(int(filename[i+1:i+4], 8))
1191 i += 4
1192 else:
1193 out += c
1194 i += 1
1195 # TODO(maruel): That's not necessarily true that the current code page is
1196 # utf-8.
1197 return out.decode('utf-8')
1198
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001199 class Context(ApiBase.Context):
1200 """Processes a strace log line and keeps the list of existent and non
1201 existent files accessed.
1202
1203 Ignores directories.
1204
1205 Uses late-binding to processes the cwd of each process. The problem is that
1206 strace generates one log file per process it traced but doesn't give any
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001207 information about which process was started when and by who. So process the
1208 logs out of order and use late binding with RelativePath to be able to
1209 deduce the initial directory of each process once all the logs are parsed.
1210
1211 TODO(maruel): Use the script even in the non-sudo case, so log parsing can
1212 be done in two phase: first find the root process, then process the child
1213 processes in order. With that, it should be possible to not use RelativePath
1214 anymore. This would significantly simplify the code!
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001215 """
1216 class Process(ApiBase.Context.Process):
1217 """Represents the state of a process.
1218
1219 Contains all the information retrieved from the pid-specific log.
1220 """
1221 # Function names are using ([a-z_0-9]+)
1222 # This is the most common format. function(args) = result
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001223 RE_HEADER = re.compile(r'^([a-z_0-9]+)\((.*?)\)\s+= (.+)$')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001224 # An interrupted function call, only grab the minimal header.
1225 RE_UNFINISHED = re.compile(r'^([^\(]+)(.*) \<unfinished \.\.\.\>$')
1226 # A resumed function call.
1227 RE_RESUMED = re.compile(r'^<\.\.\. ([^ ]+) resumed> (.+)$')
1228 # A process received a signal.
1229 RE_SIGNAL = re.compile(r'^--- SIG[A-Z]+ .+ ---')
1230 # A process didn't handle a signal. Ignore any junk appearing before,
1231 # because the process was forcibly killed so it won't open any new file.
1232 RE_KILLED = re.compile(
1233 r'^.*\+\+\+ killed by ([A-Z]+)( \(core dumped\))? \+\+\+$')
1234 # The process has exited.
1235 RE_PROCESS_EXITED = re.compile(r'^\+\+\+ exited with (\d+) \+\+\+')
1236 # A call was canceled. Ignore any prefix.
1237 RE_UNAVAILABLE = re.compile(r'^.*\)\s*= \? <unavailable>$')
1238 # Happens when strace fails to even get the function name.
1239 UNNAMED_FUNCTION = '????'
1240
1241 # Corner-case in python, a class member function decorator must not be
1242 # @staticmethod.
1243 def parse_args(regexp, expect_zero): # pylint: disable=E0213
1244 """Automatically convert the str 'args' into a list of processed
1245 arguments.
1246
1247 Arguments:
1248 - regexp is used to parse args.
1249 - expect_zero: one of True, False or None.
1250 - True: will check for result.startswith('0') first and will ignore
1251 the trace line completely otherwise. This is important because for
1252 many functions, the regexp will not process if the call failed.
1253 - False: will check for not result.startswith(('?', '-1')) for the
1254 same reason than with True.
1255 - None: ignore result.
1256 """
1257 def meta_hook(function):
1258 assert function.__name__.startswith('handle_')
1259 def hook(self, args, result):
1260 if expect_zero is True and not result.startswith('0'):
1261 return
1262 if expect_zero is False and result.startswith(('?', '-1')):
1263 return
1264 match = re.match(regexp, args)
1265 if not match:
1266 raise TracingFailure(
1267 'Failed to parse %s(%s) = %s' %
1268 (function.__name__[len('handle_'):], args, result),
1269 None, None, None)
1270 return function(self, match.groups(), result)
1271 return hook
1272 return meta_hook
1273
1274 class RelativePath(object):
1275 """A late-bound relative path."""
1276 def __init__(self, parent, value):
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001277 assert_is_renderable(parent)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001278 self.parent = parent
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001279 assert (
1280 value is None or
1281 (isinstance(value, unicode) and not os.path.isabs(value)))
1282 self.value = value
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001283 if self.value:
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001284 # TODO(maruel): On POSIX, '\\' is a valid character so remove this
1285 # assert.
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001286 assert '\\' not in self.value, value
1287 assert '\\' not in self.value, (repr(value), repr(self.value))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001288
1289 def render(self):
1290 """Returns the current directory this instance is representing.
1291
1292 This function is used to return the late-bound value.
1293 """
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001294 assert self.parent is not None
1295 parent = render(self.parent)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001296 if self.value:
1297 return os.path.normpath(os.path.join(parent, self.value))
1298 return parent
1299
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001300 def __init__(self, root, pid):
1301 """Keeps enough information to be able to guess the original process
1302 root.
1303
1304 strace doesn't store which process was the initial process. So more
1305 information needs to be kept so the graph can be reconstructed from the
1306 flat map.
1307 """
1308 logging.info('%s(%d)' % (self.__class__.__name__, pid))
1309 super(Strace.Context.Process, self).__init__(root.blacklist, pid, None)
1310 assert isinstance(root, ApiBase.Context)
1311 self._root = weakref.ref(root)
1312 # The dict key is the function name of the pending call, like 'open'
1313 # or 'execve'.
1314 self._pending_calls = {}
1315 self._line_number = 0
1316 # Current directory when the process started.
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001317 if isinstance(self._root(), unicode):
1318 self.initial_cwd = self._root()
1319 else:
1320 self.initial_cwd = self.RelativePath(self._root(), None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001321 self.parentid = None
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001322 self._done = False
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001323
1324 def get_cwd(self):
1325 """Returns the best known value of cwd."""
1326 return self.cwd or self.initial_cwd
1327
1328 def render(self):
1329 """Returns the string value of the RelativePath() object.
1330
1331 Used by RelativePath. Returns the initial directory and not the
1332 current one since the current directory 'cwd' validity is time-limited.
1333
1334 The validity is only guaranteed once all the logs are processed.
1335 """
1336 return self.initial_cwd.render()
1337
1338 def on_line(self, line):
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001339 assert isinstance(line, str)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001340 self._line_number += 1
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001341 try:
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001342 if self._done:
1343 raise TracingFailure(
1344 'Found a trace for a terminated process or corrupted log',
1345 None, None, None)
1346
1347 if self.RE_SIGNAL.match(line):
1348 # Ignore signals.
1349 return
1350
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001351 match = self.RE_KILLED.match(line)
1352 if match:
1353 # Converts a '+++ killed by Foo +++' trace into an exit_group().
1354 self.handle_exit_group(match.group(1), None)
1355 return
1356
1357 match = self.RE_PROCESS_EXITED.match(line)
1358 if match:
1359 # Converts a '+++ exited with 1 +++' trace into an exit_group()
1360 self.handle_exit_group(match.group(1), None)
1361 return
1362
1363 match = self.RE_UNFINISHED.match(line)
1364 if match:
1365 if match.group(1) in self._pending_calls:
1366 raise TracingFailure(
1367 'Found two unfinished calls for the same function',
1368 None, None, None,
1369 self._pending_calls)
1370 self._pending_calls[match.group(1)] = (
1371 match.group(1) + match.group(2))
1372 return
1373
1374 match = self.RE_UNAVAILABLE.match(line)
1375 if match:
1376 # This usually means a process was killed and a pending call was
1377 # canceled.
1378 # TODO(maruel): Look up the last exit_group() trace just above and
1379 # make sure any self._pending_calls[anything] is properly flushed.
1380 return
1381
1382 match = self.RE_RESUMED.match(line)
1383 if match:
1384 if match.group(1) not in self._pending_calls:
1385 raise TracingFailure(
1386 'Found a resumed call that was not logged as unfinished',
1387 None, None, None,
1388 self._pending_calls)
1389 pending = self._pending_calls.pop(match.group(1))
1390 # Reconstruct the line.
1391 line = pending + match.group(2)
1392
1393 match = self.RE_HEADER.match(line)
1394 if not match:
maruel@chromium.orgbcfc9e12013-03-19 18:47:17 +00001395 # The line is corrupted. It happens occasionally when a process is
1396 # killed forcibly with activity going on. Assume the process died.
1397 # No other line can be processed afterward.
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001398 logging.debug('%d is done: %s', self.pid, line)
maruel@chromium.orgbcfc9e12013-03-19 18:47:17 +00001399 self._done = True
1400 return
1401
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001402 if match.group(1) == self.UNNAMED_FUNCTION:
1403 return
1404
1405 # It's a valid line, handle it.
1406 handler = getattr(self, 'handle_%s' % match.group(1), None)
1407 if not handler:
1408 self._handle_unknown(match.group(1), match.group(2), match.group(3))
1409 return handler(match.group(2), match.group(3))
1410 except TracingFailure, e:
1411 # Hack in the values since the handler could be a static function.
1412 e.pid = self.pid
1413 e.line = line
1414 e.line_number = self._line_number
1415 # Re-raise the modified exception.
1416 raise
1417 except (KeyError, NotImplementedError, ValueError), e:
1418 raise TracingFailure(
1419 'Trace generated a %s exception: %s' % (
1420 e.__class__.__name__, str(e)),
1421 self.pid,
1422 self._line_number,
1423 line,
1424 e)
1425
1426 @parse_args(r'^\"(.+?)\", [FKORWX_|]+$', True)
1427 def handle_access(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001428 self._handle_file(args[0], Results.File.TOUCHED)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001429
1430 @parse_args(r'^\"(.+?)\"$', True)
1431 def handle_chdir(self, args, _result):
1432 """Updates cwd."""
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001433 self.cwd = self._mangle(args[0])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001434 logging.debug('handle_chdir(%d, %s)' % (self.pid, self.cwd))
1435
maruel@chromium.org538141b2013-06-03 20:57:17 +00001436 @parse_args(r'^\"(.+?)\", (\d+), (\d+)$', False)
1437 def handle_chown(self, args, _result):
1438 # TODO(maruel): Look at result?
1439 self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org55550722013-05-28 14:43:14 +00001440
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001441 def handle_clone(self, _args, result):
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001442 self._handling_forking('clone', result)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001443
1444 def handle_close(self, _args, _result):
1445 pass
1446
maruel@chromium.org538141b2013-06-03 20:57:17 +00001447 @parse_args(r'^\"(.+?)\", (\d+)$', False)
1448 def handle_chmod(self, args, _result):
1449 self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001450
maruel@chromium.org538141b2013-06-03 20:57:17 +00001451 @parse_args(r'^\"(.+?)\", (\d+)$', False)
1452 def handle_creat(self, args, _result):
1453 self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001454
1455 @parse_args(r'^\"(.+?)\", \[(.+)\], \[\/\* \d+ vars? \*\/\]$', True)
1456 def handle_execve(self, args, _result):
1457 # Even if in practice execve() doesn't returns when it succeeds, strace
1458 # still prints '0' as the result.
1459 filepath = args[0]
maruel@chromium.org538141b2013-06-03 20:57:17 +00001460 self._handle_file(filepath, Results.File.READ)
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001461 self.executable = self._mangle(filepath)
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001462 try:
1463 self.command = strace_process_quoted_arguments(args[1])
1464 except ValueError as e:
1465 raise TracingFailure(
1466 'Failed to process command line argument:\n%s' % e.args[0],
1467 None, None, None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001468
1469 def handle_exit_group(self, _args, _result):
1470 """Removes cwd."""
1471 self.cwd = None
1472
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001473 @parse_args(r'^(\d+|AT_FDCWD), \"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', True)
1474 def handle_faccessat(self, args, _results):
1475 if args[0] == 'AT_FDCWD':
maruel@chromium.org538141b2013-06-03 20:57:17 +00001476 self._handle_file(args[1], Results.File.TOUCHED)
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001477 else:
1478 raise Exception('Relative faccess not implemented.')
1479
maruel@chromium.org0781f322013-05-28 19:45:49 +00001480 def handle_fallocate(self, _args, result):
1481 pass
1482
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001483 def handle_fork(self, args, result):
1484 self._handle_unknown('fork', args, result)
1485
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001486 def handle_futex(self, _args, _result):
1487 pass
1488
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001489 @parse_args(r'^\"(.+?)\", (\d+)$', False)
1490 def handle_getcwd(self, args, _result):
1491 if os.path.isabs(args[0]):
1492 logging.debug('handle_chdir(%d, %s)' % (self.pid, self.cwd))
1493 if not isinstance(self.cwd, unicode):
1494 # Take the occasion to reset the path.
1495 self.cwd = self._mangle(args[0])
1496 else:
1497 # It should always match.
1498 assert self.cwd == Strace.load_filename(args[0]), (
1499 self.cwd, args[0])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001500
1501 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1502 def handle_link(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001503 self._handle_file(args[0], Results.File.READ)
1504 self._handle_file(args[1], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001505
1506 @parse_args(r'\"(.+?)\", \{.+?, \.\.\.\}', True)
1507 def handle_lstat(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001508 self._handle_file(args[0], Results.File.TOUCHED)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001509
1510 def handle_mkdir(self, _args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001511 # We track content, not directories.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001512 pass
1513
1514 @parse_args(r'^\"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', False)
1515 def handle_open(self, args, _result):
1516 if 'O_DIRECTORY' in args[1]:
1517 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00001518 self._handle_file(
1519 args[0],
1520 Results.File.READ if 'O_RDONLY' in args[1] else Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001521
1522 @parse_args(r'^(\d+|AT_FDCWD), \"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', False)
1523 def handle_openat(self, args, _result):
1524 if 'O_DIRECTORY' in args[2]:
1525 return
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001526 if args[0] == 'AT_FDCWD':
maruel@chromium.org538141b2013-06-03 20:57:17 +00001527 self._handle_file(
1528 args[1],
1529 Results.File.READ if 'O_RDONLY' in args[2]
1530 else Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001531 else:
1532 # TODO(maruel): Implement relative open if necessary instead of the
1533 # AT_FDCWD flag, let's hope not since this means tracking all active
1534 # directory handles.
maruel@chromium.org538141b2013-06-03 20:57:17 +00001535 raise NotImplementedError('Relative open via openat not implemented.')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001536
1537 @parse_args(r'^\"(.+?)\", \".+?\"(\.\.\.)?, \d+$', False)
1538 def handle_readlink(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001539 self._handle_file(args[0], Results.File.READ)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001540
1541 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1542 def handle_rename(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001543 self._handle_file(args[0], Results.File.READ)
1544 self._handle_file(args[1], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001545
1546 def handle_rmdir(self, _args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001547 # We track content, not directories.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001548 pass
1549
1550 def handle_setxattr(self, _args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001551 # TODO(maruel): self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001552 pass
1553
1554 @parse_args(r'\"(.+?)\", \{.+?, \.\.\.\}', True)
1555 def handle_stat(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001556 self._handle_file(args[0], Results.File.TOUCHED)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001557
maruel@chromium.org538141b2013-06-03 20:57:17 +00001558 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1559 def handle_symlink(self, args, _result):
1560 self._handle_file(args[0], Results.File.TOUCHED)
1561 self._handle_file(args[1], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001562
csharp@chromium.orgfa672922012-11-02 17:43:31 +00001563 @parse_args(r'^\"(.+?)\", \d+', True)
1564 def handle_truncate(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001565 self._handle_file(args[0], Results.File.WRITE)
csharp@chromium.orgfa672922012-11-02 17:43:31 +00001566
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001567 def handle_unlink(self, _args, _result):
1568 # In theory, the file had to be created anyway.
1569 pass
1570
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001571 def handle_unlinkat(self, _args, _result):
1572 # In theory, the file had to be created anyway.
1573 pass
1574
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001575 def handle_statfs(self, _args, _result):
1576 pass
1577
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001578 def handle_utimensat(self, _args, _result):
1579 pass
1580
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001581 def handle_vfork(self, _args, result):
1582 self._handling_forking('vfork', result)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001583
1584 @staticmethod
1585 def _handle_unknown(function, args, result):
1586 raise TracingFailure(
1587 'Unexpected/unimplemented trace %s(%s)= %s' %
1588 (function, args, result),
1589 None, None, None)
1590
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001591 def _handling_forking(self, name, result):
1592 """Transfers cwd."""
1593 if result.startswith(('?', '-1')):
1594 # The call failed.
1595 return
1596 # Update the other process right away.
1597 childpid = int(result)
1598 child = self._root().get_or_set_proc(childpid)
1599 if child.parentid is not None or childpid in self.children:
1600 raise TracingFailure(
1601 'Found internal inconsitency in process lifetime detection '
1602 'during a %s() call' % name,
1603 None, None, None)
1604
1605 # Copy the cwd object.
1606 child.initial_cwd = self.get_cwd()
1607 child.parentid = self.pid
1608 # It is necessary because the logs are processed out of order.
1609 self.children.append(child)
1610
maruel@chromium.org538141b2013-06-03 20:57:17 +00001611 def _handle_file(self, filepath, mode):
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001612 filepath = self._mangle(filepath)
maruel@chromium.org538141b2013-06-03 20:57:17 +00001613 self.add_file(filepath, mode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001614
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001615 def _mangle(self, filepath):
1616 """Decodes a filepath found in the log and convert it to a late-bound
1617 path if necessary.
1618
1619 |filepath| is an strace 'encoded' string and the returned value is
1620 either an unicode string if the path was absolute or a late bound path
1621 otherwise.
1622 """
1623 filepath = Strace.load_filename(filepath)
1624 if os.path.isabs(filepath):
1625 return filepath
1626 else:
1627 if isinstance(self.get_cwd(), unicode):
1628 return os.path.normpath(os.path.join(self.get_cwd(), filepath))
1629 return self.RelativePath(self.get_cwd(), filepath)
1630
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001631 def __init__(self, blacklist, root_pid, initial_cwd):
1632 """|root_pid| may be None when the root process is not known.
1633
1634 In that case, a search is done after reading all the logs to figure out
1635 the root process.
1636 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001637 super(Strace.Context, self).__init__(blacklist)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001638 assert_is_renderable(initial_cwd)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001639 self.root_pid = root_pid
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001640 self.initial_cwd = initial_cwd
1641
1642 def render(self):
1643 """Returns the string value of the initial cwd of the root process.
1644
1645 Used by RelativePath.
1646 """
1647 return self.initial_cwd
1648
1649 def on_line(self, pid, line):
1650 """Transfers control into the Process.on_line() function."""
1651 self.get_or_set_proc(pid).on_line(line.strip())
1652
1653 def to_results(self):
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001654 """If necessary, finds back the root process and verify consistency."""
1655 if not self.root_pid:
1656 # The non-sudo case. The traced process was started by strace itself,
1657 # so the pid of the traced process is not known.
1658 root = [p for p in self._process_lookup.itervalues() if not p.parentid]
1659 if len(root) == 1:
1660 self.root_process = root[0]
1661 # Save it for later.
1662 self.root_pid = self.root_process.pid
1663 else:
1664 # The sudo case. The traced process was started manually so its pid is
1665 # known.
1666 self.root_process = self._process_lookup.get(self.root_pid)
1667 if not self.root_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001668 raise TracingFailure(
1669 'Found internal inconsitency in process lifetime detection '
1670 'while finding the root process',
1671 None,
1672 None,
1673 None,
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001674 self.root_pid,
1675 sorted(self._process_lookup))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001676 process = self.root_process.to_results_process()
1677 if sorted(self._process_lookup) != sorted(p.pid for p in process.all):
1678 raise TracingFailure(
1679 'Found internal inconsitency in process lifetime detection '
1680 'while looking for len(tree) == len(list)',
1681 None,
1682 None,
1683 None,
1684 sorted(self._process_lookup),
1685 sorted(p.pid for p in process.all))
1686 return Results(process)
1687
1688 def get_or_set_proc(self, pid):
1689 """Returns the Context.Process instance for this pid or creates a new one.
1690 """
1691 if not pid or not isinstance(pid, int):
1692 raise TracingFailure(
1693 'Unpexpected value for pid: %r' % pid,
1694 pid,
1695 None,
1696 None,
1697 pid)
1698 if pid not in self._process_lookup:
1699 self._process_lookup[pid] = self.Process(self, pid)
1700 return self._process_lookup[pid]
1701
1702 @classmethod
1703 def traces(cls):
1704 """Returns the list of all handled traces to pass this as an argument to
1705 strace.
1706 """
1707 prefix = 'handle_'
1708 return [i[len(prefix):] for i in dir(cls.Process) if i.startswith(prefix)]
1709
1710 class Tracer(ApiBase.Tracer):
1711 MAX_LEN = 256
1712
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001713 def __init__(self, logname, use_sudo):
1714 super(Strace.Tracer, self).__init__(logname)
1715 self.use_sudo = use_sudo
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001716 if use_sudo:
1717 # TODO(maruel): Use the jump script systematically to make it easy to
1718 # figure out the root process, so RelativePath is not necessary anymore.
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00001719 self._child_script = create_exec_thunk()
1720 self._scripts_to_cleanup.append(self._child_script)
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001721
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001722 def trace(self, cmd, cwd, tracename, output):
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001723 """Runs strace on an executable.
1724
1725 When use_sudo=True, it is a 3-phases process: start the thunk, start
1726 sudo strace with the pid of the thunk and then have the thunk os.execve()
1727 the process to trace.
1728 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001729 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
1730 assert os.path.isabs(cmd[0]), cmd[0]
1731 assert os.path.isabs(cwd), cwd
1732 assert os.path.normpath(cwd) == cwd, cwd
1733 with self._lock:
1734 if not self._initialized:
1735 raise TracingFailure(
1736 'Called Tracer.trace() on an unitialized object',
1737 None, None, None, tracename)
1738 assert tracename not in (i['trace'] for i in self._traces)
1739 stdout = stderr = None
1740 if output:
1741 stdout = subprocess.PIPE
1742 stderr = subprocess.STDOUT
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001743
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001744 # Ensure all file related APIs are hooked.
1745 traces = ','.join(Strace.Context.traces() + ['file'])
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001746 flags = [
maruel@chromium.orga5cfdc32013-05-28 14:46:15 +00001747 # Each child process has its own trace file. It is necessary because
1748 # strace may generate corrupted log file if multiple processes are
1749 # heavily doing syscalls simultaneously.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001750 '-ff',
maruel@chromium.orga5cfdc32013-05-28 14:46:15 +00001751 # Reduce whitespace usage.
1752 '-a1',
1753 # hex encode non-ascii strings.
1754 # TODO(maruel): '-x',
1755 # TODO(maruel): '-ttt',
1756 # Signals are unnecessary noise here. Note the parser can cope with them
1757 # but reduce the unnecessary output.
1758 '-esignal=none',
1759 # Print as much data as wanted.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001760 '-s', '%d' % self.MAX_LEN,
1761 '-e', 'trace=%s' % traces,
1762 '-o', self._logname + '.' + tracename,
1763 ]
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001764
1765 if self.use_sudo:
1766 pipe_r, pipe_w = os.pipe()
1767 # Start the child process paused.
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00001768 target_cmd = [sys.executable, self._child_script, str(pipe_r)] + cmd
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001769 logging.debug(' '.join(target_cmd))
1770 child_proc = subprocess.Popen(
1771 target_cmd,
1772 stdin=subprocess.PIPE,
1773 stdout=stdout,
1774 stderr=stderr,
1775 cwd=cwd)
1776
1777 # TODO(maruel): both processes must use the same UID for it to work
1778 # without sudo. Look why -p is failing at the moment without sudo.
1779 trace_cmd = [
1780 'sudo',
1781 'strace',
1782 '-p', str(child_proc.pid),
1783 ] + flags
1784 logging.debug(' '.join(trace_cmd))
1785 strace_proc = subprocess.Popen(
1786 trace_cmd,
1787 cwd=cwd,
1788 stdin=subprocess.PIPE,
1789 stdout=subprocess.PIPE,
1790 stderr=subprocess.PIPE)
1791
1792 line = strace_proc.stderr.readline()
1793 if not re.match(r'^Process \d+ attached \- interrupt to quit$', line):
1794 # TODO(maruel): Raise an exception.
1795 assert False, line
1796
1797 # Now fire the child process.
1798 os.write(pipe_w, 'x')
1799
1800 out = child_proc.communicate()[0]
1801 strace_out = strace_proc.communicate()[0]
1802
1803 # TODO(maruel): if strace_proc.returncode: Add an exception.
1804 saved_out = strace_out if strace_proc.returncode else out
1805 root_pid = child_proc.pid
1806 else:
1807 # Non-sudo case.
1808 trace_cmd = [
1809 'strace',
1810 ] + flags + cmd
1811 logging.debug(' '.join(trace_cmd))
1812 child_proc = subprocess.Popen(
1813 trace_cmd,
1814 cwd=cwd,
1815 stdin=subprocess.PIPE,
1816 stdout=stdout,
1817 stderr=stderr)
1818 out = child_proc.communicate()[0]
1819 # TODO(maruel): Walk the logs and figure out the root process would
1820 # simplify parsing the logs a *lot*.
1821 saved_out = out
1822 # The trace reader will have to figure out.
1823 root_pid = None
1824
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001825 with self._lock:
1826 assert tracename not in (i['trace'] for i in self._traces)
1827 self._traces.append(
1828 {
1829 'cmd': cmd,
1830 'cwd': cwd,
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001831 'output': saved_out,
1832 'pid': root_pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001833 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001834 })
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001835 return child_proc.returncode, out
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001836
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001837 def __init__(self, use_sudo=None):
1838 super(Strace, self).__init__()
1839 self.use_sudo = use_sudo
1840
1841 def get_tracer(self, logname):
1842 return self.Tracer(logname, self.use_sudo)
1843
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001844 @staticmethod
1845 def clean_trace(logname):
1846 if os.path.isfile(logname):
1847 os.remove(logname)
1848 # Also delete any pid specific file from previous traces.
1849 for i in glob.iglob(logname + '.*'):
1850 if i.rsplit('.', 1)[1].isdigit():
1851 os.remove(i)
1852
1853 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001854 def parse_log(cls, logname, blacklist, trace_name):
1855 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00001856 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001857 data = read_json(logname)
1858 out = []
1859 for item in data['traces']:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001860 if trace_name and item['trace'] != trace_name:
1861 continue
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001862 result = {
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001863 'output': item['output'],
maruel@chromium.org964ddde2012-10-29 18:13:46 +00001864 'trace': item['trace'],
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001865 }
1866 try:
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001867 context = cls.Context(blacklist, item['pid'], item['cwd'])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001868 for pidfile in glob.iglob('%s.%s.*' % (logname, item['trace'])):
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001869 logging.debug('Reading %s', pidfile)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001870 pid = pidfile.rsplit('.', 1)[1]
1871 if pid.isdigit():
1872 pid = int(pid)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001873 found_line = False
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001874 for line in open(pidfile, 'rb'):
1875 context.on_line(pid, line)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001876 found_line = True
1877 if not found_line:
1878 # Ensures that a completely empty trace still creates the
1879 # corresponding Process instance by logging a dummy line.
1880 context.on_line(pid, '')
1881 else:
1882 logging.warning('Found unexpected file %s', pidfile)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001883 result['results'] = context.to_results()
maruel@chromium.org424f5692012-10-15 15:42:38 +00001884 except TracingFailure:
1885 result['exception'] = sys.exc_info()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001886 out.append(result)
1887 return out
1888
1889
1890class Dtrace(ApiBase):
1891 """Uses DTrace framework through dtrace. Requires root access.
1892
1893 Implies Mac OSX.
1894
1895 dtruss can't be used because it has compatibility issues with python.
1896
1897 Also, the pid->cwd handling needs to be done manually since OSX has no way to
1898 get the absolute path of the 'cwd' dtrace variable from the probe.
1899
1900 Also, OSX doesn't populate curpsinfo->pr_psargs properly, see
1901 https://discussions.apple.com/thread/1980539. So resort to handling execve()
1902 manually.
1903
1904 errno is not printed in the log since this implementation currently only cares
1905 about files that were successfully opened.
1906 """
1907 class Context(ApiBase.Context):
1908 # Format: index pid function(args)
1909 RE_HEADER = re.compile(r'^\d+ (\d+) ([a-zA-Z_\-]+)\((.*?)\)$')
1910
1911 # Arguments parsing.
1912 RE_DTRACE_BEGIN = re.compile(r'^\"(.+?)\"$')
1913 RE_CHDIR = re.compile(r'^\"(.+?)\"$')
1914 RE_EXECVE = re.compile(r'^\"(.+?)\", \[(\d+), (.+)\]$')
1915 RE_OPEN = re.compile(r'^\"(.+?)\", (0x[0-9a-z]+), (0x[0-9a-z]+)$')
1916 RE_PROC_START = re.compile(r'^(\d+), \"(.+?)\", (\d+)$')
1917 RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$')
1918
maruel@chromium.orgac361162013-06-04 15:54:06 +00001919 # O_DIRECTORY is not defined on Windows and dtrace doesn't exist on Windows.
1920 O_DIRECTORY = os.O_DIRECTORY if hasattr(os, 'O_DIRECTORY') else None
maruel@chromium.org538141b2013-06-03 20:57:17 +00001921 O_RDWR = os.O_RDWR
1922 O_WRONLY = os.O_WRONLY
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001923
1924 class Process(ApiBase.Context.Process):
1925 def __init__(self, *args):
1926 super(Dtrace.Context.Process, self).__init__(*args)
1927 self.cwd = self.initial_cwd
1928
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001929 def __init__(self, blacklist, thunk_pid, initial_cwd):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001930 logging.info(
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001931 '%s(%d, %s)' % (self.__class__.__name__, thunk_pid, initial_cwd))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001932 super(Dtrace.Context, self).__init__(blacklist)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001933 assert isinstance(initial_cwd, unicode), initial_cwd
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001934 # Process ID of the temporary script created by create_subprocess_thunk().
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001935 self._thunk_pid = thunk_pid
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001936 self._initial_cwd = initial_cwd
1937 self._line_number = 0
1938
1939 def on_line(self, line):
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001940 assert isinstance(line, unicode), line
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001941 self._line_number += 1
1942 match = self.RE_HEADER.match(line)
1943 if not match:
1944 raise TracingFailure(
1945 'Found malformed line: %s' % line,
1946 None,
1947 self._line_number,
1948 line)
1949 fn = getattr(
1950 self,
1951 'handle_%s' % match.group(2).replace('-', '_'),
1952 self._handle_ignored)
1953 # It is guaranteed to succeed because of the regexp. Or at least I thought
1954 # it would.
1955 pid = int(match.group(1))
1956 try:
1957 return fn(pid, match.group(3))
1958 except TracingFailure, e:
1959 # Hack in the values since the handler could be a static function.
1960 e.pid = pid
1961 e.line = line
1962 e.line_number = self._line_number
1963 # Re-raise the modified exception.
1964 raise
1965 except (KeyError, NotImplementedError, ValueError), e:
1966 raise TracingFailure(
1967 'Trace generated a %s exception: %s' % (
1968 e.__class__.__name__, str(e)),
1969 pid,
1970 self._line_number,
1971 line,
1972 e)
1973
1974 def to_results(self):
1975 process = self.root_process.to_results_process()
1976 # Internal concistency check.
1977 if sorted(self._process_lookup) != sorted(p.pid for p in process.all):
1978 raise TracingFailure(
1979 'Found internal inconsitency in process lifetime detection '
1980 'while looking for len(tree) == len(list)',
1981 None,
1982 None,
1983 None,
1984 sorted(self._process_lookup),
1985 sorted(p.pid for p in process.all))
1986 return Results(process)
1987
1988 def handle_dtrace_BEGIN(self, _pid, args):
1989 if not self.RE_DTRACE_BEGIN.match(args):
1990 raise TracingFailure(
1991 'Found internal inconsitency in dtrace_BEGIN log line',
1992 None, None, None)
1993
1994 def handle_proc_start(self, pid, args):
1995 """Transfers cwd.
1996
1997 The dtrace script already takes care of only tracing the processes that
1998 are child of the traced processes so there is no need to verify the
1999 process hierarchy.
2000 """
2001 if pid in self._process_lookup:
2002 raise TracingFailure(
2003 'Found internal inconsitency in proc_start: %d started two times' %
2004 pid,
2005 None, None, None)
2006 match = self.RE_PROC_START.match(args)
2007 if not match:
2008 raise TracingFailure(
2009 'Failed to parse arguments: %s' % args,
2010 None, None, None)
2011 ppid = int(match.group(1))
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002012 if ppid == self._thunk_pid and not self.root_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002013 proc = self.root_process = self.Process(
2014 self.blacklist, pid, self._initial_cwd)
2015 elif ppid in self._process_lookup:
2016 proc = self.Process(self.blacklist, pid, self._process_lookup[ppid].cwd)
2017 self._process_lookup[ppid].children.append(proc)
2018 else:
2019 # Another process tree, ignore.
2020 return
2021 self._process_lookup[pid] = proc
2022 logging.debug(
2023 'New child: %s -> %d cwd:%s' %
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002024 (ppid, pid, render(proc.initial_cwd)))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002025
2026 def handle_proc_exit(self, pid, _args):
2027 """Removes cwd."""
2028 if pid in self._process_lookup:
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002029 # self._thunk_pid is not traced itself and other traces run neither.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002030 self._process_lookup[pid].cwd = None
2031
2032 def handle_execve(self, pid, args):
2033 """Sets the process' executable.
2034
2035 TODO(maruel): Read command line arguments. See
2036 https://discussions.apple.com/thread/1980539 for an example.
2037 https://gist.github.com/1242279
2038
2039 Will have to put the answer at http://stackoverflow.com/questions/7556249.
2040 :)
2041 """
2042 if not pid in self._process_lookup:
2043 # Another process tree, ignore.
2044 return
2045 match = self.RE_EXECVE.match(args)
2046 if not match:
2047 raise TracingFailure(
2048 'Failed to parse arguments: %r' % args,
2049 None, None, None)
2050 proc = self._process_lookup[pid]
2051 proc.executable = match.group(1)
maruel@chromium.org538141b2013-06-03 20:57:17 +00002052 self._handle_file(pid, proc.executable, Results.File.READ)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002053 proc.command = self.process_escaped_arguments(match.group(3))
2054 if int(match.group(2)) != len(proc.command):
2055 raise TracingFailure(
2056 'Failed to parse execve() arguments: %s' % args,
2057 None, None, None)
2058
2059 def handle_chdir(self, pid, args):
2060 """Updates cwd."""
2061 if pid not in self._process_lookup:
2062 # Another process tree, ignore.
2063 return
2064 cwd = self.RE_CHDIR.match(args).group(1)
2065 if not cwd.startswith('/'):
2066 cwd2 = os.path.join(self._process_lookup[pid].cwd, cwd)
2067 logging.debug('handle_chdir(%d, %s) -> %s' % (pid, cwd, cwd2))
2068 else:
2069 logging.debug('handle_chdir(%d, %s)' % (pid, cwd))
2070 cwd2 = cwd
2071 self._process_lookup[pid].cwd = cwd2
2072
2073 def handle_open_nocancel(self, pid, args):
2074 """Redirects to handle_open()."""
2075 return self.handle_open(pid, args)
2076
2077 def handle_open(self, pid, args):
2078 if pid not in self._process_lookup:
2079 # Another process tree, ignore.
2080 return
2081 match = self.RE_OPEN.match(args)
2082 if not match:
2083 raise TracingFailure(
2084 'Failed to parse arguments: %s' % args,
2085 None, None, None)
2086 flag = int(match.group(2), 16)
2087 if self.O_DIRECTORY & flag == self.O_DIRECTORY:
2088 # Ignore directories.
2089 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00002090 self._handle_file(
2091 pid,
2092 match.group(1),
2093 Results.File.READ if not ((self.O_RDWR | self.O_WRONLY) & flag)
2094 else Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002095
2096 def handle_rename(self, pid, args):
2097 if pid not in self._process_lookup:
2098 # Another process tree, ignore.
2099 return
2100 match = self.RE_RENAME.match(args)
2101 if not match:
2102 raise TracingFailure(
2103 'Failed to parse arguments: %s' % args,
2104 None, None, None)
maruel@chromium.org538141b2013-06-03 20:57:17 +00002105 self._handle_file(pid, match.group(1), Results.File.READ)
2106 self._handle_file(pid, match.group(2), Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002107
maruel@chromium.org538141b2013-06-03 20:57:17 +00002108 def _handle_file(self, pid, filepath, mode):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002109 if not filepath.startswith('/'):
2110 filepath = os.path.join(self._process_lookup[pid].cwd, filepath)
2111 # We can get '..' in the path.
2112 filepath = os.path.normpath(filepath)
2113 # Sadly, still need to filter out directories here;
2114 # saw open_nocancel(".", 0, 0) = 0 lines.
2115 if os.path.isdir(filepath):
2116 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00002117 self._process_lookup[pid].add_file(filepath, mode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002118
2119 def handle_ftruncate(self, pid, args):
2120 """Just used as a signal to kill dtrace, ignoring."""
2121 pass
2122
2123 @staticmethod
2124 def _handle_ignored(pid, args):
2125 """Is called for all the event traces that are not handled."""
2126 raise NotImplementedError('Please implement me')
2127
2128 @staticmethod
2129 def process_escaped_arguments(text):
2130 """Extracts escaped arguments on a string and return the arguments as a
2131 list.
2132
2133 Implemented as an automaton.
2134
2135 Example:
2136 With text = '\\001python2.7\\001-c\\001print(\\"hi\\")\\0', the
2137 function will return ['python2.7', '-c', 'print("hi")]
2138 """
2139 if not text.endswith('\\0'):
2140 raise ValueError('String is not null terminated: %r' % text, text)
2141 text = text[:-2]
2142
2143 def unescape(x):
2144 """Replaces '\\' with '\' and '\?' (where ? is anything) with ?."""
2145 out = []
2146 escaped = False
2147 for i in x:
2148 if i == '\\' and not escaped:
2149 escaped = True
2150 continue
2151 escaped = False
2152 out.append(i)
2153 return ''.join(out)
2154
2155 return [unescape(i) for i in text.split('\\001')]
2156
2157 class Tracer(ApiBase.Tracer):
2158 # pylint: disable=C0301
2159 #
2160 # To understand the following code, you'll want to take a look at:
2161 # http://developers.sun.com/solaris/articles/dtrace_quickref/dtrace_quickref.html
2162 # https://wikis.oracle.com/display/DTrace/Variables
2163 # http://docs.oracle.com/cd/E19205-01/820-4221/
2164 #
2165 # 0. Dump all the valid probes into a text file. It is important, you
2166 # want to redirect into a file and you don't want to constantly 'sudo'.
2167 # $ sudo dtrace -l > probes.txt
2168 #
2169 # 1. Count the number of probes:
2170 # $ wc -l probes.txt
2171 # 81823 # On OSX 10.7, including 1 header line.
2172 #
2173 # 2. List providers, intentionally skipping all the 'syspolicy10925' and the
2174 # likes and skipping the header with NR>1:
2175 # $ awk 'NR>1 { print $2 }' probes.txt | sort | uniq | grep -v '[[:digit:]]'
2176 # dtrace
2177 # fbt
2178 # io
2179 # ip
2180 # lockstat
2181 # mach_trap
2182 # proc
2183 # profile
2184 # sched
2185 # syscall
2186 # tcp
2187 # vminfo
2188 #
2189 # 3. List of valid probes:
2190 # $ grep syscall probes.txt | less
2191 # or use dtrace directly:
2192 # $ sudo dtrace -l -P syscall | less
2193 #
2194 # trackedpid is an associative array where its value can be 0, 1 or 2.
2195 # 0 is for untracked processes and is the default value for items not
2196 # in the associative array.
2197 # 1 is for tracked processes.
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002198 # 2 is for the script created by create_subprocess_thunk() only. It is not
2199 # tracked itself but all its decendants are.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002200 #
2201 # The script will kill itself only once waiting_to_die == 1 and
2202 # current_processes == 0, so that both getlogin() was called and that
2203 # all traced processes exited.
2204 #
2205 # TODO(maruel): Use cacheable predicates. See
2206 # https://wikis.oracle.com/display/DTrace/Performance+Considerations
2207 D_CODE = """
2208 dtrace:::BEGIN {
2209 waiting_to_die = 0;
2210 current_processes = 0;
2211 logindex = 0;
2212 printf("%d %d %s_%s(\\"%s\\")\\n",
2213 logindex, PID, probeprov, probename, SCRIPT);
2214 logindex++;
2215 }
2216
2217 proc:::start /trackedpid[ppid]/ {
2218 trackedpid[pid] = 1;
2219 current_processes += 1;
2220 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
2221 logindex, pid, probeprov, probename,
2222 ppid,
2223 execname,
2224 current_processes);
2225 logindex++;
2226 }
2227 /* Should use SCRIPT but there is no access to this variable at that
2228 * point. */
2229 proc:::start /ppid == PID && execname == "Python"/ {
2230 trackedpid[pid] = 2;
2231 current_processes += 1;
2232 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
2233 logindex, pid, probeprov, probename,
2234 ppid,
2235 execname,
2236 current_processes);
2237 logindex++;
2238 }
2239 proc:::exit /trackedpid[pid] &&
2240 current_processes == 1 &&
2241 waiting_to_die == 1/ {
2242 trackedpid[pid] = 0;
2243 current_processes -= 1;
2244 printf("%d %d %s_%s(%d)\\n",
2245 logindex, pid, probeprov, probename,
2246 current_processes);
2247 logindex++;
2248 exit(0);
2249 }
2250 proc:::exit /trackedpid[pid]/ {
2251 trackedpid[pid] = 0;
2252 current_processes -= 1;
2253 printf("%d %d %s_%s(%d)\\n",
2254 logindex, pid, probeprov, probename,
2255 current_processes);
2256 logindex++;
2257 }
2258
2259 /* Use an arcane function to detect when we need to die */
2260 syscall::ftruncate:entry /pid == PID && arg0 == FILE_ID/ {
2261 waiting_to_die = 1;
2262 printf("%d %d %s()\\n", logindex, pid, probefunc);
2263 logindex++;
2264 }
2265 syscall::ftruncate:entry /
2266 pid == PID && arg0 == FILE_ID && current_processes == 0/ {
2267 exit(0);
2268 }
2269
2270 syscall::open*:entry /trackedpid[pid] == 1/ {
2271 self->open_arg0 = arg0;
2272 self->open_arg1 = arg1;
2273 self->open_arg2 = arg2;
2274 }
2275 syscall::open*:return /trackedpid[pid] == 1 && errno == 0/ {
2276 this->open_arg0 = copyinstr(self->open_arg0);
2277 printf("%d %d %s(\\"%s\\", 0x%x, 0x%x)\\n",
2278 logindex, pid, probefunc,
2279 this->open_arg0,
2280 self->open_arg1,
2281 self->open_arg2);
2282 logindex++;
2283 this->open_arg0 = 0;
2284 }
2285 syscall::open*:return /trackedpid[pid] == 1/ {
2286 self->open_arg0 = 0;
2287 self->open_arg1 = 0;
2288 self->open_arg2 = 0;
2289 }
2290
2291 syscall::rename:entry /trackedpid[pid] == 1/ {
2292 self->rename_arg0 = arg0;
2293 self->rename_arg1 = arg1;
2294 }
2295 syscall::rename:return /trackedpid[pid] == 1 && errno == 0/ {
2296 this->rename_arg0 = copyinstr(self->rename_arg0);
2297 this->rename_arg1 = copyinstr(self->rename_arg1);
2298 printf("%d %d %s(\\"%s\\", \\"%s\\")\\n",
2299 logindex, pid, probefunc,
2300 this->rename_arg0,
2301 this->rename_arg1);
2302 logindex++;
2303 this->rename_arg0 = 0;
2304 this->rename_arg1 = 0;
2305 }
2306 syscall::rename:return /trackedpid[pid] == 1/ {
2307 self->rename_arg0 = 0;
2308 self->rename_arg1 = 0;
2309 }
2310
2311 /* Track chdir, it's painful because it is only receiving relative path.
2312 */
2313 syscall::chdir:entry /trackedpid[pid] == 1/ {
2314 self->chdir_arg0 = arg0;
2315 }
2316 syscall::chdir:return /trackedpid[pid] == 1 && errno == 0/ {
2317 this->chdir_arg0 = copyinstr(self->chdir_arg0);
2318 printf("%d %d %s(\\"%s\\")\\n",
2319 logindex, pid, probefunc,
2320 this->chdir_arg0);
2321 logindex++;
2322 this->chdir_arg0 = 0;
2323 }
2324 syscall::chdir:return /trackedpid[pid] == 1/ {
2325 self->chdir_arg0 = 0;
2326 }
2327 """
2328
2329 # execve-specific code, tends to throw a lot of exceptions.
2330 D_CODE_EXECVE = """
2331 /* Finally what we care about! */
2332 syscall::exec*:entry /trackedpid[pid]/ {
2333 self->exec_arg0 = copyinstr(arg0);
2334 /* Incrementally probe for a NULL in the argv parameter of execve() to
2335 * figure out argc. */
2336 /* TODO(maruel): Skip the remaining copyin() when a NULL pointer was
2337 * found. */
2338 self->exec_argc = 0;
2339 /* Probe for argc==1 */
2340 this->exec_argv = (user_addr_t*)copyin(
2341 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2342 self->exec_argc = this->exec_argv[self->exec_argc] ?
2343 (self->exec_argc + 1) : self->exec_argc;
2344
2345 /* Probe for argc==2 */
2346 this->exec_argv = (user_addr_t*)copyin(
2347 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2348 self->exec_argc = this->exec_argv[self->exec_argc] ?
2349 (self->exec_argc + 1) : self->exec_argc;
2350
2351 /* Probe for argc==3 */
2352 this->exec_argv = (user_addr_t*)copyin(
2353 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2354 self->exec_argc = this->exec_argv[self->exec_argc] ?
2355 (self->exec_argc + 1) : self->exec_argc;
2356
2357 /* Probe for argc==4 */
2358 this->exec_argv = (user_addr_t*)copyin(
2359 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2360 self->exec_argc = this->exec_argv[self->exec_argc] ?
2361 (self->exec_argc + 1) : self->exec_argc;
2362
2363 /* Copy the inputs strings since there is no guarantee they'll be
2364 * present after the call completed. */
2365 self->exec_argv0 = (self->exec_argc > 0) ?
2366 copyinstr(this->exec_argv[0]) : "";
2367 self->exec_argv1 = (self->exec_argc > 1) ?
2368 copyinstr(this->exec_argv[1]) : "";
2369 self->exec_argv2 = (self->exec_argc > 2) ?
2370 copyinstr(this->exec_argv[2]) : "";
2371 self->exec_argv3 = (self->exec_argc > 3) ?
2372 copyinstr(this->exec_argv[3]) : "";
2373 this->exec_argv = 0;
2374 }
2375 syscall::exec*:return /trackedpid[pid] && errno == 0/ {
2376 /* We need to join strings here, as using multiple printf() would
2377 * cause tearing when multiple threads/processes are traced.
2378 * Since it is impossible to escape a string and join it to another one,
2379 * like sprintf("%s%S", previous, more), use hackery.
2380 * Each of the elements are split with a \\1. \\0 cannot be used because
2381 * it is simply ignored. This will conflict with any program putting a
2382 * \\1 in their execve() string but this should be "rare enough" */
2383 this->args = "";
2384 /* Process exec_argv[0] */
2385 this->args = strjoin(
2386 this->args, (self->exec_argc > 0) ? self->exec_argv0 : "");
2387
2388 /* Process exec_argv[1] */
2389 this->args = strjoin(
2390 this->args, (self->exec_argc > 1) ? "\\1" : "");
2391 this->args = strjoin(
2392 this->args, (self->exec_argc > 1) ? self->exec_argv1 : "");
2393
2394 /* Process exec_argv[2] */
2395 this->args = strjoin(
2396 this->args, (self->exec_argc > 2) ? "\\1" : "");
2397 this->args = strjoin(
2398 this->args, (self->exec_argc > 2) ? self->exec_argv2 : "");
2399
2400 /* Process exec_argv[3] */
2401 this->args = strjoin(
2402 this->args, (self->exec_argc > 3) ? "\\1" : "");
2403 this->args = strjoin(
2404 this->args, (self->exec_argc > 3) ? self->exec_argv3 : "");
2405
2406 /* Prints self->exec_argc to permits verifying the internal
2407 * consistency since this code is quite fishy. */
2408 printf("%d %d %s(\\"%s\\", [%d, %S])\\n",
2409 logindex, pid, probefunc,
2410 self->exec_arg0,
2411 self->exec_argc,
2412 this->args);
2413 logindex++;
2414 this->args = 0;
2415 }
2416 syscall::exec*:return /trackedpid[pid]/ {
2417 self->exec_arg0 = 0;
2418 self->exec_argc = 0;
2419 self->exec_argv0 = 0;
2420 self->exec_argv1 = 0;
2421 self->exec_argv2 = 0;
2422 self->exec_argv3 = 0;
2423 }
2424 """
2425
2426 # Code currently not used.
2427 D_EXTRANEOUS = """
2428 /* This is a good learning experience, since it traces a lot of things
2429 * related to the process and child processes.
2430 * Warning: it generates a gigantic log. For example, tracing
2431 * "data/trace_inputs/child1.py --child" generates a 2mb log and takes
2432 * several minutes to execute.
2433 */
2434 /*
2435 mach_trap::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2436 printf("%d %d %s_%s() = %d\\n",
2437 logindex, pid, probeprov, probefunc, errno);
2438 logindex++;
2439 }
2440 proc::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2441 printf("%d %d %s_%s() = %d\\n",
2442 logindex, pid, probeprov, probefunc, errno);
2443 logindex++;
2444 }
2445 sched::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2446 printf("%d %d %s_%s() = %d\\n",
2447 logindex, pid, probeprov, probefunc, errno);
2448 logindex++;
2449 }
2450 syscall::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2451 printf("%d %d %s_%s() = %d\\n",
2452 logindex, pid, probeprov, probefunc, errno);
2453 logindex++;
2454 }
2455 vminfo::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2456 printf("%d %d %s_%s() = %d\\n",
2457 logindex, pid, probeprov, probefunc, errno);
2458 logindex++;
2459 }
2460 */
2461 /* TODO(maruel): *stat* functions and friends
2462 syscall::access:return,
2463 syscall::chdir:return,
2464 syscall::chflags:return,
2465 syscall::chown:return,
2466 syscall::chroot:return,
2467 syscall::getattrlist:return,
2468 syscall::getxattr:return,
2469 syscall::lchown:return,
2470 syscall::lstat64:return,
2471 syscall::lstat:return,
2472 syscall::mkdir:return,
2473 syscall::pathconf:return,
2474 syscall::readlink:return,
2475 syscall::removexattr:return,
2476 syscall::setxattr:return,
2477 syscall::stat64:return,
2478 syscall::stat:return,
2479 syscall::truncate:return,
2480 syscall::unlink:return,
2481 syscall::utimes:return,
2482 */
2483 """
2484
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002485 def __init__(self, logname, use_sudo):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002486 """Starts the log collection with dtrace.
2487
2488 Requires root access or chmod 4555 on dtrace. dtrace is asynchronous so
2489 this needs to wait for dtrace to be "warmed up".
2490 """
2491 super(Dtrace.Tracer, self).__init__(logname)
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002492 # This script is used as a signal to figure out the root process.
2493 self._signal_script = create_subprocess_thunk()
2494 self._scripts_to_cleanup.append(self._signal_script)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002495 # This unique dummy temp file is used to signal the dtrace script that it
2496 # should stop as soon as all the child processes are done. A bit hackish
2497 # but works fine enough.
2498 self._dummy_file_id, self._dummy_file_name = tempfile.mkstemp(
2499 prefix='trace_signal_file')
2500
maruel@chromium.org9b2112a2013-04-05 18:01:02 +00002501 dtrace_path = '/usr/sbin/dtrace'
2502 if not os.path.isfile(dtrace_path):
2503 dtrace_path = 'dtrace'
2504 elif use_sudo is None and (os.stat(dtrace_path).st_mode & stat.S_ISUID):
2505 # No need to sudo. For those following at home, don't do that.
2506 use_sudo = False
2507
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002508 # Note: do not use the -p flag. It's useless if the initial process quits
2509 # too fast, resulting in missing traces from the grand-children. The D
2510 # code manages the dtrace lifetime itself.
2511 trace_cmd = [
maruel@chromium.org9b2112a2013-04-05 18:01:02 +00002512 dtrace_path,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002513 # Use a larger buffer if getting 'out of scratch space' errors.
2514 # Ref: https://wikis.oracle.com/display/DTrace/Options+and+Tunables
2515 '-b', '10m',
2516 '-x', 'dynvarsize=10m',
2517 #'-x', 'dtrace_global_maxsize=1m',
2518 '-x', 'evaltime=exec',
2519 '-o', '/dev/stderr',
2520 '-q',
2521 '-n', self._get_dtrace_code(),
2522 ]
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002523 if use_sudo is not False:
2524 trace_cmd.insert(0, 'sudo')
2525
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002526 with open(self._logname + '.log', 'wb') as logfile:
2527 self._dtrace = subprocess.Popen(
2528 trace_cmd, stdout=logfile, stderr=subprocess.STDOUT)
2529 logging.debug('Started dtrace pid: %d' % self._dtrace.pid)
2530
2531 # Reads until one line is printed, which signifies dtrace is up and ready.
2532 with open(self._logname + '.log', 'rb') as logfile:
2533 while 'dtrace_BEGIN' not in logfile.readline():
2534 if self._dtrace.poll() is not None:
2535 # Do a busy wait. :/
2536 break
2537 logging.debug('dtrace started')
2538
2539 def _get_dtrace_code(self):
2540 """Setups the D code to implement child process tracking.
2541
2542 Injects the cookie in the script so it knows when to stop.
2543
2544 The script will detect any instance of the script created with
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002545 create_subprocess_thunk() and will start tracing it.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002546 """
maruel@chromium.orgbe75f282012-11-02 19:34:37 +00002547 out = (
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002548 'inline int PID = %d;\n'
2549 'inline string SCRIPT = "%s";\n'
2550 'inline int FILE_ID = %d;\n'
2551 '\n'
2552 '%s') % (
2553 os.getpid(),
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002554 self._signal_script,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002555 self._dummy_file_id,
maruel@chromium.orgbe75f282012-11-02 19:34:37 +00002556 self.D_CODE)
2557 if os.environ.get('TRACE_INPUTS_DTRACE_ENABLE_EXECVE') == '1':
2558 # Do not enable by default since it tends to spew dtrace: error lines
2559 # because the execve() parameters are not in valid memory at the time of
2560 # logging.
2561 # TODO(maruel): Find a way to make this reliable since it's useful but
2562 # only works in limited/trivial uses cases for now.
2563 out += self.D_CODE_EXECVE
2564 return out
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002565
2566 def trace(self, cmd, cwd, tracename, output):
2567 """Runs dtrace on an executable.
2568
2569 This dtruss is broken when it starts the process itself or when tracing
2570 child processes, this code starts a wrapper process
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002571 generated with create_subprocess_thunk() which starts the executable to
2572 trace.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002573 """
2574 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
2575 assert os.path.isabs(cmd[0]), cmd[0]
2576 assert os.path.isabs(cwd), cwd
2577 assert os.path.normpath(cwd) == cwd, cwd
2578 with self._lock:
2579 if not self._initialized:
2580 raise TracingFailure(
2581 'Called Tracer.trace() on an unitialized object',
2582 None, None, None, tracename)
2583 assert tracename not in (i['trace'] for i in self._traces)
2584
2585 # Starts the script wrapper to start the child process. This signals the
2586 # dtrace script that this process is to be traced.
2587 stdout = stderr = None
2588 if output:
2589 stdout = subprocess.PIPE
2590 stderr = subprocess.STDOUT
2591 child_cmd = [
2592 sys.executable,
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002593 self._signal_script,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002594 tracename,
2595 ]
2596 # Call a dummy function so that dtrace knows I'm about to launch a process
2597 # that needs to be traced.
2598 # Yummy.
2599 child = subprocess.Popen(
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00002600 child_cmd + tools.fix_python_path(cmd),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002601 stdin=subprocess.PIPE,
2602 stdout=stdout,
2603 stderr=stderr,
2604 cwd=cwd)
2605 logging.debug('Started child pid: %d' % child.pid)
2606
2607 out = child.communicate()[0]
2608 # This doesn't mean tracing is done, one of the grand-child process may
2609 # still be alive. It will be tracked with the dtrace script.
2610
2611 with self._lock:
2612 assert tracename not in (i['trace'] for i in self._traces)
2613 self._traces.append(
2614 {
2615 'cmd': cmd,
2616 'cwd': cwd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002617 'pid': child.pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002618 'output': out,
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002619 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002620 })
2621 return child.returncode, out
2622
2623 def close(self, timeout=None):
2624 """Terminates dtrace."""
2625 logging.debug('close(%s)' % timeout)
2626 try:
2627 try:
2628 super(Dtrace.Tracer, self).close(timeout)
2629 # Signal dtrace that it should stop now.
maruel@chromium.orgf43e68b2012-10-15 20:23:10 +00002630 # ftruncate doesn't exist on Windows.
2631 os.ftruncate(self._dummy_file_id, 0) # pylint: disable=E1101
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002632 if timeout:
2633 start = time.time()
2634 # Use polling. :/
2635 while (self._dtrace.poll() is None and
2636 (time.time() - start) < timeout):
2637 time.sleep(0.1)
2638 self._dtrace.kill()
2639 self._dtrace.wait()
2640 finally:
2641 # Make sure to kill it in any case.
2642 if self._dtrace.poll() is None:
2643 try:
2644 self._dtrace.kill()
2645 self._dtrace.wait()
2646 except OSError:
2647 pass
2648
2649 if self._dtrace.returncode != 0:
2650 # Warn about any dtrace failure but basically ignore it.
2651 print 'dtrace failure: %s' % self._dtrace.returncode
2652 finally:
2653 os.close(self._dummy_file_id)
2654 os.remove(self._dummy_file_name)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002655
2656 def post_process_log(self):
2657 """Sorts the log back in order when each call occured.
2658
2659 dtrace doesn't save the buffer in strict order since it keeps one buffer
2660 per CPU.
2661 """
2662 super(Dtrace.Tracer, self).post_process_log()
2663 logname = self._logname + '.log'
2664 with open(logname, 'rb') as logfile:
2665 lines = [l for l in logfile if l.strip()]
2666 errors = [l for l in lines if l.startswith('dtrace:')]
2667 if errors:
2668 raise TracingFailure(
2669 'Found errors in the trace: %s' % '\n'.join(errors),
2670 None, None, None, logname)
2671 try:
2672 lines = sorted(lines, key=lambda l: int(l.split(' ', 1)[0]))
2673 except ValueError:
2674 raise TracingFailure(
2675 'Found errors in the trace: %s' % '\n'.join(
2676 l for l in lines if l.split(' ', 1)[0].isdigit()),
2677 None, None, None, logname)
2678 with open(logname, 'wb') as logfile:
2679 logfile.write(''.join(lines))
2680
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002681 def __init__(self, use_sudo=None):
2682 super(Dtrace, self).__init__()
2683 self.use_sudo = use_sudo
2684
2685 def get_tracer(self, logname):
2686 return self.Tracer(logname, self.use_sudo)
2687
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002688 @staticmethod
2689 def clean_trace(logname):
2690 for ext in ('', '.log'):
2691 if os.path.isfile(logname + ext):
2692 os.remove(logname + ext)
2693
2694 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00002695 def parse_log(cls, logname, blacklist, trace_name):
2696 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00002697 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002698
2699 def blacklist_more(filepath):
2700 # All the HFS metadata is in the form /.vol/...
2701 return blacklist(filepath) or re.match(r'^\/\.vol\/.+$', filepath)
2702
2703 data = read_json(logname)
2704 out = []
2705 for item in data['traces']:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00002706 if trace_name and item['trace'] != trace_name:
2707 continue
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002708 result = {
2709 'output': item['output'],
2710 'trace': item['trace'],
2711 }
2712 try:
2713 context = cls.Context(blacklist_more, item['pid'], item['cwd'])
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002714 # It's fine to assume the file as UTF-8: OSX enforces the file names to
2715 # be valid UTF-8 and we control the log output.
2716 for line in codecs.open(logname + '.log', 'rb', encoding='utf-8'):
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002717 context.on_line(line)
2718 result['results'] = context.to_results()
2719 except TracingFailure:
2720 result['exception'] = sys.exc_info()
2721 out.append(result)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002722 return out
2723
2724
2725class LogmanTrace(ApiBase):
2726 """Uses the native Windows ETW based tracing functionality to trace a child
2727 process.
2728
2729 Caveat: this implementations doesn't track cwd or initial_cwd. It is because
2730 the Windows Kernel doesn't have a concept of 'current working directory' at
2731 all. A Win32 process has a map of current directories, one per drive letter
2732 and it is managed by the user mode kernel32.dll. In kernel, a file is always
2733 opened relative to another file_object or as an absolute path. All the current
2734 working directory logic is done in user mode.
2735 """
2736 class Context(ApiBase.Context):
2737 """Processes a ETW log line and keeps the list of existent and non
2738 existent files accessed.
2739
2740 Ignores directories.
2741 """
2742 # These indexes are for the stripped version in json.
2743 EVENT_NAME = 0
2744 TYPE = 1
2745 PID = 2
2746 TID = 3
2747 PROCESSOR_ID = 4
2748 TIMESTAMP = 5
2749 USER_DATA = 6
2750
2751 class Process(ApiBase.Context.Process):
2752 def __init__(self, *args):
2753 super(LogmanTrace.Context.Process, self).__init__(*args)
2754 # Handle file objects that succeeded.
2755 self.file_objects = {}
2756
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002757 def __init__(self, blacklist, thunk_pid, trace_name, thunk_cmd):
2758 logging.info(
2759 '%s(%d, %s, %s)', self.__class__.__name__, thunk_pid, trace_name,
2760 thunk_cmd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002761 super(LogmanTrace.Context, self).__init__(blacklist)
2762 self._drive_map = DosDriveMap()
2763 # Threads mapping to the corresponding process id.
2764 self._threads_active = {}
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002765 # Process ID of the tracer, e.g. the temporary script created by
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002766 # create_subprocess_thunk(). This is tricky because the process id may
2767 # have been reused.
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002768 self._thunk_pid = thunk_pid
2769 self._thunk_cmd = thunk_cmd
2770 self._trace_name = trace_name
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002771 self._line_number = 0
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002772 self._thunk_process = None
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002773
2774 def on_line(self, line):
2775 """Processes a json Event line."""
2776 self._line_number += 1
2777 try:
2778 # By Opcode
2779 handler = getattr(
2780 self,
2781 'handle_%s_%s' % (line[self.EVENT_NAME], line[self.TYPE]),
2782 None)
2783 if not handler:
2784 raise TracingFailure(
2785 'Unexpected event %s_%s' % (
2786 line[self.EVENT_NAME], line[self.TYPE]),
2787 None, None, None)
2788 handler(line)
2789 except TracingFailure, e:
2790 # Hack in the values since the handler could be a static function.
2791 e.pid = line[self.PID]
2792 e.line = line
2793 e.line_number = self._line_number
2794 # Re-raise the modified exception.
2795 raise
2796 except (KeyError, NotImplementedError, ValueError), e:
2797 raise TracingFailure(
2798 'Trace generated a %s exception: %s' % (
2799 e.__class__.__name__, str(e)),
2800 line[self.PID],
2801 self._line_number,
2802 line,
2803 e)
2804
2805 def to_results(self):
2806 if not self.root_process:
2807 raise TracingFailure(
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002808 'Failed to detect the initial process %d' % self._thunk_pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002809 None, None, None)
2810 process = self.root_process.to_results_process()
2811 return Results(process)
2812
2813 def _thread_to_process(self, tid):
2814 """Finds the process from the thread id."""
2815 tid = int(tid, 16)
2816 pid = self._threads_active.get(tid)
2817 if not pid or not self._process_lookup.get(pid):
2818 return
2819 return self._process_lookup[pid]
2820
2821 @classmethod
2822 def handle_EventTrace_Header(cls, line):
2823 """Verifies no event was dropped, e.g. no buffer overrun occured."""
2824 BUFFER_SIZE = cls.USER_DATA
2825 #VERSION = cls.USER_DATA + 1
2826 #PROVIDER_VERSION = cls.USER_DATA + 2
2827 #NUMBER_OF_PROCESSORS = cls.USER_DATA + 3
2828 #END_TIME = cls.USER_DATA + 4
2829 #TIMER_RESOLUTION = cls.USER_DATA + 5
2830 #MAX_FILE_SIZE = cls.USER_DATA + 6
2831 #LOG_FILE_MODE = cls.USER_DATA + 7
2832 #BUFFERS_WRITTEN = cls.USER_DATA + 8
2833 #START_BUFFERS = cls.USER_DATA + 9
2834 #POINTER_SIZE = cls.USER_DATA + 10
2835 EVENTS_LOST = cls.USER_DATA + 11
2836 #CPU_SPEED = cls.USER_DATA + 12
2837 #LOGGER_NAME = cls.USER_DATA + 13
2838 #LOG_FILE_NAME = cls.USER_DATA + 14
2839 #BOOT_TIME = cls.USER_DATA + 15
2840 #PERF_FREQ = cls.USER_DATA + 16
2841 #START_TIME = cls.USER_DATA + 17
2842 #RESERVED_FLAGS = cls.USER_DATA + 18
2843 #BUFFERS_LOST = cls.USER_DATA + 19
2844 #SESSION_NAME_STRING = cls.USER_DATA + 20
2845 #LOG_FILE_NAME_STRING = cls.USER_DATA + 21
2846 if line[EVENTS_LOST] != '0':
2847 raise TracingFailure(
2848 ( '%s events were lost during trace, please increase the buffer '
2849 'size from %s') % (line[EVENTS_LOST], line[BUFFER_SIZE]),
2850 None, None, None)
2851
2852 def handle_FileIo_Cleanup(self, line):
2853 """General wisdom: if a file is closed, it's because it was opened.
2854
2855 Note that FileIo_Close is not used since if a file was opened properly but
2856 not closed before the process exits, only Cleanup will be logged.
2857 """
2858 #IRP = self.USER_DATA
2859 TTID = self.USER_DATA + 1 # Thread ID, that's what we want.
2860 FILE_OBJECT = self.USER_DATA + 2
2861 #FILE_KEY = self.USER_DATA + 3
2862 proc = self._thread_to_process(line[TTID])
2863 if not proc:
2864 # Not a process we care about.
2865 return
2866 file_object = line[FILE_OBJECT]
2867 if file_object in proc.file_objects:
maruel@chromium.org538141b2013-06-03 20:57:17 +00002868 filepath, access_type = proc.file_objects.pop(file_object)
2869 proc.add_file(filepath, access_type)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002870
2871 def handle_FileIo_Create(self, line):
2872 """Handles a file open.
2873
2874 All FileIo events are described at
2875 http://msdn.microsoft.com/library/windows/desktop/aa363884.aspx
2876 for some value of 'description'.
2877
2878 " (..) process and thread id values of the IO events (..) are not valid "
2879 http://msdn.microsoft.com/magazine/ee358703.aspx
2880
2881 The FileIo.Create event doesn't return if the CreateFile() call
2882 succeeded, so keep track of the file_object and check that it is
2883 eventually closed with FileIo_Cleanup.
2884 """
2885 #IRP = self.USER_DATA
2886 TTID = self.USER_DATA + 1 # Thread ID, that's what we want.
2887 FILE_OBJECT = self.USER_DATA + 2
2888 #CREATE_OPTIONS = self.USER_DATA + 3
2889 #FILE_ATTRIBUTES = self.USER_DATA + 4
maruel@chromium.org538141b2013-06-03 20:57:17 +00002890 #SHARE_ACCESS = self.USER_DATA + 5
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002891 OPEN_PATH = self.USER_DATA + 6
2892
2893 proc = self._thread_to_process(line[TTID])
2894 if not proc:
2895 # Not a process we care about.
2896 return
2897
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002898 raw_path = line[OPEN_PATH]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002899 # Ignore directories and bare drive right away.
2900 if raw_path.endswith(os.path.sep):
2901 return
2902 filepath = self._drive_map.to_win32(raw_path)
2903 # Ignore bare drive right away. Some may still fall through with format
2904 # like '\\?\X:'
2905 if len(filepath) == 2:
2906 return
2907 file_object = line[FILE_OBJECT]
2908 if os.path.isdir(filepath):
2909 # There is no O_DIRECTORY equivalent on Windows. The closed is
2910 # FILE_FLAG_BACKUP_SEMANTICS but it's not exactly right either. So
2911 # simply discard directories are they are found.
2912 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00002913 # Override any stale file object.
2914 # TODO(maruel): Figure out a way to detect if the file was opened for
2915 # reading or writting. Sadly CREATE_OPTIONS doesn't seem to be of any help
2916 # here. For now mark as None to make it clear we have no idea what it is
2917 # about.
2918 proc.file_objects[file_object] = (filepath, None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002919
2920 def handle_FileIo_Rename(self, line):
2921 # TODO(maruel): Handle?
2922 pass
2923
2924 def handle_Process_End(self, line):
2925 pid = line[self.PID]
2926 if self._process_lookup.get(pid):
2927 logging.info('Terminated: %d' % pid)
2928 self._process_lookup[pid] = None
2929 else:
2930 logging.debug('Terminated: %d' % pid)
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002931 if self._thunk_process and self._thunk_process.pid == pid:
2932 self._thunk_process = None
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002933
2934 def handle_Process_Start(self, line):
2935 """Handles a new child process started by PID."""
2936 #UNIQUE_PROCESS_KEY = self.USER_DATA
2937 PROCESS_ID = self.USER_DATA + 1
2938 #PARENT_PID = self.USER_DATA + 2
2939 #SESSION_ID = self.USER_DATA + 3
2940 #EXIT_STATUS = self.USER_DATA + 4
2941 #DIRECTORY_TABLE_BASE = self.USER_DATA + 5
2942 #USER_SID = self.USER_DATA + 6
2943 IMAGE_FILE_NAME = self.USER_DATA + 7
2944 COMMAND_LINE = self.USER_DATA + 8
2945
2946 ppid = line[self.PID]
2947 pid = int(line[PROCESS_ID], 16)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002948 command_line = CommandLineToArgvW(line[COMMAND_LINE])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002949 logging.debug(
2950 'New process %d->%d (%s) %s' %
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002951 (ppid, pid, line[IMAGE_FILE_NAME], command_line))
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002952
2953 if pid == self._thunk_pid:
2954 # Need to ignore processes we don't know about because the log is
2955 # system-wide. self._thunk_pid shall start only one process.
2956 # This is tricky though because Windows *loves* to reuse process id and
2957 # it happens often that the process ID of the thunk script created by
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002958 # create_subprocess_thunk() is reused. So just detecting the pid here is
2959 # not sufficient, we must confirm the command line.
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002960 if command_line[:len(self._thunk_cmd)] != self._thunk_cmd:
2961 logging.info(
2962 'Ignoring duplicate pid %d for %s: %s while searching for %s',
2963 pid, self._trace_name, command_line, self._thunk_cmd)
2964 return
2965
2966 # TODO(maruel): The check is quite weak. Add the thunk path.
2967 if self._thunk_process:
2968 raise TracingFailure(
2969 ( 'Parent process is _thunk_pid(%d) but thunk_process(%d) is '
2970 'already set') % (self._thunk_pid, self._thunk_process.pid),
2971 None, None, None)
2972 proc = self.Process(self.blacklist, pid, None)
2973 self._thunk_process = proc
2974 return
2975 elif ppid == self._thunk_pid and self._thunk_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002976 proc = self.Process(self.blacklist, pid, None)
2977 self.root_process = proc
2978 ppid = None
2979 elif self._process_lookup.get(ppid):
2980 proc = self.Process(self.blacklist, pid, None)
2981 self._process_lookup[ppid].children.append(proc)
2982 else:
2983 # Ignore
2984 return
2985 self._process_lookup[pid] = proc
2986
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002987 proc.command = command_line
2988 proc.executable = line[IMAGE_FILE_NAME]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002989 # proc.command[0] may be the absolute path of 'executable' but it may be
2990 # anything else too. If it happens that command[0] ends with executable,
2991 # use it, otherwise defaults to the base name.
2992 cmd0 = proc.command[0].lower()
2993 if not cmd0.endswith('.exe'):
2994 # TODO(maruel): That's not strictly true either.
2995 cmd0 += '.exe'
2996 if cmd0.endswith(proc.executable) and os.path.isfile(cmd0):
2997 # Fix the path.
2998 cmd0 = cmd0.replace('/', os.path.sep)
2999 cmd0 = os.path.normpath(cmd0)
3000 proc.executable = get_native_path_case(cmd0)
3001 logging.info(
3002 'New child: %s -> %d %s' % (ppid, pid, proc.executable))
3003
3004 def handle_Thread_End(self, line):
3005 """Has the same parameters as Thread_Start."""
3006 tid = int(line[self.TID], 16)
3007 self._threads_active.pop(tid, None)
3008
3009 def handle_Thread_Start(self, line):
3010 """Handles a new thread created.
3011
3012 Do not use self.PID here since a process' initial thread is created by
3013 the parent process.
3014 """
3015 PROCESS_ID = self.USER_DATA
3016 TTHREAD_ID = self.USER_DATA + 1
3017 #STACK_BASE = self.USER_DATA + 2
3018 #STACK_LIMIT = self.USER_DATA + 3
3019 #USER_STACK_BASE = self.USER_DATA + 4
3020 #USER_STACK_LIMIT = self.USER_DATA + 5
3021 #AFFINITY = self.USER_DATA + 6
3022 #WIN32_START_ADDR = self.USER_DATA + 7
3023 #TEB_BASE = self.USER_DATA + 8
3024 #SUB_PROCESS_TAG = self.USER_DATA + 9
3025 #BASE_PRIORITY = self.USER_DATA + 10
3026 #PAGE_PRIORITY = self.USER_DATA + 11
3027 #IO_PRIORITY = self.USER_DATA + 12
3028 #THREAD_FLAGS = self.USER_DATA + 13
3029 # Do not use self.PID here since a process' initial thread is created by
3030 # the parent process.
3031 pid = int(line[PROCESS_ID], 16)
3032 tid = int(line[TTHREAD_ID], 16)
3033 logging.debug('New thread pid:%d, tid:%d' % (pid, tid))
3034 self._threads_active[tid] = pid
3035
3036 @classmethod
3037 def supported_events(cls):
3038 """Returns all the procesed events."""
3039 out = []
3040 for member in dir(cls):
3041 match = re.match(r'^handle_([A-Za-z]+)_([A-Za-z]+)$', member)
3042 if match:
3043 out.append(match.groups())
3044 return out
3045
3046 class Tracer(ApiBase.Tracer):
3047 # The basic headers.
3048 EXPECTED_HEADER = [
3049 u'Event Name',
3050 u'Type',
3051 u'Event ID',
3052 u'Version',
3053 u'Channel',
3054 u'Level', # 5
3055 u'Opcode',
3056 u'Task',
3057 u'Keyword',
3058 u'PID',
3059 u'TID', # 10
3060 u'Processor Number',
3061 u'Instance ID',
3062 u'Parent Instance ID',
3063 u'Activity ID',
3064 u'Related Activity ID', # 15
3065 u'Clock-Time',
3066 u'Kernel(ms)', # Both have a resolution of ~15ms which makes them
3067 u'User(ms)', # pretty much useless.
3068 u'User Data', # Extra arguments that are event-specific.
3069 ]
3070 # Only the useful headers common to all entries are listed there. Any column
3071 # at 19 or higher is dependent on the specific event.
3072 EVENT_NAME = 0
3073 TYPE = 1
3074 PID = 9
3075 TID = 10
3076 PROCESSOR_ID = 11
3077 TIMESTAMP = 16
3078 NULL_GUID = '{00000000-0000-0000-0000-000000000000}'
3079 USER_DATA = 19
3080
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003081 class CsvReader(object):
3082 """CSV reader that reads files generated by tracerpt.exe.
3083
3084 csv.reader() fails to read them properly, it mangles file names quoted
3085 with "" with a comma in it.
3086 """
3087 # 0. Had a ',' or one of the following ' ' after a comma, next should
3088 # be ' ', '"' or string or ',' for an empty field.
3089 ( HAD_DELIMITER,
3090 # 1. Processing an unquoted field up to ','.
3091 IN_STR,
3092 # 2. Processing a new field starting with '"'.
3093 STARTING_STR_QUOTED,
3094 # 3. Second quote in a row at the start of a field. It could be either
3095 # '""foo""' or '""'. Who the hell thought it was a great idea to use
3096 # the same character for delimiting and escaping?
3097 STARTING_SECOND_QUOTE,
3098 # 4. A quote inside a quoted string where the previous character was
3099 # not a quote, so the string is not empty. Can be either: end of a
3100 # quoted string (a delimiter) or a quote escape. The next char must be
3101 # either '"' or ','.
3102 HAD_QUOTE_IN_QUOTED,
3103 # 5. Second quote inside a quoted string.
3104 HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED,
3105 # 6. Processing a field that started with '"'.
3106 IN_STR_QUOTED) = range(7)
3107
3108 def __init__(self, f):
3109 self.f = f
3110
3111 def __iter__(self):
3112 return self
3113
3114 def next(self):
3115 """Splits the line in fields."""
3116 line = self.f.readline()
3117 if not line:
3118 raise StopIteration()
3119 line = line.strip()
3120 fields = []
3121 state = self.HAD_DELIMITER
3122 for i, c in enumerate(line):
3123 if state == self.HAD_DELIMITER:
3124 if c == ',':
3125 # Empty field.
3126 fields.append('')
3127 elif c == ' ':
3128 # Ignore initial whitespaces
3129 pass
3130 elif c == '"':
3131 state = self.STARTING_STR_QUOTED
3132 fields.append('')
3133 else:
3134 # Start of a new field.
3135 state = self.IN_STR
3136 fields.append(c)
3137
3138 elif state == self.IN_STR:
3139 # Do not accept quote inside unquoted field.
3140 assert c != '"', (i, c, line, fields)
3141 if c == ',':
3142 fields[-1] = fields[-1].strip()
3143 state = self.HAD_DELIMITER
3144 else:
3145 fields[-1] = fields[-1] + c
3146
3147 elif state == self.STARTING_STR_QUOTED:
3148 if c == '"':
3149 # Do not store the character yet.
3150 state = self.STARTING_SECOND_QUOTE
3151 else:
3152 state = self.IN_STR_QUOTED
3153 fields[-1] = fields[-1] + c
3154
3155 elif state == self.STARTING_SECOND_QUOTE:
3156 if c == ',':
3157 # It was an empty field. '""' == ''.
3158 state = self.HAD_DELIMITER
3159 else:
3160 fields[-1] = fields[-1] + '"' + c
3161 state = self.IN_STR_QUOTED
3162
3163 elif state == self.HAD_QUOTE_IN_QUOTED:
3164 if c == ',':
3165 # End of the string.
3166 state = self.HAD_DELIMITER
3167 elif c == '"':
3168 state = self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED
3169 else:
3170 # The previous double-quote was just an unescaped quote.
3171 fields[-1] = fields[-1] + '"' + c
3172 state = self.IN_STR_QUOTED
3173
3174 elif state == self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED:
3175 if c == ',':
3176 # End of the string.
3177 state = self.HAD_DELIMITER
3178 fields[-1] = fields[-1] + '"'
3179 else:
maruel@chromium.org261d1352013-02-25 19:23:20 +00003180 # That's just how the logger rolls. Revert back to appending the
3181 # char and "guess" it was a quote in a double-quoted string.
3182 state = self.IN_STR_QUOTED
3183 fields[-1] = fields[-1] + '"' + c
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003184
3185 elif state == self.IN_STR_QUOTED:
3186 if c == '"':
3187 # Could be a delimiter or an escape.
3188 state = self.HAD_QUOTE_IN_QUOTED
3189 else:
3190 fields[-1] = fields[-1] + c
3191
3192 if state == self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED:
3193 fields[-1] = fields[-1] + '"'
3194 else:
3195 assert state in (
3196 # Terminated with a normal field.
3197 self.IN_STR,
3198 # Terminated with an empty field.
3199 self.STARTING_SECOND_QUOTE,
3200 # Terminated with a normal quoted field.
3201 self.HAD_QUOTE_IN_QUOTED), (
3202 line, state, fields)
3203 return fields
3204
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003205 def __init__(self, logname):
3206 """Starts the log collection.
3207
3208 Requires administrative access. logman.exe is synchronous so no need for a
3209 "warmup" call. 'Windows Kernel Trace' is *localized* so use its GUID
3210 instead. The GUID constant name is SystemTraceControlGuid. Lovely.
3211
3212 One can get the list of potentially interesting providers with:
3213 "logman query providers | findstr /i file"
3214 """
3215 super(LogmanTrace.Tracer, self).__init__(logname)
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00003216 self._signal_script = create_subprocess_thunk()
3217 self._scripts_to_cleanup.append(self._signal_script)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003218 cmd_start = [
3219 'logman.exe',
3220 'start',
3221 'NT Kernel Logger',
3222 '-p', '{9e814aad-3204-11d2-9a82-006008a86939}',
3223 # splitio,fileiocompletion,syscall,file,cswitch,img
3224 '(process,fileio,thread)',
3225 '-o', self._logname + '.etl',
3226 '-ets', # Send directly to kernel
3227 # Values extracted out of thin air.
3228 # Event Trace Session buffer size in kb.
3229 '-bs', '10240',
3230 # Number of Event Trace Session buffers.
3231 '-nb', '16', '256',
3232 ]
3233 logging.debug('Running: %s' % cmd_start)
3234 try:
3235 subprocess.check_call(
3236 cmd_start,
3237 stdin=subprocess.PIPE,
3238 stdout=subprocess.PIPE,
3239 stderr=subprocess.STDOUT)
3240 except subprocess.CalledProcessError, e:
3241 if e.returncode == -2147024891:
3242 print >> sys.stderr, 'Please restart with an elevated admin prompt'
3243 elif e.returncode == -2144337737:
3244 print >> sys.stderr, (
3245 'A kernel trace was already running, stop it and try again')
3246 raise
3247
3248 def trace(self, cmd, cwd, tracename, output):
3249 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
3250 assert os.path.isabs(cmd[0]), cmd[0]
3251 assert os.path.isabs(cwd), cwd
3252 assert os.path.normpath(cwd) == cwd, cwd
3253 with self._lock:
3254 if not self._initialized:
3255 raise TracingFailure(
3256 'Called Tracer.trace() on an unitialized object',
3257 None, None, None, tracename)
3258 assert tracename not in (i['trace'] for i in self._traces)
3259
3260 # Use "logman -?" for help.
3261
3262 stdout = stderr = None
3263 if output:
3264 stdout = subprocess.PIPE
3265 stderr = subprocess.STDOUT
3266
3267 # Run the child process.
3268 logging.debug('Running: %s' % cmd)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00003269 # Use the temporary script generated with create_subprocess_thunk() so we
3270 # have a clear pid owner. Since trace_inputs.py can be used as a library
3271 # and could trace multiple processes simultaneously, it makes it more
3272 # complex if the executable to be traced is executed directly here. It
3273 # also solves issues related to logman.exe that needs to be executed to
3274 # control the kernel trace.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003275 child_cmd = [
3276 sys.executable,
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00003277 self._signal_script,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003278 tracename,
3279 ]
3280 child = subprocess.Popen(
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00003281 child_cmd + tools.fix_python_path(cmd),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003282 cwd=cwd,
3283 stdin=subprocess.PIPE,
3284 stdout=stdout,
3285 stderr=stderr)
3286 logging.debug('Started child pid: %d' % child.pid)
3287 out = child.communicate()[0]
3288 # This doesn't mean all the grand-children are done. Sadly, we don't have
3289 # a good way to determine that.
3290
3291 with self._lock:
3292 assert tracename not in (i['trace'] for i in self._traces)
3293 self._traces.append({
maruel@chromium.orgf43e68b2012-10-15 20:23:10 +00003294 'cmd': cmd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003295 'cwd': cwd,
maruel@chromium.org964ddde2012-10-29 18:13:46 +00003296 'output': out,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003297 'pid': child.pid,
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00003298 # Used to figure out the real process when process ids are reused.
3299 'thunk_cmd': child_cmd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003300 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003301 })
3302
3303 return child.returncode, out
3304
3305 def close(self, _timeout=None):
3306 """Stops the kernel log collection and converts the traces to text
3307 representation.
3308 """
3309 with self._lock:
maruel@chromium.org808e3f22013-05-16 16:42:54 +00003310 try:
3311 super(LogmanTrace.Tracer, self).close()
3312 finally:
3313 cmd_stop = [
3314 'logman.exe',
3315 'stop',
3316 'NT Kernel Logger',
3317 '-ets', # Sends the command directly to the kernel.
3318 ]
3319 logging.debug('Running: %s' % cmd_stop)
3320 subprocess.check_call(
3321 cmd_stop,
3322 stdin=subprocess.PIPE,
3323 stdout=subprocess.PIPE,
3324 stderr=subprocess.STDOUT)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003325
3326 def post_process_log(self):
3327 """Converts the .etl file into .csv then into .json."""
3328 super(LogmanTrace.Tracer, self).post_process_log()
3329 logformat = 'csv'
3330 self._convert_log(logformat)
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003331 self._trim_log(logformat)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003332
maruel@chromium.org808e3f22013-05-16 16:42:54 +00003333 def _gen_logdata(self):
3334 return {
3335 'format': 'csv',
3336 'traces': self._traces,
3337 }
3338
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003339 def _trim_log(self, logformat):
3340 """Reduces the amount of data in original log by generating a 'reduced'
3341 log.
3342 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003343 if logformat == 'csv_utf16':
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003344 file_handle = codecs.open(
3345 self._logname + '.' + logformat, 'r', encoding='utf-16')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003346
3347 elif logformat == 'csv':
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003348 assert sys.getfilesystemencoding() == 'mbcs'
3349 file_handle = codecs.open(
3350 self._logname + '.' + logformat, 'r',
3351 encoding=get_current_encoding())
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003352
3353 supported_events = LogmanTrace.Context.supported_events()
3354
3355 def trim(generator):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003356 """Loads items from the generator and returns the interesting data.
3357
3358 It filters out any uninteresting line and reduce the amount of data in
3359 the trace.
3360 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003361 for index, line in enumerate(generator):
3362 if not index:
3363 if line != self.EXPECTED_HEADER:
3364 raise TracingFailure(
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003365 'Found malformed header: %s' % line,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003366 None, None, None)
3367 continue
3368 # As you can see, the CSV is full of useful non-redundant information:
3369 if (line[2] != '0' or # Event ID
3370 line[3] not in ('2', '3') or # Version
3371 line[4] != '0' or # Channel
3372 line[5] != '0' or # Level
3373 line[7] != '0' or # Task
3374 line[8] != '0x0000000000000000' or # Keyword
3375 line[12] != '' or # Instance ID
3376 line[13] != '' or # Parent Instance ID
3377 line[14] != self.NULL_GUID or # Activity ID
3378 line[15] != ''): # Related Activity ID
3379 raise TracingFailure(
3380 'Found unexpected values in line: %s' % ' '.join(line),
3381 None, None, None)
3382
3383 if (line[self.EVENT_NAME], line[self.TYPE]) not in supported_events:
3384 continue
3385
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003386 yield [
3387 line[self.EVENT_NAME],
3388 line[self.TYPE],
3389 line[self.PID],
3390 line[self.TID],
3391 line[self.PROCESSOR_ID],
3392 line[self.TIMESTAMP],
3393 ] + line[self.USER_DATA:]
3394
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003395 # must not convert the trim() call into a list, since it will use too much
3396 # memory for large trace. use a csv file as a workaround since the json
3397 # parser requires a complete in-memory file.
3398 with open('%s.preprocessed' % self._logname, 'wb') as f:
3399 # $ and * can't be used in file name on windows, reducing the likelihood
3400 # of having to escape a string.
3401 out = csv.writer(
3402 f, delimiter='$', quotechar='*', quoting=csv.QUOTE_MINIMAL)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003403 for line in trim(self.CsvReader(file_handle)):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003404 out.writerow([s.encode('utf-8') for s in line])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003405
3406 def _convert_log(self, logformat):
3407 """Converts the ETL trace to text representation.
3408
3409 Normally, 'csv' is sufficient. If complex scripts are used (like eastern
3410 languages), use 'csv_utf16'. If localization gets in the way, use 'xml'.
3411
3412 Arguments:
3413 - logformat: Text format to be generated, csv, csv_utf16 or xml.
3414
3415 Use "tracerpt -?" for help.
3416 """
3417 LOCALE_INVARIANT = 0x7F
3418 windll.kernel32.SetThreadLocale(LOCALE_INVARIANT)
3419 cmd_convert = [
3420 'tracerpt.exe',
3421 '-l', self._logname + '.etl',
3422 '-o', self._logname + '.' + logformat,
3423 '-gmt', # Use UTC
3424 '-y', # No prompt
3425 # Use -of XML to get the header of each items after column 19, e.g. all
3426 # the actual headers of 'User Data'.
3427 ]
3428
3429 if logformat == 'csv':
3430 # tracerpt localizes the 'Type' column, for major brainfuck
3431 # entertainment. I can't imagine any sane reason to do that.
3432 cmd_convert.extend(['-of', 'CSV'])
3433 elif logformat == 'csv_utf16':
3434 # This causes it to use UTF-16, which doubles the log size but ensures
3435 # the log is readable for non-ASCII characters.
3436 cmd_convert.extend(['-of', 'CSV', '-en', 'Unicode'])
3437 elif logformat == 'xml':
3438 cmd_convert.extend(['-of', 'XML'])
3439 else:
3440 raise ValueError('Unexpected log format \'%s\'' % logformat)
3441 logging.debug('Running: %s' % cmd_convert)
3442 # This can takes tens of minutes for large logs.
3443 # Redirects all output to stderr.
3444 subprocess.check_call(
3445 cmd_convert,
3446 stdin=subprocess.PIPE,
3447 stdout=sys.stderr,
3448 stderr=sys.stderr)
3449
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003450 def __init__(self, use_sudo=False): # pylint: disable=W0613
3451 super(LogmanTrace, self).__init__()
3452 # Ignore use_sudo. It's irrelevant on Windows but kept to simplify the API.
3453
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003454 @staticmethod
3455 def clean_trace(logname):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003456 for ext in ('', '.csv', '.etl', '.json', '.xml', '.preprocessed'):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003457 if os.path.isfile(logname + ext):
3458 os.remove(logname + ext)
3459
3460 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003461 def parse_log(cls, logname, blacklist, trace_name):
3462 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00003463 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003464
3465 def blacklist_more(filepath):
3466 # All the NTFS metadata is in the form x:\$EXTEND or stuff like that.
3467 return blacklist(filepath) or re.match(r'[A-Z]\:\\\$EXTEND', filepath)
3468
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003469 # Create a list of (Context, result_dict) tuples. This is necessary because
3470 # the csv file may be larger than the amount of available memory.
3471 contexes = [
3472 (
3473 cls.Context(
3474 blacklist_more, item['pid'], item['trace'], item['thunk_cmd']),
3475 {
3476 'output': item['output'],
3477 'trace': item['trace'],
3478 },
3479 )
3480 for item in read_json(logname)['traces']
3481 if not trace_name or item['trace'] == trace_name
3482 ]
3483
3484 # The log may be too large to fit in memory and it is not efficient to read
3485 # it multiple times, so multiplex the contexes instead, which is slightly
3486 # more awkward.
3487 with open('%s.preprocessed' % logname, 'rb') as f:
3488 lines = csv.reader(
3489 f, delimiter='$', quotechar='*', quoting=csv.QUOTE_MINIMAL)
3490 for encoded in lines:
3491 line = [s.decode('utf-8') for s in encoded]
3492 # Convert the PID in-place from hex.
3493 line[cls.Context.PID] = int(line[cls.Context.PID], 16)
3494 for context in contexes:
3495 if 'exception' in context[1]:
3496 continue
3497 try:
3498 context[0].on_line(line)
3499 except TracingFailure:
3500 context[1]['exception'] = sys.exc_info()
3501
3502 for context in contexes:
3503 if 'exception' in context[1]:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003504 continue
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003505 context[1]['results'] = context[0].to_results()
3506
3507 return [context[1] for context in contexes]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003508
3509
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003510def get_api(**kwargs):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003511 """Returns the correct implementation for the current OS."""
3512 if sys.platform == 'cygwin':
3513 raise NotImplementedError(
3514 'Not implemented for cygwin, start the script from Win32 python')
3515 flavors = {
3516 'win32': LogmanTrace,
3517 'darwin': Dtrace,
3518 'sunos5': Dtrace,
3519 'freebsd7': Dtrace,
3520 'freebsd8': Dtrace,
3521 }
3522 # Defaults to strace.
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003523 return flavors.get(sys.platform, Strace)(**kwargs)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003524
3525
3526def extract_directories(root_dir, files, blacklist):
3527 """Detects if all the files in a directory are in |files| and if so, replace
3528 the individual files by a Results.Directory instance.
3529
3530 Takes a list of Results.File instances and returns a shorter list of
3531 Results.File and Results.Directory instances.
3532
3533 Arguments:
3534 - root_dir: Optional base directory that shouldn't be search further.
3535 - files: list of Results.File instances.
maruel@chromium.org3683afe2013-07-27 00:09:27 +00003536 - blacklist: lambda to reject unneeded files, for example r'.+\.pyc'.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003537 """
3538 logging.info(
3539 'extract_directories(%s, %d files, ...)' % (root_dir, len(files)))
3540 assert not (root_dir or '').endswith(os.path.sep), root_dir
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003541 # It is important for root_dir to not be a symlinked path, make sure to call
3542 # os.path.realpath() as needed.
3543 assert not root_dir or (
3544 os.path.realpath(get_native_path_case(root_dir)) == root_dir)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003545 assert not any(isinstance(f, Results.Directory) for f in files)
3546 # Remove non existent files.
3547 files = [f for f in files if f.existent]
3548 if not files:
3549 return files
3550 # All files must share the same root, which can be None.
3551 assert len(set(f.root for f in files)) == 1, set(f.root for f in files)
3552
3553 # Creates a {directory: {filename: File}} mapping, up to root.
3554 buckets = {}
3555 if root_dir:
3556 buckets[root_dir] = {}
3557 for fileobj in files:
3558 path = fileobj.full_path
3559 directory = os.path.dirname(path)
3560 assert directory
3561 # Do not use os.path.basename() so trailing os.path.sep is kept.
3562 basename = path[len(directory)+1:]
3563 files_in_directory = buckets.setdefault(directory, {})
3564 files_in_directory[basename] = fileobj
3565 # Add all the directories recursively up to root.
3566 while True:
3567 old_d = directory
3568 directory = os.path.dirname(directory)
3569 if directory + os.path.sep == root_dir or directory == old_d:
3570 break
3571 buckets.setdefault(directory, {})
3572
3573 root_prefix = len(root_dir) + 1 if root_dir else 0
3574 for directory in sorted(buckets, reverse=True):
maruel@chromium.orgb1678e62013-03-19 15:55:17 +00003575 if not os.path.isdir(directory):
3576 logging.debug(
3577 '%s was a directory but doesn\'t exist anymore; ignoring', directory)
3578 continue
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003579 actual = set(f for f in os.listdir(directory) if not blacklist(f))
3580 expected = set(buckets[directory])
3581 if not (actual - expected):
3582 parent = os.path.dirname(directory)
3583 buckets[parent][os.path.basename(directory)] = Results.Directory(
3584 root_dir,
3585 directory[root_prefix:],
3586 False,
3587 sum(f.size for f in buckets[directory].itervalues()),
3588 sum(f.nb_files for f in buckets[directory].itervalues()))
3589 # Remove the whole bucket.
3590 del buckets[directory]
3591
3592 # Reverse the mapping with what remains. The original instances are returned,
3593 # so the cached meta data is kept.
3594 files = sum((x.values() for x in buckets.itervalues()), [])
3595 return sorted(files, key=lambda x: x.path)
3596
3597
3598def trace(logfile, cmd, cwd, api, output):
3599 """Traces an executable. Returns (returncode, output) from api.
3600
3601 Arguments:
3602 - logfile: file to write to.
3603 - cmd: command to run.
3604 - cwd: current directory to start the process in.
3605 - api: a tracing api instance.
3606 - output: if True, returns output, otherwise prints it at the console.
3607 """
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00003608 cmd = tools.fix_python_path(cmd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003609 api.clean_trace(logfile)
3610 with api.get_tracer(logfile) as tracer:
3611 return tracer.trace(cmd, cwd, 'default', output)
3612
3613
maruel@chromium.orge5322512013-08-19 20:17:57 +00003614def CMDclean(parser, args):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003615 """Cleans up traces."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003616 options, args = parser.parse_args(args)
3617 api = get_api()
3618 api.clean_trace(options.log)
3619 return 0
3620
3621
maruel@chromium.orge5322512013-08-19 20:17:57 +00003622def CMDtrace(parser, args):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003623 """Traces an executable."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003624 parser.allow_interspersed_args = False
3625 parser.add_option(
3626 '-q', '--quiet', action='store_true',
3627 help='Redirects traced executable output to /dev/null')
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003628 parser.add_option(
3629 '-s', '--sudo', action='store_true',
3630 help='Use sudo when shelling out the tracer tool (ignored on Windows)')
3631 parser.add_option(
3632 '-n', '--no-sudo', action='store_false',
3633 help='Don\'t use sudo')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003634 options, args = parser.parse_args(args)
3635
3636 if not args:
3637 parser.error('Please provide a command to run')
3638
3639 if not os.path.isabs(args[0]) and os.access(args[0], os.X_OK):
3640 args[0] = os.path.abspath(args[0])
3641
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003642 # options.sudo default value is None, which is to do whatever tracer defaults
3643 # do.
3644 api = get_api(use_sudo=options.sudo)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003645 return trace(options.log, args, os.getcwd(), api, options.quiet)[0]
3646
3647
maruel@chromium.orge5322512013-08-19 20:17:57 +00003648def CMDread(parser, args):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003649 """Reads the logs and prints the result."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003650 parser.add_option(
3651 '-V', '--variable',
3652 nargs=2,
3653 action='append',
3654 dest='variables',
3655 metavar='VAR_NAME directory',
3656 default=[],
3657 help=('Variables to replace relative directories against. Example: '
3658 '"-v \'$HOME\' \'/home/%s\'" will replace all occurence of your '
3659 'home dir with $HOME') % getpass.getuser())
3660 parser.add_option(
3661 '--root-dir',
3662 help='Root directory to base everything off it. Anything outside of this '
3663 'this directory will not be reported')
3664 parser.add_option(
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003665 '--trace-name',
3666 help='Only reads one of the trace. Defaults to reading all traces')
3667 parser.add_option(
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003668 '-j', '--json', action='store_true',
3669 help='Outputs raw result data as json')
3670 parser.add_option(
maruel@chromium.org3683afe2013-07-27 00:09:27 +00003671 '--trace-blacklist', action='append', default=[],
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003672 help='List of regexp to use as blacklist filter')
3673 options, args = parser.parse_args(args)
3674
3675 if options.root_dir:
maruel@chromium.org7dc17b32013-04-02 22:27:05 +00003676 options.root_dir = get_native_path_case(
3677 unicode(os.path.abspath(options.root_dir)))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003678
3679 variables = dict(options.variables)
3680 api = get_api()
maruel@chromium.org3683afe2013-07-27 00:09:27 +00003681 blacklist = gen_blacklist(options.trace_blacklist)
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003682 data = api.parse_log(options.log, blacklist, options.trace_name)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003683 # Process each trace.
3684 output_as_json = []
maruel@chromium.org9e56b592013-05-21 16:56:05 +00003685 try:
3686 for item in data:
3687 if 'exception' in item:
3688 # Do not abort the other traces.
3689 print >> sys.stderr, (
3690 'Trace %s: Got an exception: %s' % (
3691 item['trace'], item['exception'][1]))
3692 continue
3693 results = item['results']
3694 if options.root_dir:
3695 results = results.strip_root(options.root_dir)
3696
3697 if options.json:
3698 output_as_json.append(results.flatten())
3699 else:
3700 simplified = extract_directories(
3701 options.root_dir, results.files, blacklist)
3702 simplified = [f.replace_variables(variables) for f in simplified]
3703 if len(data) > 1:
3704 print('Trace: %s' % item['trace'])
3705 print('Total: %d' % len(results.files))
3706 print('Non existent: %d' % len(results.non_existent))
3707 for f in results.non_existent:
3708 print(' %s' % f.path)
3709 print(
3710 'Interesting: %d reduced to %d' % (
3711 len(results.existent), len(simplified)))
3712 for f in simplified:
3713 print(' %s' % f.path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003714
3715 if options.json:
maruel@chromium.org9e56b592013-05-21 16:56:05 +00003716 write_json(sys.stdout, output_as_json, False)
3717 except KeyboardInterrupt:
3718 return 1
3719 except IOError as e:
3720 if e.errno == errno.EPIPE:
3721 # Do not print a stack trace when the output is piped to less and the user
3722 # quits before the whole output was written.
3723 return 1
3724 raise
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003725 return 0
3726
3727
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00003728class OptionParserTraceInputs(tools.OptionParserWithLogging):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003729 """Adds automatic --log handling."""
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00003730
3731 # Disable --log-file options since both --log and --log-file options are
3732 # confusing.
3733 # TODO(vadimsh): Rename --log-file or --log to something else.
3734 enable_log_file = False
3735
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003736 def __init__(self, **kwargs):
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00003737 tools.OptionParserWithLogging.__init__(self, **kwargs)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003738 self.add_option(
3739 '-l', '--log', help='Log file to generate or read, required')
3740
3741 def parse_args(self, *args, **kwargs):
3742 """Makes sure the paths make sense.
3743
3744 On Windows, / and \ are often mixed together in a path.
3745 """
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00003746 options, args = tools.OptionParserWithLogging.parse_args(
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003747 self, *args, **kwargs)
3748 if not options.log:
3749 self.error('Must supply a log file with -l')
3750 options.log = os.path.abspath(options.log)
3751 return options, args
3752
3753
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003754def main(argv):
maruel@chromium.orge5322512013-08-19 20:17:57 +00003755 dispatcher = subcommand.CommandDispatcher(__name__)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003756 try:
maruel@chromium.org3d671992013-08-20 00:38:27 +00003757 return dispatcher.execute(
3758 OptionParserTraceInputs(version=__version__), argv)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003759 except TracingFailure, e:
3760 sys.stderr.write('\nError: ')
3761 sys.stderr.write(str(e))
3762 sys.stderr.write('\n')
3763 return 1
3764
3765
3766if __name__ == '__main__':
maruel@chromium.orge5322512013-08-19 20:17:57 +00003767 fix_encoding.fix_encoding()
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00003768 tools.disable_buffering()
maruel@chromium.orge5322512013-08-19 20:17:57 +00003769 colorama.init()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003770 sys.exit(main(sys.argv[1:]))