blob: 47f798b72b127f656879b5a5e35c80ff7f12153b [file] [log] [blame]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001#!/usr/bin/env python
2# coding=utf-8
Marc-Antoine Ruel8add1242013-11-05 17:28:27 -05003# Copyright 2012 The Swarming Authors. All rights reserved.
Marc-Antoine Ruele98b1122013-11-05 20:27:57 -05004# Use of this source code is governed under the Apache License, Version 2.0 that
5# can be found in the LICENSE file.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00006
7"""Traces an executable and its child processes and extract the files accessed
8by them.
9
10The implementation uses OS-specific API. The native Kernel logger and the ETL
11interface is used on Windows. Dtrace is used on OSX. Strace is used otherwise.
12The OS-specific implementation is hidden in an 'API' interface.
13
14The results are embedded in a Results instance. The tracing is done in two
15phases, the first is to do the actual trace and generate an
16implementation-specific log file. Then the log file is parsed to extract the
17information, including the individual child processes and the files accessed
18from the log.
19"""
20
21import codecs
22import csv
maruel@chromium.org9e56b592013-05-21 16:56:05 +000023import errno
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000024import getpass
25import glob
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000026import logging
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000027import os
28import re
maruel@chromium.org9b2112a2013-04-05 18:01:02 +000029import stat
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000030import subprocess
31import sys
32import tempfile
33import threading
34import time
35import weakref
36
maruel@chromium.orge5322512013-08-19 20:17:57 +000037from third_party import colorama
38from third_party.depot_tools import fix_encoding
39from third_party.depot_tools import subcommand
40
maruel@chromium.org561d4b22013-09-26 21:08:08 +000041from utils import file_path
Marc-Antoine Ruelf74cffe2015-07-15 15:21:34 -040042from utils import logging_utils
vadimsh@chromium.orga4326472013-08-24 02:05:41 +000043from utils import tools
44
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000045## OS-specific imports
46
47if sys.platform == 'win32':
maruel@chromium.org561d4b22013-09-26 21:08:08 +000048 from ctypes.wintypes import byref, c_int, c_wchar_p
49 from ctypes.wintypes import windll # pylint: disable=E0611
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000050
51
maruel@chromium.org3d671992013-08-20 00:38:27 +000052__version__ = '0.1'
53
54
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000055BASE_DIR = os.path.dirname(os.path.abspath(__file__))
56ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR))
57
58
59class TracingFailure(Exception):
60 """An exception occured during tracing."""
61 def __init__(self, description, pid, line_number, line, *args):
62 super(TracingFailure, self).__init__(
63 description, pid, line_number, line, *args)
64 self.description = description
65 self.pid = pid
66 self.line_number = line_number
67 self.line = line
68 self.extra = args
69
70 def __str__(self):
71 out = self.description
72 if self.pid:
73 out += '\npid: %d' % self.pid
74 if self.line_number:
75 out += '\nline: %d' % self.line_number
76 if self.line:
77 out += '\n%s' % self.line
78 if self.extra:
79 out += '\n' + ', '.join(map(str, filter(None, self.extra)))
80 return out
81
82
83## OS-specific functions
84
85if sys.platform == 'win32':
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000086 def get_current_encoding():
87 """Returns the 'ANSI' code page associated to the process."""
88 return 'cp%d' % int(windll.kernel32.GetACP())
89
90
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000091 def CommandLineToArgvW(command_line):
92 """Splits a commandline into argv using CommandLineToArgvW()."""
93 # http://msdn.microsoft.com/library/windows/desktop/bb776391.aspx
94 size = c_int()
maruel@chromium.org306e0e72012-11-02 18:22:03 +000095 assert isinstance(command_line, unicode)
96 ptr = windll.shell32.CommandLineToArgvW(command_line, byref(size))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000097 try:
98 return [arg for arg in (c_wchar_p * size.value).from_address(ptr)]
99 finally:
100 windll.kernel32.LocalFree(ptr)
101
102
Marc-Antoine Ruel5906fcd2013-11-08 21:02:24 -0500103def can_trace():
104 """Returns True if the user is an administrator on Windows.
105
106 It is required for tracing to work.
107 """
108 if sys.platform != 'win32':
109 return True
110 return bool(windll.shell32.IsUserAnAdmin())
111
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000112
maruel@chromium.org2909b7c2013-05-29 18:52:59 +0000113def create_subprocess_thunk():
114 """Creates a small temporary script to start the child process.
115
116 This thunk doesn't block, its unique name is used to identify it as the
117 parent.
118 """
Marc-Antoine Ruel3c979cb2015-03-11 13:43:28 -0400119 handle, name = tempfile.mkstemp(prefix=u'trace_inputs_thunk', suffix='.py')
maruel@chromium.org2909b7c2013-05-29 18:52:59 +0000120 try:
121 os.write(
122 handle,
123 (
124 'import subprocess, sys\n'
125 'sys.exit(subprocess.call(sys.argv[2:]))\n'
126 ))
127 finally:
128 os.close(handle)
129 return name
130
131
132def create_exec_thunk():
133 """Creates a small temporary script to start the child executable.
134
135 Reads from the file handle provided as the fisrt argument to block, then
136 execv() the command to be traced.
137 """
Marc-Antoine Ruel3c979cb2015-03-11 13:43:28 -0400138 handle, name = tempfile.mkstemp(prefix=u'trace_inputs_thunk', suffix='.py')
maruel@chromium.org2909b7c2013-05-29 18:52:59 +0000139 try:
140 os.write(
141 handle,
142 (
143 'import os, sys\n'
144 'fd = int(sys.argv[1])\n'
145 # This will block until the controlling process writes a byte on the
146 # pipe. It will do so once the tracing tool, e.g. strace, is ready to
147 # trace.
148 'os.read(fd, 1)\n'
149 'os.close(fd)\n'
150 'os.execv(sys.argv[2], sys.argv[2:])\n'
151 ))
152 finally:
153 os.close(handle)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000154 return name
155
156
157def strace_process_quoted_arguments(text):
158 """Extracts quoted arguments on a string and return the arguments as a list.
159
160 Implemented as an automaton. Supports incomplete strings in the form
161 '"foo"...'.
162
163 Example:
164 With text = '"foo", "bar"', the function will return ['foo', 'bar']
165
166 TODO(maruel): Implement escaping.
167 """
168 # All the possible states of the DFA.
169 ( NEED_QUOTE, # Begining of a new arguments.
170 INSIDE_STRING, # Inside an argument.
171 ESCAPED, # Found a '\' inside a quote. Treat the next char as-is.
172 NEED_COMMA_OR_DOT, # Right after the closing quote of an argument. Could be
173 # a serie of 3 dots or a comma.
174 NEED_SPACE, # Right after a comma
175 NEED_DOT_2, # Found a dot, need a second one.
176 NEED_DOT_3, # Found second dot, need a third one.
177 NEED_COMMA, # Found third dot, need a comma.
178 ) = range(8)
179
180 state = NEED_QUOTE
181 out = []
182 for index, char in enumerate(text):
183 if char == '"':
184 if state == NEED_QUOTE:
185 state = INSIDE_STRING
186 # A new argument was found.
187 out.append('')
188 elif state == INSIDE_STRING:
189 # The argument is now closed.
190 state = NEED_COMMA_OR_DOT
191 elif state == ESCAPED:
192 out[-1] += char
193 state = INSIDE_STRING
194 else:
195 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000196 'Can\'t process char \'%s\' at column %d for: %r' % (
197 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000198 index,
199 text)
200 elif char == ',':
201 if state in (NEED_COMMA_OR_DOT, NEED_COMMA):
202 state = NEED_SPACE
203 elif state == INSIDE_STRING:
204 out[-1] += char
205 elif state == ESCAPED:
206 out[-1] += char
207 state = INSIDE_STRING
208 else:
209 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000210 'Can\'t process char \'%s\' at column %d for: %r' % (
211 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000212 index,
213 text)
214 elif char == ' ':
215 if state == NEED_SPACE:
216 state = NEED_QUOTE
217 elif state == INSIDE_STRING:
218 out[-1] += char
219 elif state == ESCAPED:
220 out[-1] += char
221 state = INSIDE_STRING
222 else:
223 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000224 'Can\'t process char \'%s\' at column %d for: %r' % (
225 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000226 index,
227 text)
228 elif char == '.':
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000229 if state in (NEED_QUOTE, NEED_COMMA_OR_DOT):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000230 # The string is incomplete, this mean the strace -s flag should be
231 # increased.
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000232 # For NEED_QUOTE, the input string would look like '"foo", ...'.
233 # For NEED_COMMA_OR_DOT, the input string would look like '"foo"...'
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000234 state = NEED_DOT_2
235 elif state == NEED_DOT_2:
236 state = NEED_DOT_3
237 elif state == NEED_DOT_3:
238 state = NEED_COMMA
239 elif state == INSIDE_STRING:
240 out[-1] += char
241 elif state == ESCAPED:
242 out[-1] += char
243 state = INSIDE_STRING
244 else:
245 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000246 'Can\'t process char \'%s\' at column %d for: %r' % (
247 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000248 index,
249 text)
250 elif char == '\\':
251 if state == ESCAPED:
252 out[-1] += char
253 state = INSIDE_STRING
254 elif state == INSIDE_STRING:
255 state = ESCAPED
256 else:
257 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000258 'Can\'t process char \'%s\' at column %d for: %r' % (
259 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000260 index,
261 text)
262 else:
263 if state == INSIDE_STRING:
264 out[-1] += char
265 else:
266 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000267 'Can\'t process char \'%s\' at column %d for: %r' % (
268 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000269 index,
270 text)
271 if state not in (NEED_COMMA, NEED_COMMA_OR_DOT):
272 raise ValueError(
273 'String is incorrectly terminated: %r' % text,
274 text)
275 return out
276
277
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000278def assert_is_renderable(pseudo_string):
279 """Asserts the input is a valid object to be processed by render()."""
280 assert (
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000281 pseudo_string is None or
282 isinstance(pseudo_string, unicode) or
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000283 hasattr(pseudo_string, 'render')), repr(pseudo_string)
284
285
286def render(pseudo_string):
287 """Converts the pseudo-string to an unicode string."""
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000288 if pseudo_string is None or isinstance(pseudo_string, unicode):
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000289 return pseudo_string
290 return pseudo_string.render()
291
292
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000293class Results(object):
294 """Results of a trace session."""
295
296 class _TouchedObject(object):
297 """Something, a file or a directory, that was accessed."""
298 def __init__(self, root, path, tainted, size, nb_files):
299 logging.debug(
300 '%s(%s, %s, %s, %s, %s)' %
301 (self.__class__.__name__, root, path, tainted, size, nb_files))
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000302 assert_is_renderable(root)
303 assert_is_renderable(path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000304 self.root = root
305 self.path = path
306 self.tainted = tainted
307 self.nb_files = nb_files
maruel@chromium.org538141b2013-06-03 20:57:17 +0000308 # Can be used as a cache or a default value, depending on context. In
309 # particular, once self.tainted is True, because the path was replaced
310 # with a variable, it is not possible to look up the file size.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000311 self._size = size
312 # These are cache only.
313 self._real_path = None
314
315 # Check internal consistency.
316 assert path, path
maruel@chromium.org561d4b22013-09-26 21:08:08 +0000317 assert tainted or bool(root) != bool(file_path.isabs(path)), (root, path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000318 assert tainted or (
319 not os.path.exists(self.full_path) or
maruel@chromium.org561d4b22013-09-26 21:08:08 +0000320 (self.full_path == file_path.get_native_path_case(self.full_path))), (
321 tainted,
322 self.full_path,
323 file_path.get_native_path_case(self.full_path))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000324
325 @property
326 def existent(self):
327 return self.size != -1
328
329 @property
330 def full_path(self):
331 if self.root:
332 return os.path.join(self.root, self.path)
333 return self.path
334
335 @property
336 def real_path(self):
337 """Returns the path with symlinks resolved."""
338 if not self._real_path:
339 self._real_path = os.path.realpath(self.full_path)
340 return self._real_path
341
342 @property
343 def size(self):
maruel@chromium.org538141b2013-06-03 20:57:17 +0000344 """File's size. -1 is not existent.
345
346 Once tainted, it is not possible the retrieve the file size anymore since
347 the path is composed of variables.
348 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000349 if self._size is None and not self.tainted:
350 try:
351 self._size = os.stat(self.full_path).st_size
352 except OSError:
353 self._size = -1
354 return self._size
355
356 def flatten(self):
357 """Returns a dict representing this object.
358
359 A 'size' of 0 means the file was only touched and not read.
360 """
361 return {
362 'path': self.path,
363 'size': self.size,
364 }
365
366 def replace_variables(self, variables):
367 """Replaces the root of this File with one of the variables if it matches.
368
369 If a variable replacement occurs, the cloned object becomes tainted.
370 """
371 for variable, root_path in variables.iteritems():
372 if self.path.startswith(root_path):
373 return self._clone(
374 self.root, variable + self.path[len(root_path):], True)
375 # No need to clone, returns ourself.
376 return self
377
378 def strip_root(self, root):
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000379 """Returns a clone of itself with 'root' stripped off.
380
381 Note that the file is kept if it is either accessible from a symlinked
382 path that was used to access the file or through the real path.
383 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000384 # Check internal consistency.
maruel@chromium.org561d4b22013-09-26 21:08:08 +0000385 assert (
386 self.tainted or
387 (file_path.isabs(root) and root.endswith(os.path.sep))), root
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000388 if not self.full_path.startswith(root):
389 # Now try to resolve the symlinks to see if it can be reached this way.
390 # Only try *after* trying without resolving symlink.
391 if not self.real_path.startswith(root):
392 return None
393 path = self.real_path
394 else:
395 path = self.full_path
396 return self._clone(root, path[len(root):], self.tainted)
397
398 def _clone(self, new_root, new_path, tainted):
399 raise NotImplementedError(self.__class__.__name__)
400
401 class File(_TouchedObject):
402 """A file that was accessed. May not be present anymore.
403
404 If tainted is true, it means it is not a real path anymore as a variable
405 replacement occured.
406
maruel@chromium.org538141b2013-06-03 20:57:17 +0000407 |mode| can be one of None, TOUCHED, READ or WRITE.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000408 """
maruel@chromium.org538141b2013-06-03 20:57:17 +0000409 # Was probed for existence, and it is existent, but was never _opened_.
410 TOUCHED = 't'
411 # Opened for read only and guaranteed to not have been written to.
412 READ = 'r'
413 # Opened for write.
414 WRITE = 'w'
415
416 # They are listed in order of priority. E.g. if a file is traced as TOUCHED
417 # then as WRITE, only keep WRITE. None means no idea, which is a problem on
418 # Windows.
419 ACCEPTABLE_MODES = (None, TOUCHED, READ, WRITE)
420
421 def __init__(self, root, path, tainted, size, mode):
422 assert mode in self.ACCEPTABLE_MODES
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000423 super(Results.File, self).__init__(root, path, tainted, size, 1)
maruel@chromium.org538141b2013-06-03 20:57:17 +0000424 self.mode = mode
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000425
426 def _clone(self, new_root, new_path, tainted):
427 """Clones itself keeping meta-data."""
428 # Keep the self.size and self._real_path caches for performance reason. It
429 # is also important when the file becomes tainted (with a variable instead
430 # of the real path) since self.path is not an on-disk path anymore so
431 # out._size cannot be updated.
maruel@chromium.org538141b2013-06-03 20:57:17 +0000432 out = self.__class__(new_root, new_path, tainted, self.size, self.mode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000433 out._real_path = self._real_path
434 return out
435
maruel@chromium.org538141b2013-06-03 20:57:17 +0000436 def flatten(self):
437 out = super(Results.File, self).flatten()
438 out['mode'] = self.mode
439 return out
440
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000441 class Directory(_TouchedObject):
maruel@chromium.org538141b2013-06-03 20:57:17 +0000442 """A directory of files. Must exist.
443
444 For a Directory instance, self.size is not a cache, it's an actual value
445 that is never modified and represents the total size of the files contained
446 in this directory. It is possible that the directory is empty so that
447 size==0; this happens if there's only an invalid symlink in it.
448 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000449 def __init__(self, root, path, tainted, size, nb_files):
450 """path='.' is a valid value and must be handled appropriately."""
451 assert not path.endswith(os.path.sep), path
452 super(Results.Directory, self).__init__(
453 root, path + os.path.sep, tainted, size, nb_files)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000454
455 def flatten(self):
456 out = super(Results.Directory, self).flatten()
457 out['nb_files'] = self.nb_files
458 return out
459
460 def _clone(self, new_root, new_path, tainted):
461 """Clones itself keeping meta-data."""
462 out = self.__class__(
463 new_root,
464 new_path.rstrip(os.path.sep),
465 tainted,
466 self.size,
467 self.nb_files)
468 out._real_path = self._real_path
469 return out
470
471 class Process(object):
472 """A process that was traced.
473
474 Contains references to the files accessed by this process and its children.
475 """
476 def __init__(self, pid, files, executable, command, initial_cwd, children):
477 logging.debug('Process(%s, %d, ...)' % (pid, len(files)))
478 self.pid = pid
479 self.files = sorted(files, key=lambda x: x.path)
480 self.children = children
481 self.executable = executable
482 self.command = command
483 self.initial_cwd = initial_cwd
484
485 # Check internal consistency.
486 assert len(set(f.path for f in self.files)) == len(self.files), sorted(
487 f.path for f in self.files)
488 assert isinstance(self.children, list)
489 assert isinstance(self.files, list)
490
491 @property
492 def all(self):
493 for child in self.children:
494 for i in child.all:
495 yield i
496 yield self
497
498 def flatten(self):
499 return {
500 'children': [c.flatten() for c in self.children],
501 'command': self.command,
502 'executable': self.executable,
503 'files': [f.flatten() for f in self.files],
504 'initial_cwd': self.initial_cwd,
505 'pid': self.pid,
506 }
507
508 def strip_root(self, root):
maruel@chromium.org561d4b22013-09-26 21:08:08 +0000509 assert file_path.isabs(root) and root.endswith(os.path.sep), root
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000510 # Loads the files after since they are constructed as objects.
511 out = self.__class__(
512 self.pid,
513 filter(None, (f.strip_root(root) for f in self.files)),
514 self.executable,
515 self.command,
516 self.initial_cwd,
517 [c.strip_root(root) for c in self.children])
518 logging.debug(
519 'strip_root(%s) %d -> %d' % (root, len(self.files), len(out.files)))
520 return out
521
522 def __init__(self, process):
523 self.process = process
524 # Cache.
525 self._files = None
526
527 def flatten(self):
528 return {
529 'root': self.process.flatten(),
530 }
531
532 @property
533 def files(self):
534 if self._files is None:
535 self._files = sorted(
536 sum((p.files for p in self.process.all), []),
537 key=lambda x: x.path)
538 return self._files
539
540 @property
541 def existent(self):
542 return [f for f in self.files if f.existent]
543
544 @property
545 def non_existent(self):
546 return [f for f in self.files if not f.existent]
547
548 def strip_root(self, root):
549 """Returns a clone with all the files outside the directory |root| removed
550 and converts all the path to be relative paths.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000551
552 It keeps files accessible through the |root| directory or that have been
553 accessed through any symlink which points to the same directory.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000554 """
555 # Resolve any symlink
556 root = os.path.realpath(root)
maruel@chromium.org561d4b22013-09-26 21:08:08 +0000557 root = (
558 file_path.get_native_path_case(root).rstrip(os.path.sep) + os.path.sep)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000559 logging.debug('strip_root(%s)' % root)
560 return Results(self.process.strip_root(root))
561
562
563class ApiBase(object):
564 """OS-agnostic API to trace a process and its children."""
565 class Context(object):
566 """Processes one log line at a time and keeps the list of traced processes.
567
568 The parsing is complicated by the fact that logs are traced out of order for
569 strace but in-order for dtrace and logman. In addition, on Windows it is
570 very frequent that processids are reused so a flat list cannot be used. But
571 at the same time, it is impossible to faithfully construct a graph when the
572 logs are processed out of order. So both a tree and a flat mapping are used,
573 the tree is the real process tree, while the flat mapping stores the last
574 valid process for the corresponding processid. For the strace case, the
575 tree's head is guessed at the last moment.
576 """
577 class Process(object):
578 """Keeps context for one traced child process.
579
580 Logs all the files this process touched. Ignores directories.
581 """
582 def __init__(self, blacklist, pid, initial_cwd):
583 # Check internal consistency.
584 assert isinstance(pid, int), repr(pid)
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000585 assert_is_renderable(initial_cwd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000586 self.pid = pid
587 # children are Process instances.
588 self.children = []
589 self.initial_cwd = initial_cwd
590 self.cwd = None
maruel@chromium.org538141b2013-06-03 20:57:17 +0000591 self.files = {}
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000592 self.executable = None
593 self.command = None
594 self._blacklist = blacklist
595
596 def to_results_process(self):
597 """Resolves file case sensitivity and or late-bound strings."""
598 # When resolving files, it's normal to get dupe because a file could be
599 # opened multiple times with different case. Resolve the deduplication
600 # here.
601 def fix_path(x):
602 """Returns the native file path case.
603
604 Converts late-bound strings.
605 """
606 if not x:
607 # Do not convert None instance to 'None'.
608 return x
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000609 x = render(x)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000610 if os.path.isabs(x):
611 # If the path is not absolute, which tends to happen occasionally on
612 # Windows, it is not possible to get the native path case so ignore
613 # that trace. It mostly happens for 'executable' value.
maruel@chromium.org561d4b22013-09-26 21:08:08 +0000614 x = file_path.get_native_path_case(x)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000615 return x
616
maruel@chromium.org538141b2013-06-03 20:57:17 +0000617 def fix_and_blacklist_path(x, m):
618 """Receives a tuple (filepath, mode) and processes filepath."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000619 x = fix_path(x)
620 if not x:
621 return
622 # The blacklist needs to be reapplied, since path casing could
623 # influence blacklisting.
624 if self._blacklist(x):
625 return
maruel@chromium.org538141b2013-06-03 20:57:17 +0000626 # Filters out directories. Some may have passed through.
627 if os.path.isdir(x):
628 return
629 return x, m
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000630
maruel@chromium.org538141b2013-06-03 20:57:17 +0000631 # Renders all the files as strings, as some could be RelativePath
632 # instances. It is important to do it first since there could still be
633 # multiple entries with the same path but different modes.
634 rendered = (
635 fix_and_blacklist_path(f, m) for f, m in self.files.iteritems())
636 files = sorted(
637 (f for f in rendered if f),
638 key=lambda x: (x[0], Results.File.ACCEPTABLE_MODES.index(x[1])))
639 # Then converting into a dict will automatically clean up lesser
640 # important values.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000641 files = [
maruel@chromium.org538141b2013-06-03 20:57:17 +0000642 Results.File(None, f, False, None, m)
643 for f, m in dict(files).iteritems()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000644 ]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000645 return Results.Process(
646 self.pid,
647 files,
648 fix_path(self.executable),
649 self.command,
650 fix_path(self.initial_cwd),
651 [c.to_results_process() for c in self.children])
652
maruel@chromium.org538141b2013-06-03 20:57:17 +0000653 def add_file(self, filepath, mode):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000654 """Adds a file if it passes the blacklist."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000655 if self._blacklist(render(filepath)):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000656 return
maruel@chromium.org538141b2013-06-03 20:57:17 +0000657 logging.debug('add_file(%d, %s, %s)', self.pid, filepath, mode)
658 # Note that filepath and not render(filepath) is added. It is because
659 # filepath could be something else than a string, like a RelativePath
660 # instance for dtrace logs.
661 modes = Results.File.ACCEPTABLE_MODES
662 old_mode = self.files.setdefault(filepath, mode)
663 if old_mode != mode and modes.index(old_mode) < modes.index(mode):
664 # Take the highest value.
665 self.files[filepath] = mode
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000666
667 def __init__(self, blacklist):
668 self.blacklist = blacklist
669 # Initial process.
670 self.root_process = None
671 # dict to accelerate process lookup, to not have to lookup the whole graph
672 # each time.
673 self._process_lookup = {}
674
675 class Tracer(object):
676 """During it's lifetime, the tracing subsystem is enabled."""
677 def __init__(self, logname):
678 self._logname = logname
maruel@chromium.org808e3f22013-05-16 16:42:54 +0000679 self._lock = threading.RLock()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000680 self._traces = []
681 self._initialized = True
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +0000682 self._scripts_to_cleanup = []
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000683
684 def trace(self, cmd, cwd, tracename, output):
685 """Runs the OS-specific trace program on an executable.
686
687 Arguments:
688 - cmd: The command (a list) to run.
689 - cwd: Current directory to start the child process in.
690 - tracename: Name of the trace in the logname file.
691 - output: If False, redirects output to PIPEs.
692
693 Returns a tuple (resultcode, output) and updates the internal trace
694 entries.
695 """
696 # The implementation adds an item to self._traces.
697 raise NotImplementedError(self.__class__.__name__)
698
699 def close(self, _timeout=None):
700 """Saves the meta-data in the logname file.
701
702 For kernel-based tracing, stops the tracing subsystem.
703
704 Must not be used manually when using 'with' construct.
705 """
706 with self._lock:
maruel@chromium.org808e3f22013-05-16 16:42:54 +0000707 if not self._initialized:
708 raise TracingFailure(
709 'Called %s.close() on an unitialized object' %
710 self.__class__.__name__,
711 None, None, None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000712 try:
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +0000713 while self._scripts_to_cleanup:
714 try:
715 os.remove(self._scripts_to_cleanup.pop())
716 except OSError as e:
717 logging.error('Failed to delete a temporary script: %s', e)
Marc-Antoine Ruelde011802013-11-12 15:19:47 -0500718 tools.write_json(self._logname, self._gen_logdata(), False)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000719 finally:
720 self._initialized = False
721
722 def post_process_log(self):
723 """Post-processes the log so it becomes faster to load afterward.
724
725 Must not be used manually when using 'with' construct.
726 """
727 assert not self._initialized, 'Must stop tracing first.'
728
maruel@chromium.org808e3f22013-05-16 16:42:54 +0000729 def _gen_logdata(self):
730 """Returns the data to be saved in the trace file."""
731 return {
732 'traces': self._traces,
733 }
734
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000735 def __enter__(self):
736 """Enables 'with' statement."""
737 return self
738
739 def __exit__(self, exc_type, exc_value, traceback):
740 """Enables 'with' statement."""
741 self.close()
742 # If an exception was thrown, do not process logs.
743 if not exc_type:
744 self.post_process_log()
745
746 def get_tracer(self, logname):
747 """Returns an ApiBase.Tracer instance.
748
749 Initializes the tracing subsystem, which is a requirement for kernel-based
750 tracers. Only one tracer instance should be live at a time!
751
752 logname is the filepath to the json file that will contain the meta-data
753 about the logs.
754 """
755 return self.Tracer(logname)
756
757 @staticmethod
758 def clean_trace(logname):
759 """Deletes an old log."""
760 raise NotImplementedError()
761
762 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +0000763 def parse_log(cls, logname, blacklist, trace_name):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000764 """Processes trace logs and returns the files opened and the files that do
765 not exist.
766
767 It does not track directories.
768
maruel@chromium.orgec74ff82012-10-29 18:14:47 +0000769 Arguments:
770 - logname: must be an absolute path.
771 - blacklist: must be a lambda.
772 - trace_name: optional trace to read, defaults to reading all traces.
maruel@chromium.org8b056ba2012-10-16 14:04:49 +0000773
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000774 Most of the time, files that do not exist are temporary test files that
775 should be put in /tmp instead. See http://crbug.com/116251.
776
777 Returns a list of dict with keys:
778 - results: A Results instance.
779 - trace: The corresponding tracename parameter provided to
780 get_tracer().trace().
781 - output: Output gathered during execution, if get_tracer().trace(...,
782 output=False) was used.
783 """
784 raise NotImplementedError(cls.__class__.__name__)
785
786
787class Strace(ApiBase):
788 """strace implies linux."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000789 @staticmethod
790 def load_filename(filename):
791 """Parses a filename in a log."""
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000792 # TODO(maruel): Be compatible with strace -x.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000793 assert isinstance(filename, str)
794 out = ''
795 i = 0
796 while i < len(filename):
797 c = filename[i]
798 if c == '\\':
799 out += chr(int(filename[i+1:i+4], 8))
800 i += 4
801 else:
802 out += c
803 i += 1
804 # TODO(maruel): That's not necessarily true that the current code page is
805 # utf-8.
806 return out.decode('utf-8')
807
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000808 class Context(ApiBase.Context):
809 """Processes a strace log line and keeps the list of existent and non
810 existent files accessed.
811
812 Ignores directories.
813
814 Uses late-binding to processes the cwd of each process. The problem is that
815 strace generates one log file per process it traced but doesn't give any
maruel@chromium.org2909b7c2013-05-29 18:52:59 +0000816 information about which process was started when and by who. So process the
817 logs out of order and use late binding with RelativePath to be able to
818 deduce the initial directory of each process once all the logs are parsed.
819
820 TODO(maruel): Use the script even in the non-sudo case, so log parsing can
821 be done in two phase: first find the root process, then process the child
822 processes in order. With that, it should be possible to not use RelativePath
823 anymore. This would significantly simplify the code!
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000824 """
825 class Process(ApiBase.Context.Process):
826 """Represents the state of a process.
827
828 Contains all the information retrieved from the pid-specific log.
829 """
830 # Function names are using ([a-z_0-9]+)
831 # This is the most common format. function(args) = result
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000832 RE_HEADER = re.compile(r'^([a-z_0-9]+)\((.*?)\)\s+= (.+)$')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000833 # An interrupted function call, only grab the minimal header.
834 RE_UNFINISHED = re.compile(r'^([^\(]+)(.*) \<unfinished \.\.\.\>$')
835 # A resumed function call.
836 RE_RESUMED = re.compile(r'^<\.\.\. ([^ ]+) resumed> (.+)$')
837 # A process received a signal.
838 RE_SIGNAL = re.compile(r'^--- SIG[A-Z]+ .+ ---')
839 # A process didn't handle a signal. Ignore any junk appearing before,
840 # because the process was forcibly killed so it won't open any new file.
841 RE_KILLED = re.compile(
842 r'^.*\+\+\+ killed by ([A-Z]+)( \(core dumped\))? \+\+\+$')
843 # The process has exited.
844 RE_PROCESS_EXITED = re.compile(r'^\+\+\+ exited with (\d+) \+\+\+')
845 # A call was canceled. Ignore any prefix.
846 RE_UNAVAILABLE = re.compile(r'^.*\)\s*= \? <unavailable>$')
847 # Happens when strace fails to even get the function name.
848 UNNAMED_FUNCTION = '????'
849
850 # Corner-case in python, a class member function decorator must not be
851 # @staticmethod.
852 def parse_args(regexp, expect_zero): # pylint: disable=E0213
853 """Automatically convert the str 'args' into a list of processed
854 arguments.
855
856 Arguments:
857 - regexp is used to parse args.
858 - expect_zero: one of True, False or None.
859 - True: will check for result.startswith('0') first and will ignore
860 the trace line completely otherwise. This is important because for
861 many functions, the regexp will not process if the call failed.
862 - False: will check for not result.startswith(('?', '-1')) for the
863 same reason than with True.
864 - None: ignore result.
865 """
866 def meta_hook(function):
867 assert function.__name__.startswith('handle_')
868 def hook(self, args, result):
869 if expect_zero is True and not result.startswith('0'):
870 return
871 if expect_zero is False and result.startswith(('?', '-1')):
872 return
873 match = re.match(regexp, args)
874 if not match:
875 raise TracingFailure(
876 'Failed to parse %s(%s) = %s' %
877 (function.__name__[len('handle_'):], args, result),
878 None, None, None)
879 return function(self, match.groups(), result)
880 return hook
881 return meta_hook
882
883 class RelativePath(object):
884 """A late-bound relative path."""
885 def __init__(self, parent, value):
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000886 assert_is_renderable(parent)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000887 self.parent = parent
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000888 assert (
889 value is None or
890 (isinstance(value, unicode) and not os.path.isabs(value)))
891 self.value = value
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000892 if self.value:
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000893 # TODO(maruel): On POSIX, '\\' is a valid character so remove this
894 # assert.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000895 assert '\\' not in self.value, value
896 assert '\\' not in self.value, (repr(value), repr(self.value))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000897
898 def render(self):
899 """Returns the current directory this instance is representing.
900
901 This function is used to return the late-bound value.
902 """
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000903 assert self.parent is not None
904 parent = render(self.parent)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000905 if self.value:
906 return os.path.normpath(os.path.join(parent, self.value))
907 return parent
908
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000909 def __init__(self, root, pid):
910 """Keeps enough information to be able to guess the original process
911 root.
912
913 strace doesn't store which process was the initial process. So more
914 information needs to be kept so the graph can be reconstructed from the
915 flat map.
916 """
917 logging.info('%s(%d)' % (self.__class__.__name__, pid))
918 super(Strace.Context.Process, self).__init__(root.blacklist, pid, None)
919 assert isinstance(root, ApiBase.Context)
920 self._root = weakref.ref(root)
921 # The dict key is the function name of the pending call, like 'open'
922 # or 'execve'.
923 self._pending_calls = {}
924 self._line_number = 0
925 # Current directory when the process started.
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000926 if isinstance(self._root(), unicode):
927 self.initial_cwd = self._root()
928 else:
929 self.initial_cwd = self.RelativePath(self._root(), None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000930 self.parentid = None
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +0000931 self._done = False
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000932
933 def get_cwd(self):
934 """Returns the best known value of cwd."""
935 return self.cwd or self.initial_cwd
936
937 def render(self):
938 """Returns the string value of the RelativePath() object.
939
940 Used by RelativePath. Returns the initial directory and not the
941 current one since the current directory 'cwd' validity is time-limited.
942
943 The validity is only guaranteed once all the logs are processed.
944 """
945 return self.initial_cwd.render()
946
947 def on_line(self, line):
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000948 assert isinstance(line, str)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000949 self._line_number += 1
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000950 try:
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000951 if self._done:
952 raise TracingFailure(
953 'Found a trace for a terminated process or corrupted log',
954 None, None, None)
955
956 if self.RE_SIGNAL.match(line):
957 # Ignore signals.
958 return
959
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000960 match = self.RE_KILLED.match(line)
961 if match:
962 # Converts a '+++ killed by Foo +++' trace into an exit_group().
963 self.handle_exit_group(match.group(1), None)
964 return
965
966 match = self.RE_PROCESS_EXITED.match(line)
967 if match:
968 # Converts a '+++ exited with 1 +++' trace into an exit_group()
969 self.handle_exit_group(match.group(1), None)
970 return
971
972 match = self.RE_UNFINISHED.match(line)
973 if match:
974 if match.group(1) in self._pending_calls:
975 raise TracingFailure(
976 'Found two unfinished calls for the same function',
977 None, None, None,
978 self._pending_calls)
979 self._pending_calls[match.group(1)] = (
980 match.group(1) + match.group(2))
981 return
982
983 match = self.RE_UNAVAILABLE.match(line)
984 if match:
985 # This usually means a process was killed and a pending call was
986 # canceled.
987 # TODO(maruel): Look up the last exit_group() trace just above and
988 # make sure any self._pending_calls[anything] is properly flushed.
989 return
990
991 match = self.RE_RESUMED.match(line)
992 if match:
993 if match.group(1) not in self._pending_calls:
994 raise TracingFailure(
995 'Found a resumed call that was not logged as unfinished',
996 None, None, None,
997 self._pending_calls)
998 pending = self._pending_calls.pop(match.group(1))
999 # Reconstruct the line.
1000 line = pending + match.group(2)
1001
1002 match = self.RE_HEADER.match(line)
1003 if not match:
maruel@chromium.orgbcfc9e12013-03-19 18:47:17 +00001004 # The line is corrupted. It happens occasionally when a process is
1005 # killed forcibly with activity going on. Assume the process died.
1006 # No other line can be processed afterward.
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001007 logging.debug('%d is done: %s', self.pid, line)
maruel@chromium.orgbcfc9e12013-03-19 18:47:17 +00001008 self._done = True
1009 return
1010
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001011 if match.group(1) == self.UNNAMED_FUNCTION:
1012 return
1013
1014 # It's a valid line, handle it.
1015 handler = getattr(self, 'handle_%s' % match.group(1), None)
1016 if not handler:
1017 self._handle_unknown(match.group(1), match.group(2), match.group(3))
1018 return handler(match.group(2), match.group(3))
1019 except TracingFailure, e:
1020 # Hack in the values since the handler could be a static function.
1021 e.pid = self.pid
1022 e.line = line
1023 e.line_number = self._line_number
1024 # Re-raise the modified exception.
1025 raise
1026 except (KeyError, NotImplementedError, ValueError), e:
1027 raise TracingFailure(
1028 'Trace generated a %s exception: %s' % (
1029 e.__class__.__name__, str(e)),
1030 self.pid,
1031 self._line_number,
1032 line,
1033 e)
1034
1035 @parse_args(r'^\"(.+?)\", [FKORWX_|]+$', True)
1036 def handle_access(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001037 self._handle_file(args[0], Results.File.TOUCHED)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001038
1039 @parse_args(r'^\"(.+?)\"$', True)
1040 def handle_chdir(self, args, _result):
1041 """Updates cwd."""
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001042 self.cwd = self._mangle(args[0])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001043 logging.debug('handle_chdir(%d, %s)' % (self.pid, self.cwd))
1044
maruel@chromium.org538141b2013-06-03 20:57:17 +00001045 @parse_args(r'^\"(.+?)\", (\d+), (\d+)$', False)
1046 def handle_chown(self, args, _result):
1047 # TODO(maruel): Look at result?
1048 self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org55550722013-05-28 14:43:14 +00001049
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001050 def handle_clone(self, _args, result):
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001051 self._handling_forking('clone', result)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001052
1053 def handle_close(self, _args, _result):
1054 pass
1055
maruel@chromium.org538141b2013-06-03 20:57:17 +00001056 @parse_args(r'^\"(.+?)\", (\d+)$', False)
1057 def handle_chmod(self, args, _result):
1058 self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001059
maruel@chromium.org538141b2013-06-03 20:57:17 +00001060 @parse_args(r'^\"(.+?)\", (\d+)$', False)
1061 def handle_creat(self, args, _result):
1062 self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001063
1064 @parse_args(r'^\"(.+?)\", \[(.+)\], \[\/\* \d+ vars? \*\/\]$', True)
1065 def handle_execve(self, args, _result):
1066 # Even if in practice execve() doesn't returns when it succeeds, strace
1067 # still prints '0' as the result.
1068 filepath = args[0]
maruel@chromium.org538141b2013-06-03 20:57:17 +00001069 self._handle_file(filepath, Results.File.READ)
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001070 self.executable = self._mangle(filepath)
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001071 try:
1072 self.command = strace_process_quoted_arguments(args[1])
1073 except ValueError as e:
1074 raise TracingFailure(
1075 'Failed to process command line argument:\n%s' % e.args[0],
1076 None, None, None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001077
1078 def handle_exit_group(self, _args, _result):
1079 """Removes cwd."""
1080 self.cwd = None
1081
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001082 @parse_args(r'^(\d+|AT_FDCWD), \"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', True)
1083 def handle_faccessat(self, args, _results):
1084 if args[0] == 'AT_FDCWD':
maruel@chromium.org538141b2013-06-03 20:57:17 +00001085 self._handle_file(args[1], Results.File.TOUCHED)
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001086 else:
1087 raise Exception('Relative faccess not implemented.')
1088
maruel@chromium.org0781f322013-05-28 19:45:49 +00001089 def handle_fallocate(self, _args, result):
1090 pass
1091
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001092 def handle_fork(self, args, result):
1093 self._handle_unknown('fork', args, result)
1094
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001095 def handle_futex(self, _args, _result):
1096 pass
1097
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001098 @parse_args(r'^(\".+?\"|0x[a-f0-9]+), (\d+)$', False)
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001099 def handle_getcwd(self, args, _result):
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001100 # TODO(maruel): Resolve file handle.
1101 if not args[0].startswith('0x'):
1102 filepath = args[0][1:-1]
1103 if os.path.isabs(filepath):
1104 logging.debug('handle_chdir(%d, %s)' % (self.pid, self.cwd))
1105 if not isinstance(self.cwd, unicode):
1106 # Take the occasion to reset the path.
1107 self.cwd = self._mangle(filepath)
1108 else:
1109 # It should always match.
1110 assert self.cwd == Strace.load_filename(filepath), (
1111 self.cwd, filepath)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001112
1113 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1114 def handle_link(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001115 self._handle_file(args[0], Results.File.READ)
1116 self._handle_file(args[1], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001117
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001118 @parse_args(r'^(\".+?\"|0x[a-f0-9]+), \{(?:.+?, )?\.\.\.\}$', True)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001119 def handle_lstat(self, args, _result):
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001120 # TODO(maruel): Resolve file handle.
1121 if not args[0].startswith('0x'):
1122 self._handle_file(args[0][1:-1], Results.File.TOUCHED)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001123
1124 def handle_mkdir(self, _args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001125 # We track content, not directories.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001126 pass
1127
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001128 @parse_args(r'^(\".*?\"|0x[a-f0-9]+), ([A-Z\_\|]+)(|, \d+)$', False)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001129 def handle_open(self, args, _result):
1130 if 'O_DIRECTORY' in args[1]:
1131 return
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001132 # TODO(maruel): Resolve file handle.
1133 if not args[0].startswith('0x'):
1134 t = Results.File.READ if 'O_RDONLY' in args[1] else Results.File.WRITE
1135 self._handle_file(args[0][1:-1], t)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001136
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001137 @parse_args(
1138 r'^(\d+|AT_FDCWD), (\".*?\"|0x[a-f0-9]+), ([A-Z\_\|]+)(|, \d+)$',
1139 False)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001140 def handle_openat(self, args, _result):
1141 if 'O_DIRECTORY' in args[2]:
1142 return
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001143 if args[0] == 'AT_FDCWD':
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001144 # TODO(maruel): Resolve file handle.
1145 if not args[1].startswith('0x'):
1146 t = (
1147 Results.File.READ if 'O_RDONLY' in args[2]
maruel@chromium.org538141b2013-06-03 20:57:17 +00001148 else Results.File.WRITE)
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001149 self._handle_file(args[1][1:-1], t)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001150 else:
1151 # TODO(maruel): Implement relative open if necessary instead of the
1152 # AT_FDCWD flag, let's hope not since this means tracking all active
1153 # directory handles.
maruel@chromium.org538141b2013-06-03 20:57:17 +00001154 raise NotImplementedError('Relative open via openat not implemented.')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001155
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001156 @parse_args(
1157 r'^(\".+?\"|0x[a-f0-9]+), (?:\".+?\"(?:\.\.\.)?|0x[a-f0-9]+), '
1158 '\d+$',
1159 False)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001160 def handle_readlink(self, args, _result):
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001161 # TODO(maruel): Resolve file handle.
1162 if not args[0].startswith('0x'):
1163 self._handle_file(args[0][1:-1], Results.File.READ)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001164
1165 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1166 def handle_rename(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001167 self._handle_file(args[0], Results.File.READ)
1168 self._handle_file(args[1], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001169
1170 def handle_rmdir(self, _args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001171 # We track content, not directories.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001172 pass
1173
1174 def handle_setxattr(self, _args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001175 # TODO(maruel): self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001176 pass
1177
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001178 @parse_args(r'^(\".+?\"|0x[a-f0-9]+), \{.+?\}$', True)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001179 def handle_stat(self, args, _result):
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001180 # TODO(maruel): Resolve file handle.
1181 if not args[0].startswith('0x'):
1182 self._handle_file(args[0][1:-1], Results.File.TOUCHED)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001183
maruel@chromium.org538141b2013-06-03 20:57:17 +00001184 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1185 def handle_symlink(self, args, _result):
1186 self._handle_file(args[0], Results.File.TOUCHED)
1187 self._handle_file(args[1], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001188
csharp@chromium.orgfa672922012-11-02 17:43:31 +00001189 @parse_args(r'^\"(.+?)\", \d+', True)
1190 def handle_truncate(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001191 self._handle_file(args[0], Results.File.WRITE)
csharp@chromium.orgfa672922012-11-02 17:43:31 +00001192
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001193 def handle_unlink(self, _args, _result):
1194 # In theory, the file had to be created anyway.
1195 pass
1196
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001197 def handle_unlinkat(self, _args, _result):
1198 # In theory, the file had to be created anyway.
1199 pass
1200
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001201 def handle_statfs(self, _args, _result):
1202 pass
1203
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001204 def handle_utimensat(self, _args, _result):
1205 pass
1206
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001207 def handle_vfork(self, _args, result):
1208 self._handling_forking('vfork', result)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001209
1210 @staticmethod
1211 def _handle_unknown(function, args, result):
1212 raise TracingFailure(
1213 'Unexpected/unimplemented trace %s(%s)= %s' %
1214 (function, args, result),
1215 None, None, None)
1216
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001217 def _handling_forking(self, name, result):
1218 """Transfers cwd."""
1219 if result.startswith(('?', '-1')):
1220 # The call failed.
1221 return
1222 # Update the other process right away.
1223 childpid = int(result)
1224 child = self._root().get_or_set_proc(childpid)
1225 if child.parentid is not None or childpid in self.children:
1226 raise TracingFailure(
1227 'Found internal inconsitency in process lifetime detection '
1228 'during a %s() call' % name,
1229 None, None, None)
1230
1231 # Copy the cwd object.
1232 child.initial_cwd = self.get_cwd()
1233 child.parentid = self.pid
1234 # It is necessary because the logs are processed out of order.
1235 self.children.append(child)
1236
maruel@chromium.org538141b2013-06-03 20:57:17 +00001237 def _handle_file(self, filepath, mode):
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001238 filepath = self._mangle(filepath)
maruel@chromium.org538141b2013-06-03 20:57:17 +00001239 self.add_file(filepath, mode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001240
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001241 def _mangle(self, filepath):
1242 """Decodes a filepath found in the log and convert it to a late-bound
1243 path if necessary.
1244
1245 |filepath| is an strace 'encoded' string and the returned value is
1246 either an unicode string if the path was absolute or a late bound path
1247 otherwise.
1248 """
1249 filepath = Strace.load_filename(filepath)
1250 if os.path.isabs(filepath):
1251 return filepath
1252 else:
1253 if isinstance(self.get_cwd(), unicode):
1254 return os.path.normpath(os.path.join(self.get_cwd(), filepath))
1255 return self.RelativePath(self.get_cwd(), filepath)
1256
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001257 def __init__(self, blacklist, root_pid, initial_cwd):
1258 """|root_pid| may be None when the root process is not known.
1259
1260 In that case, a search is done after reading all the logs to figure out
1261 the root process.
1262 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001263 super(Strace.Context, self).__init__(blacklist)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001264 assert_is_renderable(initial_cwd)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001265 self.root_pid = root_pid
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001266 self.initial_cwd = initial_cwd
1267
1268 def render(self):
1269 """Returns the string value of the initial cwd of the root process.
1270
1271 Used by RelativePath.
1272 """
1273 return self.initial_cwd
1274
1275 def on_line(self, pid, line):
1276 """Transfers control into the Process.on_line() function."""
1277 self.get_or_set_proc(pid).on_line(line.strip())
1278
1279 def to_results(self):
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001280 """If necessary, finds back the root process and verify consistency."""
1281 if not self.root_pid:
1282 # The non-sudo case. The traced process was started by strace itself,
1283 # so the pid of the traced process is not known.
1284 root = [p for p in self._process_lookup.itervalues() if not p.parentid]
1285 if len(root) == 1:
1286 self.root_process = root[0]
1287 # Save it for later.
1288 self.root_pid = self.root_process.pid
1289 else:
1290 # The sudo case. The traced process was started manually so its pid is
1291 # known.
1292 self.root_process = self._process_lookup.get(self.root_pid)
1293 if not self.root_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001294 raise TracingFailure(
1295 'Found internal inconsitency in process lifetime detection '
1296 'while finding the root process',
1297 None,
1298 None,
1299 None,
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001300 self.root_pid,
1301 sorted(self._process_lookup))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001302 process = self.root_process.to_results_process()
1303 if sorted(self._process_lookup) != sorted(p.pid for p in process.all):
1304 raise TracingFailure(
1305 'Found internal inconsitency in process lifetime detection '
1306 'while looking for len(tree) == len(list)',
1307 None,
1308 None,
1309 None,
1310 sorted(self._process_lookup),
1311 sorted(p.pid for p in process.all))
1312 return Results(process)
1313
1314 def get_or_set_proc(self, pid):
1315 """Returns the Context.Process instance for this pid or creates a new one.
1316 """
1317 if not pid or not isinstance(pid, int):
1318 raise TracingFailure(
1319 'Unpexpected value for pid: %r' % pid,
1320 pid,
1321 None,
1322 None,
1323 pid)
1324 if pid not in self._process_lookup:
1325 self._process_lookup[pid] = self.Process(self, pid)
1326 return self._process_lookup[pid]
1327
1328 @classmethod
1329 def traces(cls):
1330 """Returns the list of all handled traces to pass this as an argument to
1331 strace.
1332 """
1333 prefix = 'handle_'
1334 return [i[len(prefix):] for i in dir(cls.Process) if i.startswith(prefix)]
1335
1336 class Tracer(ApiBase.Tracer):
1337 MAX_LEN = 256
1338
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001339 def __init__(self, logname, use_sudo):
1340 super(Strace.Tracer, self).__init__(logname)
1341 self.use_sudo = use_sudo
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001342 if use_sudo:
1343 # TODO(maruel): Use the jump script systematically to make it easy to
1344 # figure out the root process, so RelativePath is not necessary anymore.
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00001345 self._child_script = create_exec_thunk()
1346 self._scripts_to_cleanup.append(self._child_script)
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001347
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001348 def trace(self, cmd, cwd, tracename, output):
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001349 """Runs strace on an executable.
1350
1351 When use_sudo=True, it is a 3-phases process: start the thunk, start
1352 sudo strace with the pid of the thunk and then have the thunk os.execve()
1353 the process to trace.
1354 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001355 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
1356 assert os.path.isabs(cmd[0]), cmd[0]
1357 assert os.path.isabs(cwd), cwd
1358 assert os.path.normpath(cwd) == cwd, cwd
1359 with self._lock:
1360 if not self._initialized:
1361 raise TracingFailure(
1362 'Called Tracer.trace() on an unitialized object',
1363 None, None, None, tracename)
1364 assert tracename not in (i['trace'] for i in self._traces)
1365 stdout = stderr = None
1366 if output:
1367 stdout = subprocess.PIPE
1368 stderr = subprocess.STDOUT
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001369
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001370 # Ensure all file related APIs are hooked.
1371 traces = ','.join(Strace.Context.traces() + ['file'])
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001372 flags = [
maruel@chromium.orga5cfdc32013-05-28 14:46:15 +00001373 # Each child process has its own trace file. It is necessary because
1374 # strace may generate corrupted log file if multiple processes are
1375 # heavily doing syscalls simultaneously.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001376 '-ff',
maruel@chromium.orga5cfdc32013-05-28 14:46:15 +00001377 # Reduce whitespace usage.
1378 '-a1',
1379 # hex encode non-ascii strings.
1380 # TODO(maruel): '-x',
1381 # TODO(maruel): '-ttt',
1382 # Signals are unnecessary noise here. Note the parser can cope with them
1383 # but reduce the unnecessary output.
1384 '-esignal=none',
1385 # Print as much data as wanted.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001386 '-s', '%d' % self.MAX_LEN,
1387 '-e', 'trace=%s' % traces,
1388 '-o', self._logname + '.' + tracename,
1389 ]
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001390
1391 if self.use_sudo:
1392 pipe_r, pipe_w = os.pipe()
1393 # Start the child process paused.
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00001394 target_cmd = [sys.executable, self._child_script, str(pipe_r)] + cmd
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001395 logging.debug(' '.join(target_cmd))
1396 child_proc = subprocess.Popen(
1397 target_cmd,
1398 stdin=subprocess.PIPE,
1399 stdout=stdout,
1400 stderr=stderr,
1401 cwd=cwd)
1402
1403 # TODO(maruel): both processes must use the same UID for it to work
1404 # without sudo. Look why -p is failing at the moment without sudo.
1405 trace_cmd = [
1406 'sudo',
1407 'strace',
1408 '-p', str(child_proc.pid),
1409 ] + flags
1410 logging.debug(' '.join(trace_cmd))
1411 strace_proc = subprocess.Popen(
1412 trace_cmd,
1413 cwd=cwd,
1414 stdin=subprocess.PIPE,
1415 stdout=subprocess.PIPE,
1416 stderr=subprocess.PIPE)
1417
1418 line = strace_proc.stderr.readline()
1419 if not re.match(r'^Process \d+ attached \- interrupt to quit$', line):
1420 # TODO(maruel): Raise an exception.
1421 assert False, line
1422
1423 # Now fire the child process.
1424 os.write(pipe_w, 'x')
1425
1426 out = child_proc.communicate()[0]
1427 strace_out = strace_proc.communicate()[0]
1428
1429 # TODO(maruel): if strace_proc.returncode: Add an exception.
1430 saved_out = strace_out if strace_proc.returncode else out
1431 root_pid = child_proc.pid
1432 else:
1433 # Non-sudo case.
1434 trace_cmd = [
1435 'strace',
1436 ] + flags + cmd
1437 logging.debug(' '.join(trace_cmd))
1438 child_proc = subprocess.Popen(
1439 trace_cmd,
1440 cwd=cwd,
1441 stdin=subprocess.PIPE,
1442 stdout=stdout,
1443 stderr=stderr)
1444 out = child_proc.communicate()[0]
1445 # TODO(maruel): Walk the logs and figure out the root process would
1446 # simplify parsing the logs a *lot*.
1447 saved_out = out
1448 # The trace reader will have to figure out.
1449 root_pid = None
1450
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001451 with self._lock:
1452 assert tracename not in (i['trace'] for i in self._traces)
1453 self._traces.append(
1454 {
1455 'cmd': cmd,
1456 'cwd': cwd,
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001457 'output': saved_out,
1458 'pid': root_pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001459 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001460 })
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001461 return child_proc.returncode, out
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001462
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001463 def __init__(self, use_sudo=None):
1464 super(Strace, self).__init__()
1465 self.use_sudo = use_sudo
1466
1467 def get_tracer(self, logname):
1468 return self.Tracer(logname, self.use_sudo)
1469
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001470 @staticmethod
1471 def clean_trace(logname):
1472 if os.path.isfile(logname):
1473 os.remove(logname)
1474 # Also delete any pid specific file from previous traces.
1475 for i in glob.iglob(logname + '.*'):
1476 if i.rsplit('.', 1)[1].isdigit():
1477 os.remove(i)
1478
1479 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001480 def parse_log(cls, logname, blacklist, trace_name):
1481 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00001482 assert os.path.isabs(logname)
Marc-Antoine Ruelde011802013-11-12 15:19:47 -05001483 data = tools.read_json(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001484 out = []
1485 for item in data['traces']:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001486 if trace_name and item['trace'] != trace_name:
1487 continue
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001488 result = {
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001489 'output': item['output'],
maruel@chromium.org964ddde2012-10-29 18:13:46 +00001490 'trace': item['trace'],
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001491 }
1492 try:
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001493 context = cls.Context(blacklist, item['pid'], item['cwd'])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001494 for pidfile in glob.iglob('%s.%s.*' % (logname, item['trace'])):
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001495 logging.debug('Reading %s', pidfile)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001496 pid = pidfile.rsplit('.', 1)[1]
1497 if pid.isdigit():
1498 pid = int(pid)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001499 found_line = False
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001500 for line in open(pidfile, 'rb'):
1501 context.on_line(pid, line)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001502 found_line = True
1503 if not found_line:
1504 # Ensures that a completely empty trace still creates the
1505 # corresponding Process instance by logging a dummy line.
1506 context.on_line(pid, '')
1507 else:
1508 logging.warning('Found unexpected file %s', pidfile)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001509 result['results'] = context.to_results()
maruel@chromium.org424f5692012-10-15 15:42:38 +00001510 except TracingFailure:
1511 result['exception'] = sys.exc_info()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001512 out.append(result)
1513 return out
1514
1515
1516class Dtrace(ApiBase):
1517 """Uses DTrace framework through dtrace. Requires root access.
1518
1519 Implies Mac OSX.
1520
1521 dtruss can't be used because it has compatibility issues with python.
1522
1523 Also, the pid->cwd handling needs to be done manually since OSX has no way to
1524 get the absolute path of the 'cwd' dtrace variable from the probe.
1525
1526 Also, OSX doesn't populate curpsinfo->pr_psargs properly, see
1527 https://discussions.apple.com/thread/1980539. So resort to handling execve()
1528 manually.
1529
1530 errno is not printed in the log since this implementation currently only cares
1531 about files that were successfully opened.
1532 """
1533 class Context(ApiBase.Context):
1534 # Format: index pid function(args)
1535 RE_HEADER = re.compile(r'^\d+ (\d+) ([a-zA-Z_\-]+)\((.*?)\)$')
1536
1537 # Arguments parsing.
1538 RE_DTRACE_BEGIN = re.compile(r'^\"(.+?)\"$')
1539 RE_CHDIR = re.compile(r'^\"(.+?)\"$')
1540 RE_EXECVE = re.compile(r'^\"(.+?)\", \[(\d+), (.+)\]$')
1541 RE_OPEN = re.compile(r'^\"(.+?)\", (0x[0-9a-z]+), (0x[0-9a-z]+)$')
1542 RE_PROC_START = re.compile(r'^(\d+), \"(.+?)\", (\d+)$')
1543 RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$')
1544
maruel@chromium.orgac361162013-06-04 15:54:06 +00001545 # O_DIRECTORY is not defined on Windows and dtrace doesn't exist on Windows.
1546 O_DIRECTORY = os.O_DIRECTORY if hasattr(os, 'O_DIRECTORY') else None
maruel@chromium.org538141b2013-06-03 20:57:17 +00001547 O_RDWR = os.O_RDWR
1548 O_WRONLY = os.O_WRONLY
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001549
1550 class Process(ApiBase.Context.Process):
1551 def __init__(self, *args):
1552 super(Dtrace.Context.Process, self).__init__(*args)
1553 self.cwd = self.initial_cwd
1554
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001555 def __init__(self, blacklist, thunk_pid, initial_cwd):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001556 logging.info(
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001557 '%s(%d, %s)' % (self.__class__.__name__, thunk_pid, initial_cwd))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001558 super(Dtrace.Context, self).__init__(blacklist)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001559 assert isinstance(initial_cwd, unicode), initial_cwd
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001560 # Process ID of the temporary script created by create_subprocess_thunk().
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001561 self._thunk_pid = thunk_pid
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001562 self._initial_cwd = initial_cwd
1563 self._line_number = 0
1564
1565 def on_line(self, line):
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001566 assert isinstance(line, unicode), line
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001567 self._line_number += 1
1568 match = self.RE_HEADER.match(line)
1569 if not match:
1570 raise TracingFailure(
1571 'Found malformed line: %s' % line,
1572 None,
1573 self._line_number,
1574 line)
1575 fn = getattr(
1576 self,
1577 'handle_%s' % match.group(2).replace('-', '_'),
1578 self._handle_ignored)
1579 # It is guaranteed to succeed because of the regexp. Or at least I thought
1580 # it would.
1581 pid = int(match.group(1))
1582 try:
1583 return fn(pid, match.group(3))
1584 except TracingFailure, e:
1585 # Hack in the values since the handler could be a static function.
1586 e.pid = pid
1587 e.line = line
1588 e.line_number = self._line_number
1589 # Re-raise the modified exception.
1590 raise
1591 except (KeyError, NotImplementedError, ValueError), e:
1592 raise TracingFailure(
1593 'Trace generated a %s exception: %s' % (
1594 e.__class__.__name__, str(e)),
1595 pid,
1596 self._line_number,
1597 line,
1598 e)
1599
1600 def to_results(self):
1601 process = self.root_process.to_results_process()
1602 # Internal concistency check.
1603 if sorted(self._process_lookup) != sorted(p.pid for p in process.all):
1604 raise TracingFailure(
1605 'Found internal inconsitency in process lifetime detection '
1606 'while looking for len(tree) == len(list)',
1607 None,
1608 None,
1609 None,
1610 sorted(self._process_lookup),
1611 sorted(p.pid for p in process.all))
1612 return Results(process)
1613
1614 def handle_dtrace_BEGIN(self, _pid, args):
1615 if not self.RE_DTRACE_BEGIN.match(args):
1616 raise TracingFailure(
1617 'Found internal inconsitency in dtrace_BEGIN log line',
1618 None, None, None)
1619
1620 def handle_proc_start(self, pid, args):
1621 """Transfers cwd.
1622
1623 The dtrace script already takes care of only tracing the processes that
1624 are child of the traced processes so there is no need to verify the
1625 process hierarchy.
1626 """
1627 if pid in self._process_lookup:
1628 raise TracingFailure(
1629 'Found internal inconsitency in proc_start: %d started two times' %
1630 pid,
1631 None, None, None)
1632 match = self.RE_PROC_START.match(args)
1633 if not match:
1634 raise TracingFailure(
1635 'Failed to parse arguments: %s' % args,
1636 None, None, None)
1637 ppid = int(match.group(1))
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001638 if ppid == self._thunk_pid and not self.root_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001639 proc = self.root_process = self.Process(
1640 self.blacklist, pid, self._initial_cwd)
1641 elif ppid in self._process_lookup:
1642 proc = self.Process(self.blacklist, pid, self._process_lookup[ppid].cwd)
1643 self._process_lookup[ppid].children.append(proc)
1644 else:
1645 # Another process tree, ignore.
1646 return
1647 self._process_lookup[pid] = proc
1648 logging.debug(
1649 'New child: %s -> %d cwd:%s' %
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001650 (ppid, pid, render(proc.initial_cwd)))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001651
1652 def handle_proc_exit(self, pid, _args):
1653 """Removes cwd."""
1654 if pid in self._process_lookup:
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001655 # self._thunk_pid is not traced itself and other traces run neither.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001656 self._process_lookup[pid].cwd = None
1657
1658 def handle_execve(self, pid, args):
1659 """Sets the process' executable.
1660
1661 TODO(maruel): Read command line arguments. See
1662 https://discussions.apple.com/thread/1980539 for an example.
1663 https://gist.github.com/1242279
1664
1665 Will have to put the answer at http://stackoverflow.com/questions/7556249.
1666 :)
1667 """
1668 if not pid in self._process_lookup:
1669 # Another process tree, ignore.
1670 return
1671 match = self.RE_EXECVE.match(args)
1672 if not match:
1673 raise TracingFailure(
1674 'Failed to parse arguments: %r' % args,
1675 None, None, None)
1676 proc = self._process_lookup[pid]
1677 proc.executable = match.group(1)
maruel@chromium.org538141b2013-06-03 20:57:17 +00001678 self._handle_file(pid, proc.executable, Results.File.READ)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001679 proc.command = self.process_escaped_arguments(match.group(3))
1680 if int(match.group(2)) != len(proc.command):
1681 raise TracingFailure(
1682 'Failed to parse execve() arguments: %s' % args,
1683 None, None, None)
1684
1685 def handle_chdir(self, pid, args):
1686 """Updates cwd."""
1687 if pid not in self._process_lookup:
1688 # Another process tree, ignore.
1689 return
1690 cwd = self.RE_CHDIR.match(args).group(1)
1691 if not cwd.startswith('/'):
1692 cwd2 = os.path.join(self._process_lookup[pid].cwd, cwd)
1693 logging.debug('handle_chdir(%d, %s) -> %s' % (pid, cwd, cwd2))
1694 else:
1695 logging.debug('handle_chdir(%d, %s)' % (pid, cwd))
1696 cwd2 = cwd
1697 self._process_lookup[pid].cwd = cwd2
1698
1699 def handle_open_nocancel(self, pid, args):
1700 """Redirects to handle_open()."""
1701 return self.handle_open(pid, args)
1702
1703 def handle_open(self, pid, args):
1704 if pid not in self._process_lookup:
1705 # Another process tree, ignore.
1706 return
1707 match = self.RE_OPEN.match(args)
1708 if not match:
1709 raise TracingFailure(
1710 'Failed to parse arguments: %s' % args,
1711 None, None, None)
1712 flag = int(match.group(2), 16)
1713 if self.O_DIRECTORY & flag == self.O_DIRECTORY:
1714 # Ignore directories.
1715 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00001716 self._handle_file(
1717 pid,
1718 match.group(1),
1719 Results.File.READ if not ((self.O_RDWR | self.O_WRONLY) & flag)
1720 else Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001721
1722 def handle_rename(self, pid, args):
1723 if pid not in self._process_lookup:
1724 # Another process tree, ignore.
1725 return
1726 match = self.RE_RENAME.match(args)
1727 if not match:
1728 raise TracingFailure(
1729 'Failed to parse arguments: %s' % args,
1730 None, None, None)
maruel@chromium.org538141b2013-06-03 20:57:17 +00001731 self._handle_file(pid, match.group(1), Results.File.READ)
1732 self._handle_file(pid, match.group(2), Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001733
maruel@chromium.org538141b2013-06-03 20:57:17 +00001734 def _handle_file(self, pid, filepath, mode):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001735 if not filepath.startswith('/'):
1736 filepath = os.path.join(self._process_lookup[pid].cwd, filepath)
1737 # We can get '..' in the path.
1738 filepath = os.path.normpath(filepath)
1739 # Sadly, still need to filter out directories here;
1740 # saw open_nocancel(".", 0, 0) = 0 lines.
1741 if os.path.isdir(filepath):
1742 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00001743 self._process_lookup[pid].add_file(filepath, mode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001744
1745 def handle_ftruncate(self, pid, args):
1746 """Just used as a signal to kill dtrace, ignoring."""
1747 pass
1748
1749 @staticmethod
1750 def _handle_ignored(pid, args):
1751 """Is called for all the event traces that are not handled."""
1752 raise NotImplementedError('Please implement me')
1753
1754 @staticmethod
1755 def process_escaped_arguments(text):
1756 """Extracts escaped arguments on a string and return the arguments as a
1757 list.
1758
1759 Implemented as an automaton.
1760
1761 Example:
1762 With text = '\\001python2.7\\001-c\\001print(\\"hi\\")\\0', the
1763 function will return ['python2.7', '-c', 'print("hi")]
1764 """
1765 if not text.endswith('\\0'):
1766 raise ValueError('String is not null terminated: %r' % text, text)
1767 text = text[:-2]
1768
1769 def unescape(x):
1770 """Replaces '\\' with '\' and '\?' (where ? is anything) with ?."""
1771 out = []
1772 escaped = False
1773 for i in x:
1774 if i == '\\' and not escaped:
1775 escaped = True
1776 continue
1777 escaped = False
1778 out.append(i)
1779 return ''.join(out)
1780
1781 return [unescape(i) for i in text.split('\\001')]
1782
1783 class Tracer(ApiBase.Tracer):
1784 # pylint: disable=C0301
1785 #
1786 # To understand the following code, you'll want to take a look at:
1787 # http://developers.sun.com/solaris/articles/dtrace_quickref/dtrace_quickref.html
1788 # https://wikis.oracle.com/display/DTrace/Variables
1789 # http://docs.oracle.com/cd/E19205-01/820-4221/
1790 #
1791 # 0. Dump all the valid probes into a text file. It is important, you
1792 # want to redirect into a file and you don't want to constantly 'sudo'.
1793 # $ sudo dtrace -l > probes.txt
1794 #
1795 # 1. Count the number of probes:
1796 # $ wc -l probes.txt
1797 # 81823 # On OSX 10.7, including 1 header line.
1798 #
1799 # 2. List providers, intentionally skipping all the 'syspolicy10925' and the
1800 # likes and skipping the header with NR>1:
1801 # $ awk 'NR>1 { print $2 }' probes.txt | sort | uniq | grep -v '[[:digit:]]'
1802 # dtrace
1803 # fbt
1804 # io
1805 # ip
1806 # lockstat
1807 # mach_trap
1808 # proc
1809 # profile
1810 # sched
1811 # syscall
1812 # tcp
1813 # vminfo
1814 #
1815 # 3. List of valid probes:
1816 # $ grep syscall probes.txt | less
1817 # or use dtrace directly:
1818 # $ sudo dtrace -l -P syscall | less
1819 #
1820 # trackedpid is an associative array where its value can be 0, 1 or 2.
1821 # 0 is for untracked processes and is the default value for items not
1822 # in the associative array.
1823 # 1 is for tracked processes.
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001824 # 2 is for the script created by create_subprocess_thunk() only. It is not
1825 # tracked itself but all its decendants are.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001826 #
1827 # The script will kill itself only once waiting_to_die == 1 and
1828 # current_processes == 0, so that both getlogin() was called and that
1829 # all traced processes exited.
1830 #
1831 # TODO(maruel): Use cacheable predicates. See
1832 # https://wikis.oracle.com/display/DTrace/Performance+Considerations
1833 D_CODE = """
1834 dtrace:::BEGIN {
1835 waiting_to_die = 0;
1836 current_processes = 0;
1837 logindex = 0;
1838 printf("%d %d %s_%s(\\"%s\\")\\n",
1839 logindex, PID, probeprov, probename, SCRIPT);
1840 logindex++;
1841 }
1842
1843 proc:::start /trackedpid[ppid]/ {
1844 trackedpid[pid] = 1;
1845 current_processes += 1;
1846 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
1847 logindex, pid, probeprov, probename,
1848 ppid,
1849 execname,
1850 current_processes);
1851 logindex++;
1852 }
1853 /* Should use SCRIPT but there is no access to this variable at that
1854 * point. */
1855 proc:::start /ppid == PID && execname == "Python"/ {
1856 trackedpid[pid] = 2;
1857 current_processes += 1;
1858 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
1859 logindex, pid, probeprov, probename,
1860 ppid,
1861 execname,
1862 current_processes);
1863 logindex++;
1864 }
1865 proc:::exit /trackedpid[pid] &&
1866 current_processes == 1 &&
1867 waiting_to_die == 1/ {
1868 trackedpid[pid] = 0;
1869 current_processes -= 1;
1870 printf("%d %d %s_%s(%d)\\n",
1871 logindex, pid, probeprov, probename,
1872 current_processes);
1873 logindex++;
1874 exit(0);
1875 }
1876 proc:::exit /trackedpid[pid]/ {
1877 trackedpid[pid] = 0;
1878 current_processes -= 1;
1879 printf("%d %d %s_%s(%d)\\n",
1880 logindex, pid, probeprov, probename,
1881 current_processes);
1882 logindex++;
1883 }
1884
1885 /* Use an arcane function to detect when we need to die */
1886 syscall::ftruncate:entry /pid == PID && arg0 == FILE_ID/ {
1887 waiting_to_die = 1;
1888 printf("%d %d %s()\\n", logindex, pid, probefunc);
1889 logindex++;
1890 }
1891 syscall::ftruncate:entry /
1892 pid == PID && arg0 == FILE_ID && current_processes == 0/ {
1893 exit(0);
1894 }
1895
1896 syscall::open*:entry /trackedpid[pid] == 1/ {
1897 self->open_arg0 = arg0;
1898 self->open_arg1 = arg1;
1899 self->open_arg2 = arg2;
1900 }
1901 syscall::open*:return /trackedpid[pid] == 1 && errno == 0/ {
1902 this->open_arg0 = copyinstr(self->open_arg0);
1903 printf("%d %d %s(\\"%s\\", 0x%x, 0x%x)\\n",
1904 logindex, pid, probefunc,
1905 this->open_arg0,
1906 self->open_arg1,
1907 self->open_arg2);
1908 logindex++;
1909 this->open_arg0 = 0;
1910 }
1911 syscall::open*:return /trackedpid[pid] == 1/ {
1912 self->open_arg0 = 0;
1913 self->open_arg1 = 0;
1914 self->open_arg2 = 0;
1915 }
1916
1917 syscall::rename:entry /trackedpid[pid] == 1/ {
1918 self->rename_arg0 = arg0;
1919 self->rename_arg1 = arg1;
1920 }
1921 syscall::rename:return /trackedpid[pid] == 1 && errno == 0/ {
1922 this->rename_arg0 = copyinstr(self->rename_arg0);
1923 this->rename_arg1 = copyinstr(self->rename_arg1);
1924 printf("%d %d %s(\\"%s\\", \\"%s\\")\\n",
1925 logindex, pid, probefunc,
1926 this->rename_arg0,
1927 this->rename_arg1);
1928 logindex++;
1929 this->rename_arg0 = 0;
1930 this->rename_arg1 = 0;
1931 }
1932 syscall::rename:return /trackedpid[pid] == 1/ {
1933 self->rename_arg0 = 0;
1934 self->rename_arg1 = 0;
1935 }
1936
1937 /* Track chdir, it's painful because it is only receiving relative path.
1938 */
1939 syscall::chdir:entry /trackedpid[pid] == 1/ {
1940 self->chdir_arg0 = arg0;
1941 }
1942 syscall::chdir:return /trackedpid[pid] == 1 && errno == 0/ {
1943 this->chdir_arg0 = copyinstr(self->chdir_arg0);
1944 printf("%d %d %s(\\"%s\\")\\n",
1945 logindex, pid, probefunc,
1946 this->chdir_arg0);
1947 logindex++;
1948 this->chdir_arg0 = 0;
1949 }
1950 syscall::chdir:return /trackedpid[pid] == 1/ {
1951 self->chdir_arg0 = 0;
1952 }
1953 """
1954
1955 # execve-specific code, tends to throw a lot of exceptions.
1956 D_CODE_EXECVE = """
1957 /* Finally what we care about! */
1958 syscall::exec*:entry /trackedpid[pid]/ {
1959 self->exec_arg0 = copyinstr(arg0);
1960 /* Incrementally probe for a NULL in the argv parameter of execve() to
1961 * figure out argc. */
1962 /* TODO(maruel): Skip the remaining copyin() when a NULL pointer was
1963 * found. */
1964 self->exec_argc = 0;
1965 /* Probe for argc==1 */
1966 this->exec_argv = (user_addr_t*)copyin(
1967 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
1968 self->exec_argc = this->exec_argv[self->exec_argc] ?
1969 (self->exec_argc + 1) : self->exec_argc;
1970
1971 /* Probe for argc==2 */
1972 this->exec_argv = (user_addr_t*)copyin(
1973 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
1974 self->exec_argc = this->exec_argv[self->exec_argc] ?
1975 (self->exec_argc + 1) : self->exec_argc;
1976
1977 /* Probe for argc==3 */
1978 this->exec_argv = (user_addr_t*)copyin(
1979 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
1980 self->exec_argc = this->exec_argv[self->exec_argc] ?
1981 (self->exec_argc + 1) : self->exec_argc;
1982
1983 /* Probe for argc==4 */
1984 this->exec_argv = (user_addr_t*)copyin(
1985 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
1986 self->exec_argc = this->exec_argv[self->exec_argc] ?
1987 (self->exec_argc + 1) : self->exec_argc;
1988
1989 /* Copy the inputs strings since there is no guarantee they'll be
1990 * present after the call completed. */
1991 self->exec_argv0 = (self->exec_argc > 0) ?
1992 copyinstr(this->exec_argv[0]) : "";
1993 self->exec_argv1 = (self->exec_argc > 1) ?
1994 copyinstr(this->exec_argv[1]) : "";
1995 self->exec_argv2 = (self->exec_argc > 2) ?
1996 copyinstr(this->exec_argv[2]) : "";
1997 self->exec_argv3 = (self->exec_argc > 3) ?
1998 copyinstr(this->exec_argv[3]) : "";
1999 this->exec_argv = 0;
2000 }
2001 syscall::exec*:return /trackedpid[pid] && errno == 0/ {
2002 /* We need to join strings here, as using multiple printf() would
2003 * cause tearing when multiple threads/processes are traced.
2004 * Since it is impossible to escape a string and join it to another one,
2005 * like sprintf("%s%S", previous, more), use hackery.
2006 * Each of the elements are split with a \\1. \\0 cannot be used because
2007 * it is simply ignored. This will conflict with any program putting a
2008 * \\1 in their execve() string but this should be "rare enough" */
2009 this->args = "";
2010 /* Process exec_argv[0] */
2011 this->args = strjoin(
2012 this->args, (self->exec_argc > 0) ? self->exec_argv0 : "");
2013
2014 /* Process exec_argv[1] */
2015 this->args = strjoin(
2016 this->args, (self->exec_argc > 1) ? "\\1" : "");
2017 this->args = strjoin(
2018 this->args, (self->exec_argc > 1) ? self->exec_argv1 : "");
2019
2020 /* Process exec_argv[2] */
2021 this->args = strjoin(
2022 this->args, (self->exec_argc > 2) ? "\\1" : "");
2023 this->args = strjoin(
2024 this->args, (self->exec_argc > 2) ? self->exec_argv2 : "");
2025
2026 /* Process exec_argv[3] */
2027 this->args = strjoin(
2028 this->args, (self->exec_argc > 3) ? "\\1" : "");
2029 this->args = strjoin(
2030 this->args, (self->exec_argc > 3) ? self->exec_argv3 : "");
2031
2032 /* Prints self->exec_argc to permits verifying the internal
2033 * consistency since this code is quite fishy. */
2034 printf("%d %d %s(\\"%s\\", [%d, %S])\\n",
2035 logindex, pid, probefunc,
2036 self->exec_arg0,
2037 self->exec_argc,
2038 this->args);
2039 logindex++;
2040 this->args = 0;
2041 }
2042 syscall::exec*:return /trackedpid[pid]/ {
2043 self->exec_arg0 = 0;
2044 self->exec_argc = 0;
2045 self->exec_argv0 = 0;
2046 self->exec_argv1 = 0;
2047 self->exec_argv2 = 0;
2048 self->exec_argv3 = 0;
2049 }
2050 """
2051
2052 # Code currently not used.
2053 D_EXTRANEOUS = """
2054 /* This is a good learning experience, since it traces a lot of things
2055 * related to the process and child processes.
2056 * Warning: it generates a gigantic log. For example, tracing
2057 * "data/trace_inputs/child1.py --child" generates a 2mb log and takes
2058 * several minutes to execute.
2059 */
2060 /*
2061 mach_trap::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2062 printf("%d %d %s_%s() = %d\\n",
2063 logindex, pid, probeprov, probefunc, errno);
2064 logindex++;
2065 }
2066 proc::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2067 printf("%d %d %s_%s() = %d\\n",
2068 logindex, pid, probeprov, probefunc, errno);
2069 logindex++;
2070 }
2071 sched::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2072 printf("%d %d %s_%s() = %d\\n",
2073 logindex, pid, probeprov, probefunc, errno);
2074 logindex++;
2075 }
2076 syscall::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2077 printf("%d %d %s_%s() = %d\\n",
2078 logindex, pid, probeprov, probefunc, errno);
2079 logindex++;
2080 }
2081 vminfo::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2082 printf("%d %d %s_%s() = %d\\n",
2083 logindex, pid, probeprov, probefunc, errno);
2084 logindex++;
2085 }
2086 */
2087 /* TODO(maruel): *stat* functions and friends
2088 syscall::access:return,
2089 syscall::chdir:return,
2090 syscall::chflags:return,
2091 syscall::chown:return,
2092 syscall::chroot:return,
2093 syscall::getattrlist:return,
2094 syscall::getxattr:return,
2095 syscall::lchown:return,
2096 syscall::lstat64:return,
2097 syscall::lstat:return,
2098 syscall::mkdir:return,
2099 syscall::pathconf:return,
2100 syscall::readlink:return,
2101 syscall::removexattr:return,
2102 syscall::setxattr:return,
2103 syscall::stat64:return,
2104 syscall::stat:return,
2105 syscall::truncate:return,
2106 syscall::unlink:return,
2107 syscall::utimes:return,
2108 */
2109 """
2110
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002111 def __init__(self, logname, use_sudo):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002112 """Starts the log collection with dtrace.
2113
2114 Requires root access or chmod 4555 on dtrace. dtrace is asynchronous so
2115 this needs to wait for dtrace to be "warmed up".
2116 """
2117 super(Dtrace.Tracer, self).__init__(logname)
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002118 # This script is used as a signal to figure out the root process.
2119 self._signal_script = create_subprocess_thunk()
2120 self._scripts_to_cleanup.append(self._signal_script)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002121 # This unique dummy temp file is used to signal the dtrace script that it
2122 # should stop as soon as all the child processes are done. A bit hackish
2123 # but works fine enough.
2124 self._dummy_file_id, self._dummy_file_name = tempfile.mkstemp(
Marc-Antoine Ruel3c979cb2015-03-11 13:43:28 -04002125 prefix=u'trace_signal_file')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002126
maruel@chromium.org9b2112a2013-04-05 18:01:02 +00002127 dtrace_path = '/usr/sbin/dtrace'
2128 if not os.path.isfile(dtrace_path):
2129 dtrace_path = 'dtrace'
2130 elif use_sudo is None and (os.stat(dtrace_path).st_mode & stat.S_ISUID):
2131 # No need to sudo. For those following at home, don't do that.
2132 use_sudo = False
2133
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002134 # Note: do not use the -p flag. It's useless if the initial process quits
2135 # too fast, resulting in missing traces from the grand-children. The D
2136 # code manages the dtrace lifetime itself.
2137 trace_cmd = [
maruel@chromium.org9b2112a2013-04-05 18:01:02 +00002138 dtrace_path,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002139 # Use a larger buffer if getting 'out of scratch space' errors.
2140 # Ref: https://wikis.oracle.com/display/DTrace/Options+and+Tunables
2141 '-b', '10m',
2142 '-x', 'dynvarsize=10m',
2143 #'-x', 'dtrace_global_maxsize=1m',
2144 '-x', 'evaltime=exec',
2145 '-o', '/dev/stderr',
2146 '-q',
2147 '-n', self._get_dtrace_code(),
2148 ]
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002149 if use_sudo is not False:
2150 trace_cmd.insert(0, 'sudo')
2151
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002152 with open(self._logname + '.log', 'wb') as logfile:
2153 self._dtrace = subprocess.Popen(
2154 trace_cmd, stdout=logfile, stderr=subprocess.STDOUT)
2155 logging.debug('Started dtrace pid: %d' % self._dtrace.pid)
2156
2157 # Reads until one line is printed, which signifies dtrace is up and ready.
2158 with open(self._logname + '.log', 'rb') as logfile:
2159 while 'dtrace_BEGIN' not in logfile.readline():
2160 if self._dtrace.poll() is not None:
2161 # Do a busy wait. :/
2162 break
2163 logging.debug('dtrace started')
2164
2165 def _get_dtrace_code(self):
2166 """Setups the D code to implement child process tracking.
2167
2168 Injects the cookie in the script so it knows when to stop.
2169
2170 The script will detect any instance of the script created with
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002171 create_subprocess_thunk() and will start tracing it.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002172 """
maruel@chromium.orgbe75f282012-11-02 19:34:37 +00002173 out = (
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002174 'inline int PID = %d;\n'
2175 'inline string SCRIPT = "%s";\n'
2176 'inline int FILE_ID = %d;\n'
2177 '\n'
2178 '%s') % (
2179 os.getpid(),
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002180 self._signal_script,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002181 self._dummy_file_id,
maruel@chromium.orgbe75f282012-11-02 19:34:37 +00002182 self.D_CODE)
2183 if os.environ.get('TRACE_INPUTS_DTRACE_ENABLE_EXECVE') == '1':
2184 # Do not enable by default since it tends to spew dtrace: error lines
2185 # because the execve() parameters are not in valid memory at the time of
2186 # logging.
2187 # TODO(maruel): Find a way to make this reliable since it's useful but
2188 # only works in limited/trivial uses cases for now.
2189 out += self.D_CODE_EXECVE
2190 return out
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002191
2192 def trace(self, cmd, cwd, tracename, output):
2193 """Runs dtrace on an executable.
2194
2195 This dtruss is broken when it starts the process itself or when tracing
2196 child processes, this code starts a wrapper process
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002197 generated with create_subprocess_thunk() which starts the executable to
2198 trace.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002199 """
2200 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
2201 assert os.path.isabs(cmd[0]), cmd[0]
2202 assert os.path.isabs(cwd), cwd
2203 assert os.path.normpath(cwd) == cwd, cwd
2204 with self._lock:
2205 if not self._initialized:
2206 raise TracingFailure(
2207 'Called Tracer.trace() on an unitialized object',
2208 None, None, None, tracename)
2209 assert tracename not in (i['trace'] for i in self._traces)
2210
2211 # Starts the script wrapper to start the child process. This signals the
2212 # dtrace script that this process is to be traced.
2213 stdout = stderr = None
2214 if output:
2215 stdout = subprocess.PIPE
2216 stderr = subprocess.STDOUT
2217 child_cmd = [
2218 sys.executable,
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002219 self._signal_script,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002220 tracename,
2221 ]
2222 # Call a dummy function so that dtrace knows I'm about to launch a process
2223 # that needs to be traced.
2224 # Yummy.
2225 child = subprocess.Popen(
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00002226 child_cmd + tools.fix_python_path(cmd),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002227 stdin=subprocess.PIPE,
2228 stdout=stdout,
2229 stderr=stderr,
2230 cwd=cwd)
2231 logging.debug('Started child pid: %d' % child.pid)
2232
2233 out = child.communicate()[0]
2234 # This doesn't mean tracing is done, one of the grand-child process may
2235 # still be alive. It will be tracked with the dtrace script.
2236
2237 with self._lock:
2238 assert tracename not in (i['trace'] for i in self._traces)
2239 self._traces.append(
2240 {
2241 'cmd': cmd,
2242 'cwd': cwd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002243 'pid': child.pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002244 'output': out,
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002245 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002246 })
2247 return child.returncode, out
2248
2249 def close(self, timeout=None):
2250 """Terminates dtrace."""
2251 logging.debug('close(%s)' % timeout)
2252 try:
2253 try:
2254 super(Dtrace.Tracer, self).close(timeout)
2255 # Signal dtrace that it should stop now.
maruel@chromium.orgf43e68b2012-10-15 20:23:10 +00002256 # ftruncate doesn't exist on Windows.
2257 os.ftruncate(self._dummy_file_id, 0) # pylint: disable=E1101
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002258 if timeout:
2259 start = time.time()
2260 # Use polling. :/
2261 while (self._dtrace.poll() is None and
2262 (time.time() - start) < timeout):
2263 time.sleep(0.1)
2264 self._dtrace.kill()
2265 self._dtrace.wait()
2266 finally:
2267 # Make sure to kill it in any case.
2268 if self._dtrace.poll() is None:
2269 try:
2270 self._dtrace.kill()
2271 self._dtrace.wait()
2272 except OSError:
2273 pass
2274
2275 if self._dtrace.returncode != 0:
2276 # Warn about any dtrace failure but basically ignore it.
2277 print 'dtrace failure: %s' % self._dtrace.returncode
2278 finally:
2279 os.close(self._dummy_file_id)
2280 os.remove(self._dummy_file_name)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002281
2282 def post_process_log(self):
2283 """Sorts the log back in order when each call occured.
2284
2285 dtrace doesn't save the buffer in strict order since it keeps one buffer
2286 per CPU.
2287 """
2288 super(Dtrace.Tracer, self).post_process_log()
2289 logname = self._logname + '.log'
2290 with open(logname, 'rb') as logfile:
2291 lines = [l for l in logfile if l.strip()]
2292 errors = [l for l in lines if l.startswith('dtrace:')]
2293 if errors:
2294 raise TracingFailure(
2295 'Found errors in the trace: %s' % '\n'.join(errors),
2296 None, None, None, logname)
2297 try:
2298 lines = sorted(lines, key=lambda l: int(l.split(' ', 1)[0]))
2299 except ValueError:
2300 raise TracingFailure(
2301 'Found errors in the trace: %s' % '\n'.join(
2302 l for l in lines if l.split(' ', 1)[0].isdigit()),
2303 None, None, None, logname)
2304 with open(logname, 'wb') as logfile:
2305 logfile.write(''.join(lines))
2306
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002307 def __init__(self, use_sudo=None):
2308 super(Dtrace, self).__init__()
2309 self.use_sudo = use_sudo
2310
2311 def get_tracer(self, logname):
2312 return self.Tracer(logname, self.use_sudo)
2313
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002314 @staticmethod
2315 def clean_trace(logname):
2316 for ext in ('', '.log'):
2317 if os.path.isfile(logname + ext):
2318 os.remove(logname + ext)
2319
2320 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00002321 def parse_log(cls, logname, blacklist, trace_name):
2322 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00002323 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002324
2325 def blacklist_more(filepath):
2326 # All the HFS metadata is in the form /.vol/...
2327 return blacklist(filepath) or re.match(r'^\/\.vol\/.+$', filepath)
2328
Marc-Antoine Ruelde011802013-11-12 15:19:47 -05002329 data = tools.read_json(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002330 out = []
2331 for item in data['traces']:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00002332 if trace_name and item['trace'] != trace_name:
2333 continue
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002334 result = {
2335 'output': item['output'],
2336 'trace': item['trace'],
2337 }
2338 try:
2339 context = cls.Context(blacklist_more, item['pid'], item['cwd'])
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002340 # It's fine to assume the file as UTF-8: OSX enforces the file names to
2341 # be valid UTF-8 and we control the log output.
2342 for line in codecs.open(logname + '.log', 'rb', encoding='utf-8'):
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002343 context.on_line(line)
2344 result['results'] = context.to_results()
2345 except TracingFailure:
2346 result['exception'] = sys.exc_info()
2347 out.append(result)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002348 return out
2349
2350
2351class LogmanTrace(ApiBase):
2352 """Uses the native Windows ETW based tracing functionality to trace a child
2353 process.
2354
2355 Caveat: this implementations doesn't track cwd or initial_cwd. It is because
2356 the Windows Kernel doesn't have a concept of 'current working directory' at
2357 all. A Win32 process has a map of current directories, one per drive letter
2358 and it is managed by the user mode kernel32.dll. In kernel, a file is always
2359 opened relative to another file_object or as an absolute path. All the current
2360 working directory logic is done in user mode.
2361 """
2362 class Context(ApiBase.Context):
2363 """Processes a ETW log line and keeps the list of existent and non
2364 existent files accessed.
2365
2366 Ignores directories.
2367 """
2368 # These indexes are for the stripped version in json.
2369 EVENT_NAME = 0
2370 TYPE = 1
2371 PID = 2
2372 TID = 3
2373 PROCESSOR_ID = 4
2374 TIMESTAMP = 5
2375 USER_DATA = 6
2376
2377 class Process(ApiBase.Context.Process):
2378 def __init__(self, *args):
2379 super(LogmanTrace.Context.Process, self).__init__(*args)
2380 # Handle file objects that succeeded.
2381 self.file_objects = {}
2382
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002383 def __init__(self, blacklist, thunk_pid, trace_name, thunk_cmd):
2384 logging.info(
2385 '%s(%d, %s, %s)', self.__class__.__name__, thunk_pid, trace_name,
2386 thunk_cmd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002387 super(LogmanTrace.Context, self).__init__(blacklist)
maruel@chromium.org561d4b22013-09-26 21:08:08 +00002388 self._drive_map = file_path.DosDriveMap()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002389 # Threads mapping to the corresponding process id.
2390 self._threads_active = {}
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002391 # Process ID of the tracer, e.g. the temporary script created by
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002392 # create_subprocess_thunk(). This is tricky because the process id may
2393 # have been reused.
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002394 self._thunk_pid = thunk_pid
2395 self._thunk_cmd = thunk_cmd
2396 self._trace_name = trace_name
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002397 self._line_number = 0
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002398 self._thunk_process = None
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002399
2400 def on_line(self, line):
2401 """Processes a json Event line."""
2402 self._line_number += 1
2403 try:
2404 # By Opcode
2405 handler = getattr(
2406 self,
2407 'handle_%s_%s' % (line[self.EVENT_NAME], line[self.TYPE]),
2408 None)
2409 if not handler:
2410 raise TracingFailure(
2411 'Unexpected event %s_%s' % (
2412 line[self.EVENT_NAME], line[self.TYPE]),
2413 None, None, None)
2414 handler(line)
2415 except TracingFailure, e:
2416 # Hack in the values since the handler could be a static function.
2417 e.pid = line[self.PID]
2418 e.line = line
2419 e.line_number = self._line_number
2420 # Re-raise the modified exception.
2421 raise
2422 except (KeyError, NotImplementedError, ValueError), e:
2423 raise TracingFailure(
2424 'Trace generated a %s exception: %s' % (
2425 e.__class__.__name__, str(e)),
2426 line[self.PID],
2427 self._line_number,
2428 line,
2429 e)
2430
2431 def to_results(self):
2432 if not self.root_process:
2433 raise TracingFailure(
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002434 'Failed to detect the initial process %d' % self._thunk_pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002435 None, None, None)
2436 process = self.root_process.to_results_process()
2437 return Results(process)
2438
2439 def _thread_to_process(self, tid):
2440 """Finds the process from the thread id."""
2441 tid = int(tid, 16)
2442 pid = self._threads_active.get(tid)
2443 if not pid or not self._process_lookup.get(pid):
2444 return
2445 return self._process_lookup[pid]
2446
2447 @classmethod
2448 def handle_EventTrace_Header(cls, line):
2449 """Verifies no event was dropped, e.g. no buffer overrun occured."""
2450 BUFFER_SIZE = cls.USER_DATA
2451 #VERSION = cls.USER_DATA + 1
2452 #PROVIDER_VERSION = cls.USER_DATA + 2
2453 #NUMBER_OF_PROCESSORS = cls.USER_DATA + 3
2454 #END_TIME = cls.USER_DATA + 4
2455 #TIMER_RESOLUTION = cls.USER_DATA + 5
2456 #MAX_FILE_SIZE = cls.USER_DATA + 6
2457 #LOG_FILE_MODE = cls.USER_DATA + 7
2458 #BUFFERS_WRITTEN = cls.USER_DATA + 8
2459 #START_BUFFERS = cls.USER_DATA + 9
2460 #POINTER_SIZE = cls.USER_DATA + 10
2461 EVENTS_LOST = cls.USER_DATA + 11
2462 #CPU_SPEED = cls.USER_DATA + 12
2463 #LOGGER_NAME = cls.USER_DATA + 13
2464 #LOG_FILE_NAME = cls.USER_DATA + 14
2465 #BOOT_TIME = cls.USER_DATA + 15
2466 #PERF_FREQ = cls.USER_DATA + 16
2467 #START_TIME = cls.USER_DATA + 17
2468 #RESERVED_FLAGS = cls.USER_DATA + 18
2469 #BUFFERS_LOST = cls.USER_DATA + 19
2470 #SESSION_NAME_STRING = cls.USER_DATA + 20
2471 #LOG_FILE_NAME_STRING = cls.USER_DATA + 21
2472 if line[EVENTS_LOST] != '0':
2473 raise TracingFailure(
2474 ( '%s events were lost during trace, please increase the buffer '
2475 'size from %s') % (line[EVENTS_LOST], line[BUFFER_SIZE]),
2476 None, None, None)
2477
2478 def handle_FileIo_Cleanup(self, line):
2479 """General wisdom: if a file is closed, it's because it was opened.
2480
2481 Note that FileIo_Close is not used since if a file was opened properly but
2482 not closed before the process exits, only Cleanup will be logged.
2483 """
2484 #IRP = self.USER_DATA
2485 TTID = self.USER_DATA + 1 # Thread ID, that's what we want.
2486 FILE_OBJECT = self.USER_DATA + 2
2487 #FILE_KEY = self.USER_DATA + 3
2488 proc = self._thread_to_process(line[TTID])
2489 if not proc:
2490 # Not a process we care about.
2491 return
2492 file_object = line[FILE_OBJECT]
2493 if file_object in proc.file_objects:
maruel@chromium.org538141b2013-06-03 20:57:17 +00002494 filepath, access_type = proc.file_objects.pop(file_object)
2495 proc.add_file(filepath, access_type)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002496
2497 def handle_FileIo_Create(self, line):
2498 """Handles a file open.
2499
2500 All FileIo events are described at
2501 http://msdn.microsoft.com/library/windows/desktop/aa363884.aspx
2502 for some value of 'description'.
2503
2504 " (..) process and thread id values of the IO events (..) are not valid "
2505 http://msdn.microsoft.com/magazine/ee358703.aspx
2506
2507 The FileIo.Create event doesn't return if the CreateFile() call
2508 succeeded, so keep track of the file_object and check that it is
2509 eventually closed with FileIo_Cleanup.
2510 """
2511 #IRP = self.USER_DATA
2512 TTID = self.USER_DATA + 1 # Thread ID, that's what we want.
2513 FILE_OBJECT = self.USER_DATA + 2
2514 #CREATE_OPTIONS = self.USER_DATA + 3
2515 #FILE_ATTRIBUTES = self.USER_DATA + 4
maruel@chromium.org538141b2013-06-03 20:57:17 +00002516 #SHARE_ACCESS = self.USER_DATA + 5
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002517 OPEN_PATH = self.USER_DATA + 6
2518
2519 proc = self._thread_to_process(line[TTID])
2520 if not proc:
2521 # Not a process we care about.
2522 return
2523
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002524 raw_path = line[OPEN_PATH]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002525 # Ignore directories and bare drive right away.
2526 if raw_path.endswith(os.path.sep):
2527 return
2528 filepath = self._drive_map.to_win32(raw_path)
2529 # Ignore bare drive right away. Some may still fall through with format
2530 # like '\\?\X:'
2531 if len(filepath) == 2:
2532 return
2533 file_object = line[FILE_OBJECT]
2534 if os.path.isdir(filepath):
2535 # There is no O_DIRECTORY equivalent on Windows. The closed is
2536 # FILE_FLAG_BACKUP_SEMANTICS but it's not exactly right either. So
2537 # simply discard directories are they are found.
2538 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00002539 # Override any stale file object.
2540 # TODO(maruel): Figure out a way to detect if the file was opened for
2541 # reading or writting. Sadly CREATE_OPTIONS doesn't seem to be of any help
2542 # here. For now mark as None to make it clear we have no idea what it is
2543 # about.
2544 proc.file_objects[file_object] = (filepath, None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002545
2546 def handle_FileIo_Rename(self, line):
2547 # TODO(maruel): Handle?
2548 pass
2549
2550 def handle_Process_End(self, line):
2551 pid = line[self.PID]
2552 if self._process_lookup.get(pid):
2553 logging.info('Terminated: %d' % pid)
2554 self._process_lookup[pid] = None
2555 else:
2556 logging.debug('Terminated: %d' % pid)
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002557 if self._thunk_process and self._thunk_process.pid == pid:
2558 self._thunk_process = None
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002559
2560 def handle_Process_Start(self, line):
2561 """Handles a new child process started by PID."""
2562 #UNIQUE_PROCESS_KEY = self.USER_DATA
2563 PROCESS_ID = self.USER_DATA + 1
2564 #PARENT_PID = self.USER_DATA + 2
2565 #SESSION_ID = self.USER_DATA + 3
2566 #EXIT_STATUS = self.USER_DATA + 4
2567 #DIRECTORY_TABLE_BASE = self.USER_DATA + 5
2568 #USER_SID = self.USER_DATA + 6
2569 IMAGE_FILE_NAME = self.USER_DATA + 7
2570 COMMAND_LINE = self.USER_DATA + 8
2571
2572 ppid = line[self.PID]
2573 pid = int(line[PROCESS_ID], 16)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002574 command_line = CommandLineToArgvW(line[COMMAND_LINE])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002575 logging.debug(
2576 'New process %d->%d (%s) %s' %
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002577 (ppid, pid, line[IMAGE_FILE_NAME], command_line))
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002578
2579 if pid == self._thunk_pid:
2580 # Need to ignore processes we don't know about because the log is
2581 # system-wide. self._thunk_pid shall start only one process.
2582 # This is tricky though because Windows *loves* to reuse process id and
2583 # it happens often that the process ID of the thunk script created by
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002584 # create_subprocess_thunk() is reused. So just detecting the pid here is
2585 # not sufficient, we must confirm the command line.
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002586 if command_line[:len(self._thunk_cmd)] != self._thunk_cmd:
2587 logging.info(
2588 'Ignoring duplicate pid %d for %s: %s while searching for %s',
2589 pid, self._trace_name, command_line, self._thunk_cmd)
2590 return
2591
2592 # TODO(maruel): The check is quite weak. Add the thunk path.
2593 if self._thunk_process:
2594 raise TracingFailure(
2595 ( 'Parent process is _thunk_pid(%d) but thunk_process(%d) is '
2596 'already set') % (self._thunk_pid, self._thunk_process.pid),
2597 None, None, None)
2598 proc = self.Process(self.blacklist, pid, None)
2599 self._thunk_process = proc
2600 return
2601 elif ppid == self._thunk_pid and self._thunk_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002602 proc = self.Process(self.blacklist, pid, None)
2603 self.root_process = proc
2604 ppid = None
2605 elif self._process_lookup.get(ppid):
2606 proc = self.Process(self.blacklist, pid, None)
2607 self._process_lookup[ppid].children.append(proc)
2608 else:
2609 # Ignore
2610 return
2611 self._process_lookup[pid] = proc
2612
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002613 proc.command = command_line
2614 proc.executable = line[IMAGE_FILE_NAME]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002615 # proc.command[0] may be the absolute path of 'executable' but it may be
2616 # anything else too. If it happens that command[0] ends with executable,
2617 # use it, otherwise defaults to the base name.
2618 cmd0 = proc.command[0].lower()
2619 if not cmd0.endswith('.exe'):
2620 # TODO(maruel): That's not strictly true either.
2621 cmd0 += '.exe'
2622 if cmd0.endswith(proc.executable) and os.path.isfile(cmd0):
2623 # Fix the path.
2624 cmd0 = cmd0.replace('/', os.path.sep)
2625 cmd0 = os.path.normpath(cmd0)
maruel@chromium.org561d4b22013-09-26 21:08:08 +00002626 proc.executable = file_path.get_native_path_case(cmd0)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002627 logging.info(
2628 'New child: %s -> %d %s' % (ppid, pid, proc.executable))
2629
2630 def handle_Thread_End(self, line):
2631 """Has the same parameters as Thread_Start."""
2632 tid = int(line[self.TID], 16)
2633 self._threads_active.pop(tid, None)
2634
2635 def handle_Thread_Start(self, line):
2636 """Handles a new thread created.
2637
2638 Do not use self.PID here since a process' initial thread is created by
2639 the parent process.
2640 """
2641 PROCESS_ID = self.USER_DATA
2642 TTHREAD_ID = self.USER_DATA + 1
2643 #STACK_BASE = self.USER_DATA + 2
2644 #STACK_LIMIT = self.USER_DATA + 3
2645 #USER_STACK_BASE = self.USER_DATA + 4
2646 #USER_STACK_LIMIT = self.USER_DATA + 5
2647 #AFFINITY = self.USER_DATA + 6
2648 #WIN32_START_ADDR = self.USER_DATA + 7
2649 #TEB_BASE = self.USER_DATA + 8
2650 #SUB_PROCESS_TAG = self.USER_DATA + 9
2651 #BASE_PRIORITY = self.USER_DATA + 10
2652 #PAGE_PRIORITY = self.USER_DATA + 11
2653 #IO_PRIORITY = self.USER_DATA + 12
2654 #THREAD_FLAGS = self.USER_DATA + 13
2655 # Do not use self.PID here since a process' initial thread is created by
2656 # the parent process.
2657 pid = int(line[PROCESS_ID], 16)
2658 tid = int(line[TTHREAD_ID], 16)
2659 logging.debug('New thread pid:%d, tid:%d' % (pid, tid))
2660 self._threads_active[tid] = pid
2661
2662 @classmethod
2663 def supported_events(cls):
2664 """Returns all the procesed events."""
2665 out = []
2666 for member in dir(cls):
2667 match = re.match(r'^handle_([A-Za-z]+)_([A-Za-z]+)$', member)
2668 if match:
2669 out.append(match.groups())
2670 return out
2671
2672 class Tracer(ApiBase.Tracer):
2673 # The basic headers.
2674 EXPECTED_HEADER = [
2675 u'Event Name',
2676 u'Type',
2677 u'Event ID',
2678 u'Version',
2679 u'Channel',
2680 u'Level', # 5
2681 u'Opcode',
2682 u'Task',
2683 u'Keyword',
2684 u'PID',
2685 u'TID', # 10
2686 u'Processor Number',
2687 u'Instance ID',
2688 u'Parent Instance ID',
2689 u'Activity ID',
2690 u'Related Activity ID', # 15
2691 u'Clock-Time',
2692 u'Kernel(ms)', # Both have a resolution of ~15ms which makes them
2693 u'User(ms)', # pretty much useless.
2694 u'User Data', # Extra arguments that are event-specific.
2695 ]
2696 # Only the useful headers common to all entries are listed there. Any column
2697 # at 19 or higher is dependent on the specific event.
2698 EVENT_NAME = 0
2699 TYPE = 1
2700 PID = 9
2701 TID = 10
2702 PROCESSOR_ID = 11
2703 TIMESTAMP = 16
2704 NULL_GUID = '{00000000-0000-0000-0000-000000000000}'
2705 USER_DATA = 19
2706
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002707 class CsvReader(object):
2708 """CSV reader that reads files generated by tracerpt.exe.
2709
2710 csv.reader() fails to read them properly, it mangles file names quoted
2711 with "" with a comma in it.
2712 """
2713 # 0. Had a ',' or one of the following ' ' after a comma, next should
2714 # be ' ', '"' or string or ',' for an empty field.
2715 ( HAD_DELIMITER,
2716 # 1. Processing an unquoted field up to ','.
2717 IN_STR,
2718 # 2. Processing a new field starting with '"'.
2719 STARTING_STR_QUOTED,
2720 # 3. Second quote in a row at the start of a field. It could be either
2721 # '""foo""' or '""'. Who the hell thought it was a great idea to use
2722 # the same character for delimiting and escaping?
2723 STARTING_SECOND_QUOTE,
2724 # 4. A quote inside a quoted string where the previous character was
2725 # not a quote, so the string is not empty. Can be either: end of a
2726 # quoted string (a delimiter) or a quote escape. The next char must be
2727 # either '"' or ','.
2728 HAD_QUOTE_IN_QUOTED,
2729 # 5. Second quote inside a quoted string.
2730 HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED,
2731 # 6. Processing a field that started with '"'.
2732 IN_STR_QUOTED) = range(7)
2733
2734 def __init__(self, f):
2735 self.f = f
2736
2737 def __iter__(self):
2738 return self
2739
2740 def next(self):
2741 """Splits the line in fields."""
2742 line = self.f.readline()
2743 if not line:
2744 raise StopIteration()
2745 line = line.strip()
2746 fields = []
2747 state = self.HAD_DELIMITER
2748 for i, c in enumerate(line):
2749 if state == self.HAD_DELIMITER:
2750 if c == ',':
2751 # Empty field.
2752 fields.append('')
2753 elif c == ' ':
2754 # Ignore initial whitespaces
2755 pass
2756 elif c == '"':
2757 state = self.STARTING_STR_QUOTED
2758 fields.append('')
2759 else:
2760 # Start of a new field.
2761 state = self.IN_STR
2762 fields.append(c)
2763
2764 elif state == self.IN_STR:
2765 # Do not accept quote inside unquoted field.
2766 assert c != '"', (i, c, line, fields)
2767 if c == ',':
2768 fields[-1] = fields[-1].strip()
2769 state = self.HAD_DELIMITER
2770 else:
2771 fields[-1] = fields[-1] + c
2772
2773 elif state == self.STARTING_STR_QUOTED:
2774 if c == '"':
2775 # Do not store the character yet.
2776 state = self.STARTING_SECOND_QUOTE
2777 else:
2778 state = self.IN_STR_QUOTED
2779 fields[-1] = fields[-1] + c
2780
2781 elif state == self.STARTING_SECOND_QUOTE:
2782 if c == ',':
2783 # It was an empty field. '""' == ''.
2784 state = self.HAD_DELIMITER
2785 else:
2786 fields[-1] = fields[-1] + '"' + c
2787 state = self.IN_STR_QUOTED
2788
2789 elif state == self.HAD_QUOTE_IN_QUOTED:
2790 if c == ',':
2791 # End of the string.
2792 state = self.HAD_DELIMITER
2793 elif c == '"':
2794 state = self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED
2795 else:
2796 # The previous double-quote was just an unescaped quote.
2797 fields[-1] = fields[-1] + '"' + c
2798 state = self.IN_STR_QUOTED
2799
2800 elif state == self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED:
2801 if c == ',':
2802 # End of the string.
2803 state = self.HAD_DELIMITER
2804 fields[-1] = fields[-1] + '"'
2805 else:
maruel@chromium.org261d1352013-02-25 19:23:20 +00002806 # That's just how the logger rolls. Revert back to appending the
2807 # char and "guess" it was a quote in a double-quoted string.
2808 state = self.IN_STR_QUOTED
2809 fields[-1] = fields[-1] + '"' + c
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002810
2811 elif state == self.IN_STR_QUOTED:
2812 if c == '"':
2813 # Could be a delimiter or an escape.
2814 state = self.HAD_QUOTE_IN_QUOTED
2815 else:
2816 fields[-1] = fields[-1] + c
2817
2818 if state == self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED:
2819 fields[-1] = fields[-1] + '"'
2820 else:
2821 assert state in (
2822 # Terminated with a normal field.
2823 self.IN_STR,
2824 # Terminated with an empty field.
2825 self.STARTING_SECOND_QUOTE,
2826 # Terminated with a normal quoted field.
2827 self.HAD_QUOTE_IN_QUOTED), (
2828 line, state, fields)
2829 return fields
2830
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002831 def __init__(self, logname):
2832 """Starts the log collection.
2833
2834 Requires administrative access. logman.exe is synchronous so no need for a
2835 "warmup" call. 'Windows Kernel Trace' is *localized* so use its GUID
2836 instead. The GUID constant name is SystemTraceControlGuid. Lovely.
2837
2838 One can get the list of potentially interesting providers with:
2839 "logman query providers | findstr /i file"
2840 """
2841 super(LogmanTrace.Tracer, self).__init__(logname)
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002842 self._signal_script = create_subprocess_thunk()
2843 self._scripts_to_cleanup.append(self._signal_script)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002844 cmd_start = [
2845 'logman.exe',
2846 'start',
2847 'NT Kernel Logger',
2848 '-p', '{9e814aad-3204-11d2-9a82-006008a86939}',
2849 # splitio,fileiocompletion,syscall,file,cswitch,img
2850 '(process,fileio,thread)',
2851 '-o', self._logname + '.etl',
2852 '-ets', # Send directly to kernel
2853 # Values extracted out of thin air.
2854 # Event Trace Session buffer size in kb.
2855 '-bs', '10240',
2856 # Number of Event Trace Session buffers.
2857 '-nb', '16', '256',
2858 ]
2859 logging.debug('Running: %s' % cmd_start)
2860 try:
2861 subprocess.check_call(
2862 cmd_start,
2863 stdin=subprocess.PIPE,
2864 stdout=subprocess.PIPE,
2865 stderr=subprocess.STDOUT)
2866 except subprocess.CalledProcessError, e:
2867 if e.returncode == -2147024891:
2868 print >> sys.stderr, 'Please restart with an elevated admin prompt'
2869 elif e.returncode == -2144337737:
2870 print >> sys.stderr, (
2871 'A kernel trace was already running, stop it and try again')
2872 raise
2873
2874 def trace(self, cmd, cwd, tracename, output):
2875 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
2876 assert os.path.isabs(cmd[0]), cmd[0]
2877 assert os.path.isabs(cwd), cwd
2878 assert os.path.normpath(cwd) == cwd, cwd
2879 with self._lock:
2880 if not self._initialized:
2881 raise TracingFailure(
2882 'Called Tracer.trace() on an unitialized object',
2883 None, None, None, tracename)
2884 assert tracename not in (i['trace'] for i in self._traces)
2885
2886 # Use "logman -?" for help.
2887
2888 stdout = stderr = None
2889 if output:
2890 stdout = subprocess.PIPE
2891 stderr = subprocess.STDOUT
2892
2893 # Run the child process.
2894 logging.debug('Running: %s' % cmd)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002895 # Use the temporary script generated with create_subprocess_thunk() so we
2896 # have a clear pid owner. Since trace_inputs.py can be used as a library
2897 # and could trace multiple processes simultaneously, it makes it more
2898 # complex if the executable to be traced is executed directly here. It
2899 # also solves issues related to logman.exe that needs to be executed to
2900 # control the kernel trace.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002901 child_cmd = [
2902 sys.executable,
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002903 self._signal_script,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002904 tracename,
2905 ]
2906 child = subprocess.Popen(
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00002907 child_cmd + tools.fix_python_path(cmd),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002908 cwd=cwd,
2909 stdin=subprocess.PIPE,
2910 stdout=stdout,
2911 stderr=stderr)
2912 logging.debug('Started child pid: %d' % child.pid)
2913 out = child.communicate()[0]
2914 # This doesn't mean all the grand-children are done. Sadly, we don't have
2915 # a good way to determine that.
2916
2917 with self._lock:
2918 assert tracename not in (i['trace'] for i in self._traces)
2919 self._traces.append({
maruel@chromium.orgf43e68b2012-10-15 20:23:10 +00002920 'cmd': cmd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002921 'cwd': cwd,
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002922 'output': out,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002923 'pid': child.pid,
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002924 # Used to figure out the real process when process ids are reused.
2925 'thunk_cmd': child_cmd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002926 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002927 })
2928
2929 return child.returncode, out
2930
2931 def close(self, _timeout=None):
2932 """Stops the kernel log collection and converts the traces to text
2933 representation.
2934 """
2935 with self._lock:
maruel@chromium.org808e3f22013-05-16 16:42:54 +00002936 try:
2937 super(LogmanTrace.Tracer, self).close()
2938 finally:
2939 cmd_stop = [
2940 'logman.exe',
2941 'stop',
2942 'NT Kernel Logger',
2943 '-ets', # Sends the command directly to the kernel.
2944 ]
2945 logging.debug('Running: %s' % cmd_stop)
2946 subprocess.check_call(
2947 cmd_stop,
2948 stdin=subprocess.PIPE,
2949 stdout=subprocess.PIPE,
2950 stderr=subprocess.STDOUT)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002951
2952 def post_process_log(self):
2953 """Converts the .etl file into .csv then into .json."""
2954 super(LogmanTrace.Tracer, self).post_process_log()
2955 logformat = 'csv'
2956 self._convert_log(logformat)
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00002957 self._trim_log(logformat)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002958
maruel@chromium.org808e3f22013-05-16 16:42:54 +00002959 def _gen_logdata(self):
2960 return {
2961 'format': 'csv',
2962 'traces': self._traces,
2963 }
2964
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00002965 def _trim_log(self, logformat):
2966 """Reduces the amount of data in original log by generating a 'reduced'
2967 log.
2968 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002969 if logformat == 'csv_utf16':
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002970 file_handle = codecs.open(
2971 self._logname + '.' + logformat, 'r', encoding='utf-16')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002972
2973 elif logformat == 'csv':
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002974 assert sys.getfilesystemencoding() == 'mbcs'
2975 file_handle = codecs.open(
2976 self._logname + '.' + logformat, 'r',
2977 encoding=get_current_encoding())
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002978
2979 supported_events = LogmanTrace.Context.supported_events()
2980
2981 def trim(generator):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00002982 """Loads items from the generator and returns the interesting data.
2983
2984 It filters out any uninteresting line and reduce the amount of data in
2985 the trace.
2986 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002987 for index, line in enumerate(generator):
2988 if not index:
2989 if line != self.EXPECTED_HEADER:
2990 raise TracingFailure(
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002991 'Found malformed header: %s' % line,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002992 None, None, None)
2993 continue
2994 # As you can see, the CSV is full of useful non-redundant information:
2995 if (line[2] != '0' or # Event ID
2996 line[3] not in ('2', '3') or # Version
2997 line[4] != '0' or # Channel
2998 line[5] != '0' or # Level
2999 line[7] != '0' or # Task
3000 line[8] != '0x0000000000000000' or # Keyword
3001 line[12] != '' or # Instance ID
3002 line[13] != '' or # Parent Instance ID
3003 line[14] != self.NULL_GUID or # Activity ID
3004 line[15] != ''): # Related Activity ID
3005 raise TracingFailure(
3006 'Found unexpected values in line: %s' % ' '.join(line),
3007 None, None, None)
3008
3009 if (line[self.EVENT_NAME], line[self.TYPE]) not in supported_events:
3010 continue
3011
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003012 yield [
3013 line[self.EVENT_NAME],
3014 line[self.TYPE],
3015 line[self.PID],
3016 line[self.TID],
3017 line[self.PROCESSOR_ID],
3018 line[self.TIMESTAMP],
3019 ] + line[self.USER_DATA:]
3020
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003021 # must not convert the trim() call into a list, since it will use too much
3022 # memory for large trace. use a csv file as a workaround since the json
3023 # parser requires a complete in-memory file.
3024 with open('%s.preprocessed' % self._logname, 'wb') as f:
3025 # $ and * can't be used in file name on windows, reducing the likelihood
3026 # of having to escape a string.
3027 out = csv.writer(
3028 f, delimiter='$', quotechar='*', quoting=csv.QUOTE_MINIMAL)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003029 for line in trim(self.CsvReader(file_handle)):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003030 out.writerow([s.encode('utf-8') for s in line])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003031
3032 def _convert_log(self, logformat):
3033 """Converts the ETL trace to text representation.
3034
3035 Normally, 'csv' is sufficient. If complex scripts are used (like eastern
3036 languages), use 'csv_utf16'. If localization gets in the way, use 'xml'.
3037
3038 Arguments:
3039 - logformat: Text format to be generated, csv, csv_utf16 or xml.
3040
3041 Use "tracerpt -?" for help.
3042 """
3043 LOCALE_INVARIANT = 0x7F
3044 windll.kernel32.SetThreadLocale(LOCALE_INVARIANT)
3045 cmd_convert = [
3046 'tracerpt.exe',
3047 '-l', self._logname + '.etl',
3048 '-o', self._logname + '.' + logformat,
3049 '-gmt', # Use UTC
3050 '-y', # No prompt
3051 # Use -of XML to get the header of each items after column 19, e.g. all
3052 # the actual headers of 'User Data'.
3053 ]
3054
3055 if logformat == 'csv':
3056 # tracerpt localizes the 'Type' column, for major brainfuck
3057 # entertainment. I can't imagine any sane reason to do that.
3058 cmd_convert.extend(['-of', 'CSV'])
3059 elif logformat == 'csv_utf16':
3060 # This causes it to use UTF-16, which doubles the log size but ensures
3061 # the log is readable for non-ASCII characters.
3062 cmd_convert.extend(['-of', 'CSV', '-en', 'Unicode'])
3063 elif logformat == 'xml':
3064 cmd_convert.extend(['-of', 'XML'])
3065 else:
3066 raise ValueError('Unexpected log format \'%s\'' % logformat)
3067 logging.debug('Running: %s' % cmd_convert)
3068 # This can takes tens of minutes for large logs.
3069 # Redirects all output to stderr.
3070 subprocess.check_call(
3071 cmd_convert,
3072 stdin=subprocess.PIPE,
3073 stdout=sys.stderr,
3074 stderr=sys.stderr)
3075
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003076 def __init__(self, use_sudo=False): # pylint: disable=W0613
3077 super(LogmanTrace, self).__init__()
3078 # Ignore use_sudo. It's irrelevant on Windows but kept to simplify the API.
3079
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003080 @staticmethod
3081 def clean_trace(logname):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003082 for ext in ('', '.csv', '.etl', '.json', '.xml', '.preprocessed'):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003083 if os.path.isfile(logname + ext):
3084 os.remove(logname + ext)
3085
3086 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003087 def parse_log(cls, logname, blacklist, trace_name):
3088 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00003089 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003090
3091 def blacklist_more(filepath):
3092 # All the NTFS metadata is in the form x:\$EXTEND or stuff like that.
3093 return blacklist(filepath) or re.match(r'[A-Z]\:\\\$EXTEND', filepath)
3094
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003095 # Create a list of (Context, result_dict) tuples. This is necessary because
3096 # the csv file may be larger than the amount of available memory.
3097 contexes = [
3098 (
3099 cls.Context(
3100 blacklist_more, item['pid'], item['trace'], item['thunk_cmd']),
3101 {
3102 'output': item['output'],
3103 'trace': item['trace'],
3104 },
3105 )
Marc-Antoine Ruelde011802013-11-12 15:19:47 -05003106 for item in tools.read_json(logname)['traces']
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003107 if not trace_name or item['trace'] == trace_name
3108 ]
3109
3110 # The log may be too large to fit in memory and it is not efficient to read
3111 # it multiple times, so multiplex the contexes instead, which is slightly
3112 # more awkward.
3113 with open('%s.preprocessed' % logname, 'rb') as f:
3114 lines = csv.reader(
3115 f, delimiter='$', quotechar='*', quoting=csv.QUOTE_MINIMAL)
3116 for encoded in lines:
3117 line = [s.decode('utf-8') for s in encoded]
3118 # Convert the PID in-place from hex.
3119 line[cls.Context.PID] = int(line[cls.Context.PID], 16)
3120 for context in contexes:
3121 if 'exception' in context[1]:
3122 continue
3123 try:
3124 context[0].on_line(line)
3125 except TracingFailure:
3126 context[1]['exception'] = sys.exc_info()
3127
3128 for context in contexes:
3129 if 'exception' in context[1]:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003130 continue
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003131 context[1]['results'] = context[0].to_results()
3132
3133 return [context[1] for context in contexes]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003134
3135
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003136def get_api(**kwargs):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003137 """Returns the correct implementation for the current OS."""
3138 if sys.platform == 'cygwin':
3139 raise NotImplementedError(
3140 'Not implemented for cygwin, start the script from Win32 python')
3141 flavors = {
3142 'win32': LogmanTrace,
3143 'darwin': Dtrace,
3144 'sunos5': Dtrace,
3145 'freebsd7': Dtrace,
3146 'freebsd8': Dtrace,
3147 }
3148 # Defaults to strace.
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003149 return flavors.get(sys.platform, Strace)(**kwargs)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003150
3151
3152def extract_directories(root_dir, files, blacklist):
3153 """Detects if all the files in a directory are in |files| and if so, replace
3154 the individual files by a Results.Directory instance.
3155
3156 Takes a list of Results.File instances and returns a shorter list of
3157 Results.File and Results.Directory instances.
3158
3159 Arguments:
3160 - root_dir: Optional base directory that shouldn't be search further.
3161 - files: list of Results.File instances.
maruel@chromium.org3683afe2013-07-27 00:09:27 +00003162 - blacklist: lambda to reject unneeded files, for example r'.+\.pyc'.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003163 """
3164 logging.info(
3165 'extract_directories(%s, %d files, ...)' % (root_dir, len(files)))
3166 assert not (root_dir or '').endswith(os.path.sep), root_dir
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003167 # It is important for root_dir to not be a symlinked path, make sure to call
3168 # os.path.realpath() as needed.
3169 assert not root_dir or (
maruel@chromium.org561d4b22013-09-26 21:08:08 +00003170 os.path.realpath(file_path.get_native_path_case(root_dir)) == root_dir)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003171 assert not any(isinstance(f, Results.Directory) for f in files)
3172 # Remove non existent files.
3173 files = [f for f in files if f.existent]
3174 if not files:
3175 return files
3176 # All files must share the same root, which can be None.
3177 assert len(set(f.root for f in files)) == 1, set(f.root for f in files)
3178
3179 # Creates a {directory: {filename: File}} mapping, up to root.
3180 buckets = {}
3181 if root_dir:
3182 buckets[root_dir] = {}
3183 for fileobj in files:
3184 path = fileobj.full_path
3185 directory = os.path.dirname(path)
3186 assert directory
3187 # Do not use os.path.basename() so trailing os.path.sep is kept.
3188 basename = path[len(directory)+1:]
3189 files_in_directory = buckets.setdefault(directory, {})
3190 files_in_directory[basename] = fileobj
3191 # Add all the directories recursively up to root.
3192 while True:
3193 old_d = directory
3194 directory = os.path.dirname(directory)
3195 if directory + os.path.sep == root_dir or directory == old_d:
3196 break
3197 buckets.setdefault(directory, {})
3198
3199 root_prefix = len(root_dir) + 1 if root_dir else 0
3200 for directory in sorted(buckets, reverse=True):
maruel@chromium.orgb1678e62013-03-19 15:55:17 +00003201 if not os.path.isdir(directory):
3202 logging.debug(
3203 '%s was a directory but doesn\'t exist anymore; ignoring', directory)
3204 continue
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003205 actual = set(f for f in os.listdir(directory) if not blacklist(f))
3206 expected = set(buckets[directory])
3207 if not (actual - expected):
3208 parent = os.path.dirname(directory)
3209 buckets[parent][os.path.basename(directory)] = Results.Directory(
3210 root_dir,
3211 directory[root_prefix:],
3212 False,
3213 sum(f.size for f in buckets[directory].itervalues()),
3214 sum(f.nb_files for f in buckets[directory].itervalues()))
3215 # Remove the whole bucket.
3216 del buckets[directory]
3217
3218 # Reverse the mapping with what remains. The original instances are returned,
3219 # so the cached meta data is kept.
3220 files = sum((x.values() for x in buckets.itervalues()), [])
3221 return sorted(files, key=lambda x: x.path)
3222
3223
3224def trace(logfile, cmd, cwd, api, output):
3225 """Traces an executable. Returns (returncode, output) from api.
3226
3227 Arguments:
3228 - logfile: file to write to.
3229 - cmd: command to run.
3230 - cwd: current directory to start the process in.
3231 - api: a tracing api instance.
3232 - output: if True, returns output, otherwise prints it at the console.
3233 """
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00003234 cmd = tools.fix_python_path(cmd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003235 api.clean_trace(logfile)
3236 with api.get_tracer(logfile) as tracer:
3237 return tracer.trace(cmd, cwd, 'default', output)
3238
3239
maruel@chromium.orge5322512013-08-19 20:17:57 +00003240def CMDclean(parser, args):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003241 """Cleans up traces."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003242 options, args = parser.parse_args(args)
3243 api = get_api()
3244 api.clean_trace(options.log)
3245 return 0
3246
3247
maruel@chromium.orge5322512013-08-19 20:17:57 +00003248def CMDtrace(parser, args):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003249 """Traces an executable."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003250 parser.allow_interspersed_args = False
3251 parser.add_option(
3252 '-q', '--quiet', action='store_true',
3253 help='Redirects traced executable output to /dev/null')
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003254 parser.add_option(
3255 '-s', '--sudo', action='store_true',
3256 help='Use sudo when shelling out the tracer tool (ignored on Windows)')
3257 parser.add_option(
3258 '-n', '--no-sudo', action='store_false',
3259 help='Don\'t use sudo')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003260 options, args = parser.parse_args(args)
3261
3262 if not args:
3263 parser.error('Please provide a command to run')
3264
Marc-Antoine Ruel5906fcd2013-11-08 21:02:24 -05003265 if not can_trace():
3266 parser.error('Please rerun this program with admin privileges')
3267
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003268 if not os.path.isabs(args[0]) and os.access(args[0], os.X_OK):
3269 args[0] = os.path.abspath(args[0])
3270
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003271 # options.sudo default value is None, which is to do whatever tracer defaults
3272 # do.
3273 api = get_api(use_sudo=options.sudo)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003274 return trace(options.log, args, os.getcwd(), api, options.quiet)[0]
3275
3276
maruel@chromium.orge5322512013-08-19 20:17:57 +00003277def CMDread(parser, args):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003278 """Reads the logs and prints the result."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003279 parser.add_option(
3280 '-V', '--variable',
3281 nargs=2,
3282 action='append',
3283 dest='variables',
3284 metavar='VAR_NAME directory',
3285 default=[],
3286 help=('Variables to replace relative directories against. Example: '
3287 '"-v \'$HOME\' \'/home/%s\'" will replace all occurence of your '
3288 'home dir with $HOME') % getpass.getuser())
3289 parser.add_option(
3290 '--root-dir',
3291 help='Root directory to base everything off it. Anything outside of this '
3292 'this directory will not be reported')
3293 parser.add_option(
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003294 '--trace-name',
3295 help='Only reads one of the trace. Defaults to reading all traces')
3296 parser.add_option(
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003297 '-j', '--json', action='store_true',
3298 help='Outputs raw result data as json')
3299 parser.add_option(
maruel@chromium.org3683afe2013-07-27 00:09:27 +00003300 '--trace-blacklist', action='append', default=[],
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003301 help='List of regexp to use as blacklist filter')
3302 options, args = parser.parse_args(args)
3303
3304 if options.root_dir:
maruel@chromium.org561d4b22013-09-26 21:08:08 +00003305 options.root_dir = file_path.get_native_path_case(
maruel@chromium.org7dc17b32013-04-02 22:27:05 +00003306 unicode(os.path.abspath(options.root_dir)))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003307
3308 variables = dict(options.variables)
3309 api = get_api()
Marc-Antoine Ruelac54cb42013-11-18 14:05:35 -05003310 blacklist = tools.gen_blacklist(options.trace_blacklist)
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003311 data = api.parse_log(options.log, blacklist, options.trace_name)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003312 # Process each trace.
3313 output_as_json = []
maruel@chromium.org9e56b592013-05-21 16:56:05 +00003314 try:
3315 for item in data:
3316 if 'exception' in item:
3317 # Do not abort the other traces.
3318 print >> sys.stderr, (
3319 'Trace %s: Got an exception: %s' % (
3320 item['trace'], item['exception'][1]))
3321 continue
3322 results = item['results']
3323 if options.root_dir:
3324 results = results.strip_root(options.root_dir)
3325
3326 if options.json:
3327 output_as_json.append(results.flatten())
3328 else:
3329 simplified = extract_directories(
3330 options.root_dir, results.files, blacklist)
3331 simplified = [f.replace_variables(variables) for f in simplified]
3332 if len(data) > 1:
3333 print('Trace: %s' % item['trace'])
3334 print('Total: %d' % len(results.files))
3335 print('Non existent: %d' % len(results.non_existent))
3336 for f in results.non_existent:
3337 print(' %s' % f.path)
3338 print(
3339 'Interesting: %d reduced to %d' % (
3340 len(results.existent), len(simplified)))
3341 for f in simplified:
3342 print(' %s' % f.path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003343
3344 if options.json:
Marc-Antoine Ruelde011802013-11-12 15:19:47 -05003345 tools.write_json(sys.stdout, output_as_json, False)
maruel@chromium.org9e56b592013-05-21 16:56:05 +00003346 except KeyboardInterrupt:
3347 return 1
3348 except IOError as e:
3349 if e.errno == errno.EPIPE:
3350 # Do not print a stack trace when the output is piped to less and the user
3351 # quits before the whole output was written.
3352 return 1
3353 raise
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003354 return 0
3355
3356
Marc-Antoine Ruelf74cffe2015-07-15 15:21:34 -04003357class OptionParserTraceInputs(logging_utils.OptionParserWithLogging):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003358 """Adds automatic --log handling."""
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00003359
3360 # Disable --log-file options since both --log and --log-file options are
3361 # confusing.
3362 # TODO(vadimsh): Rename --log-file or --log to something else.
3363 enable_log_file = False
3364
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003365 def __init__(self, **kwargs):
Marc-Antoine Ruelf74cffe2015-07-15 15:21:34 -04003366 logging_utils.OptionParserWithLogging.__init__(self, **kwargs)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003367 self.add_option(
3368 '-l', '--log', help='Log file to generate or read, required')
3369
3370 def parse_args(self, *args, **kwargs):
3371 """Makes sure the paths make sense.
3372
3373 On Windows, / and \ are often mixed together in a path.
3374 """
Marc-Antoine Ruelf74cffe2015-07-15 15:21:34 -04003375 options, args = logging_utils.OptionParserWithLogging.parse_args(
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003376 self, *args, **kwargs)
3377 if not options.log:
3378 self.error('Must supply a log file with -l')
3379 options.log = os.path.abspath(options.log)
3380 return options, args
3381
3382
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003383def main(argv):
maruel@chromium.orge5322512013-08-19 20:17:57 +00003384 dispatcher = subcommand.CommandDispatcher(__name__)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003385 try:
maruel@chromium.org3d671992013-08-20 00:38:27 +00003386 return dispatcher.execute(
3387 OptionParserTraceInputs(version=__version__), argv)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003388 except TracingFailure, e:
3389 sys.stderr.write('\nError: ')
3390 sys.stderr.write(str(e))
3391 sys.stderr.write('\n')
3392 return 1
3393
3394
3395if __name__ == '__main__':
maruel@chromium.orge5322512013-08-19 20:17:57 +00003396 fix_encoding.fix_encoding()
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00003397 tools.disable_buffering()
maruel@chromium.orge5322512013-08-19 20:17:57 +00003398 colorama.init()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003399 sys.exit(main(sys.argv[1:]))