blob: 9ab41353abeae92aac565a154cd2026f783c588e [file] [log] [blame]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001#!/usr/bin/env python
2# coding=utf-8
Marc-Antoine Ruel8add1242013-11-05 17:28:27 -05003# Copyright 2012 The Swarming Authors. All rights reserved.
Marc-Antoine Ruele98b1122013-11-05 20:27:57 -05004# Use of this source code is governed under the Apache License, Version 2.0 that
5# can be found in the LICENSE file.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00006
7"""Traces an executable and its child processes and extract the files accessed
8by them.
9
10The implementation uses OS-specific API. The native Kernel logger and the ETL
11interface is used on Windows. Dtrace is used on OSX. Strace is used otherwise.
12The OS-specific implementation is hidden in an 'API' interface.
13
14The results are embedded in a Results instance. The tracing is done in two
15phases, the first is to do the actual trace and generate an
16implementation-specific log file. Then the log file is parsed to extract the
17information, including the individual child processes and the files accessed
18from the log.
19"""
20
21import codecs
22import csv
maruel@chromium.org9e56b592013-05-21 16:56:05 +000023import errno
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000024import getpass
25import glob
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000026import logging
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000027import os
28import re
maruel@chromium.org9b2112a2013-04-05 18:01:02 +000029import stat
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000030import subprocess
31import sys
32import tempfile
33import threading
34import time
35import weakref
36
maruel@chromium.orge5322512013-08-19 20:17:57 +000037from third_party import colorama
38from third_party.depot_tools import fix_encoding
39from third_party.depot_tools import subcommand
40
maruel@chromium.org561d4b22013-09-26 21:08:08 +000041from utils import file_path
vadimsh@chromium.orga4326472013-08-24 02:05:41 +000042from utils import tools
43
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000044## OS-specific imports
45
46if sys.platform == 'win32':
maruel@chromium.org561d4b22013-09-26 21:08:08 +000047 from ctypes.wintypes import byref, c_int, c_wchar_p
48 from ctypes.wintypes import windll # pylint: disable=E0611
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000049
50
maruel@chromium.org3d671992013-08-20 00:38:27 +000051__version__ = '0.1'
52
53
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000054BASE_DIR = os.path.dirname(os.path.abspath(__file__))
55ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR))
56
57
58class TracingFailure(Exception):
59 """An exception occured during tracing."""
60 def __init__(self, description, pid, line_number, line, *args):
61 super(TracingFailure, self).__init__(
62 description, pid, line_number, line, *args)
63 self.description = description
64 self.pid = pid
65 self.line_number = line_number
66 self.line = line
67 self.extra = args
68
69 def __str__(self):
70 out = self.description
71 if self.pid:
72 out += '\npid: %d' % self.pid
73 if self.line_number:
74 out += '\nline: %d' % self.line_number
75 if self.line:
76 out += '\n%s' % self.line
77 if self.extra:
78 out += '\n' + ', '.join(map(str, filter(None, self.extra)))
79 return out
80
81
82## OS-specific functions
83
84if sys.platform == 'win32':
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000085 def get_current_encoding():
86 """Returns the 'ANSI' code page associated to the process."""
87 return 'cp%d' % int(windll.kernel32.GetACP())
88
89
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000090 def CommandLineToArgvW(command_line):
91 """Splits a commandline into argv using CommandLineToArgvW()."""
92 # http://msdn.microsoft.com/library/windows/desktop/bb776391.aspx
93 size = c_int()
maruel@chromium.org306e0e72012-11-02 18:22:03 +000094 assert isinstance(command_line, unicode)
95 ptr = windll.shell32.CommandLineToArgvW(command_line, byref(size))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000096 try:
97 return [arg for arg in (c_wchar_p * size.value).from_address(ptr)]
98 finally:
99 windll.kernel32.LocalFree(ptr)
100
101
Marc-Antoine Ruel5906fcd2013-11-08 21:02:24 -0500102def can_trace():
103 """Returns True if the user is an administrator on Windows.
104
105 It is required for tracing to work.
106 """
107 if sys.platform != 'win32':
108 return True
109 return bool(windll.shell32.IsUserAnAdmin())
110
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000111
maruel@chromium.org3683afe2013-07-27 00:09:27 +0000112def gen_blacklist(regexes):
113 """Returns a lambda to be used as a blacklist."""
114 compiled = [re.compile(i) for i in regexes]
115 def match(f):
116 return any(j.match(f) for j in compiled)
117 return match
118
119
maruel@chromium.org2909b7c2013-05-29 18:52:59 +0000120def create_subprocess_thunk():
121 """Creates a small temporary script to start the child process.
122
123 This thunk doesn't block, its unique name is used to identify it as the
124 parent.
125 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000126 handle, name = tempfile.mkstemp(prefix='trace_inputs_thunk', suffix='.py')
maruel@chromium.org2909b7c2013-05-29 18:52:59 +0000127 try:
128 os.write(
129 handle,
130 (
131 'import subprocess, sys\n'
132 'sys.exit(subprocess.call(sys.argv[2:]))\n'
133 ))
134 finally:
135 os.close(handle)
136 return name
137
138
139def create_exec_thunk():
140 """Creates a small temporary script to start the child executable.
141
142 Reads from the file handle provided as the fisrt argument to block, then
143 execv() the command to be traced.
144 """
145 handle, name = tempfile.mkstemp(prefix='trace_inputs_thunk', suffix='.py')
146 try:
147 os.write(
148 handle,
149 (
150 'import os, sys\n'
151 'fd = int(sys.argv[1])\n'
152 # This will block until the controlling process writes a byte on the
153 # pipe. It will do so once the tracing tool, e.g. strace, is ready to
154 # trace.
155 'os.read(fd, 1)\n'
156 'os.close(fd)\n'
157 'os.execv(sys.argv[2], sys.argv[2:])\n'
158 ))
159 finally:
160 os.close(handle)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000161 return name
162
163
164def strace_process_quoted_arguments(text):
165 """Extracts quoted arguments on a string and return the arguments as a list.
166
167 Implemented as an automaton. Supports incomplete strings in the form
168 '"foo"...'.
169
170 Example:
171 With text = '"foo", "bar"', the function will return ['foo', 'bar']
172
173 TODO(maruel): Implement escaping.
174 """
175 # All the possible states of the DFA.
176 ( NEED_QUOTE, # Begining of a new arguments.
177 INSIDE_STRING, # Inside an argument.
178 ESCAPED, # Found a '\' inside a quote. Treat the next char as-is.
179 NEED_COMMA_OR_DOT, # Right after the closing quote of an argument. Could be
180 # a serie of 3 dots or a comma.
181 NEED_SPACE, # Right after a comma
182 NEED_DOT_2, # Found a dot, need a second one.
183 NEED_DOT_3, # Found second dot, need a third one.
184 NEED_COMMA, # Found third dot, need a comma.
185 ) = range(8)
186
187 state = NEED_QUOTE
188 out = []
189 for index, char in enumerate(text):
190 if char == '"':
191 if state == NEED_QUOTE:
192 state = INSIDE_STRING
193 # A new argument was found.
194 out.append('')
195 elif state == INSIDE_STRING:
196 # The argument is now closed.
197 state = NEED_COMMA_OR_DOT
198 elif state == ESCAPED:
199 out[-1] += char
200 state = INSIDE_STRING
201 else:
202 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000203 'Can\'t process char \'%s\' at column %d for: %r' % (
204 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000205 index,
206 text)
207 elif char == ',':
208 if state in (NEED_COMMA_OR_DOT, NEED_COMMA):
209 state = NEED_SPACE
210 elif state == INSIDE_STRING:
211 out[-1] += char
212 elif state == ESCAPED:
213 out[-1] += char
214 state = INSIDE_STRING
215 else:
216 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000217 'Can\'t process char \'%s\' at column %d for: %r' % (
218 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000219 index,
220 text)
221 elif char == ' ':
222 if state == NEED_SPACE:
223 state = NEED_QUOTE
224 elif state == INSIDE_STRING:
225 out[-1] += char
226 elif state == ESCAPED:
227 out[-1] += char
228 state = INSIDE_STRING
229 else:
230 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000231 'Can\'t process char \'%s\' at column %d for: %r' % (
232 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000233 index,
234 text)
235 elif char == '.':
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000236 if state in (NEED_QUOTE, NEED_COMMA_OR_DOT):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000237 # The string is incomplete, this mean the strace -s flag should be
238 # increased.
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000239 # For NEED_QUOTE, the input string would look like '"foo", ...'.
240 # For NEED_COMMA_OR_DOT, the input string would look like '"foo"...'
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000241 state = NEED_DOT_2
242 elif state == NEED_DOT_2:
243 state = NEED_DOT_3
244 elif state == NEED_DOT_3:
245 state = NEED_COMMA
246 elif state == INSIDE_STRING:
247 out[-1] += char
248 elif state == ESCAPED:
249 out[-1] += char
250 state = INSIDE_STRING
251 else:
252 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000253 'Can\'t process char \'%s\' at column %d for: %r' % (
254 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000255 index,
256 text)
257 elif char == '\\':
258 if state == ESCAPED:
259 out[-1] += char
260 state = INSIDE_STRING
261 elif state == INSIDE_STRING:
262 state = ESCAPED
263 else:
264 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000265 'Can\'t process char \'%s\' at column %d for: %r' % (
266 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000267 index,
268 text)
269 else:
270 if state == INSIDE_STRING:
271 out[-1] += char
272 else:
273 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000274 'Can\'t process char \'%s\' at column %d for: %r' % (
275 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000276 index,
277 text)
278 if state not in (NEED_COMMA, NEED_COMMA_OR_DOT):
279 raise ValueError(
280 'String is incorrectly terminated: %r' % text,
281 text)
282 return out
283
284
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000285def assert_is_renderable(pseudo_string):
286 """Asserts the input is a valid object to be processed by render()."""
287 assert (
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000288 pseudo_string is None or
289 isinstance(pseudo_string, unicode) or
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000290 hasattr(pseudo_string, 'render')), repr(pseudo_string)
291
292
293def render(pseudo_string):
294 """Converts the pseudo-string to an unicode string."""
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000295 if pseudo_string is None or isinstance(pseudo_string, unicode):
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000296 return pseudo_string
297 return pseudo_string.render()
298
299
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000300class Results(object):
301 """Results of a trace session."""
302
303 class _TouchedObject(object):
304 """Something, a file or a directory, that was accessed."""
305 def __init__(self, root, path, tainted, size, nb_files):
306 logging.debug(
307 '%s(%s, %s, %s, %s, %s)' %
308 (self.__class__.__name__, root, path, tainted, size, nb_files))
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000309 assert_is_renderable(root)
310 assert_is_renderable(path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000311 self.root = root
312 self.path = path
313 self.tainted = tainted
314 self.nb_files = nb_files
maruel@chromium.org538141b2013-06-03 20:57:17 +0000315 # Can be used as a cache or a default value, depending on context. In
316 # particular, once self.tainted is True, because the path was replaced
317 # with a variable, it is not possible to look up the file size.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000318 self._size = size
319 # These are cache only.
320 self._real_path = None
321
322 # Check internal consistency.
323 assert path, path
maruel@chromium.org561d4b22013-09-26 21:08:08 +0000324 assert tainted or bool(root) != bool(file_path.isabs(path)), (root, path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000325 assert tainted or (
326 not os.path.exists(self.full_path) or
maruel@chromium.org561d4b22013-09-26 21:08:08 +0000327 (self.full_path == file_path.get_native_path_case(self.full_path))), (
328 tainted,
329 self.full_path,
330 file_path.get_native_path_case(self.full_path))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000331
332 @property
333 def existent(self):
334 return self.size != -1
335
336 @property
337 def full_path(self):
338 if self.root:
339 return os.path.join(self.root, self.path)
340 return self.path
341
342 @property
343 def real_path(self):
344 """Returns the path with symlinks resolved."""
345 if not self._real_path:
346 self._real_path = os.path.realpath(self.full_path)
347 return self._real_path
348
349 @property
350 def size(self):
maruel@chromium.org538141b2013-06-03 20:57:17 +0000351 """File's size. -1 is not existent.
352
353 Once tainted, it is not possible the retrieve the file size anymore since
354 the path is composed of variables.
355 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000356 if self._size is None and not self.tainted:
357 try:
358 self._size = os.stat(self.full_path).st_size
359 except OSError:
360 self._size = -1
361 return self._size
362
363 def flatten(self):
364 """Returns a dict representing this object.
365
366 A 'size' of 0 means the file was only touched and not read.
367 """
368 return {
369 'path': self.path,
370 'size': self.size,
371 }
372
373 def replace_variables(self, variables):
374 """Replaces the root of this File with one of the variables if it matches.
375
376 If a variable replacement occurs, the cloned object becomes tainted.
377 """
378 for variable, root_path in variables.iteritems():
379 if self.path.startswith(root_path):
380 return self._clone(
381 self.root, variable + self.path[len(root_path):], True)
382 # No need to clone, returns ourself.
383 return self
384
385 def strip_root(self, root):
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000386 """Returns a clone of itself with 'root' stripped off.
387
388 Note that the file is kept if it is either accessible from a symlinked
389 path that was used to access the file or through the real path.
390 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000391 # Check internal consistency.
maruel@chromium.org561d4b22013-09-26 21:08:08 +0000392 assert (
393 self.tainted or
394 (file_path.isabs(root) and root.endswith(os.path.sep))), root
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000395 if not self.full_path.startswith(root):
396 # Now try to resolve the symlinks to see if it can be reached this way.
397 # Only try *after* trying without resolving symlink.
398 if not self.real_path.startswith(root):
399 return None
400 path = self.real_path
401 else:
402 path = self.full_path
403 return self._clone(root, path[len(root):], self.tainted)
404
405 def _clone(self, new_root, new_path, tainted):
406 raise NotImplementedError(self.__class__.__name__)
407
408 class File(_TouchedObject):
409 """A file that was accessed. May not be present anymore.
410
411 If tainted is true, it means it is not a real path anymore as a variable
412 replacement occured.
413
maruel@chromium.org538141b2013-06-03 20:57:17 +0000414 |mode| can be one of None, TOUCHED, READ or WRITE.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000415 """
maruel@chromium.org538141b2013-06-03 20:57:17 +0000416 # Was probed for existence, and it is existent, but was never _opened_.
417 TOUCHED = 't'
418 # Opened for read only and guaranteed to not have been written to.
419 READ = 'r'
420 # Opened for write.
421 WRITE = 'w'
422
423 # They are listed in order of priority. E.g. if a file is traced as TOUCHED
424 # then as WRITE, only keep WRITE. None means no idea, which is a problem on
425 # Windows.
426 ACCEPTABLE_MODES = (None, TOUCHED, READ, WRITE)
427
428 def __init__(self, root, path, tainted, size, mode):
429 assert mode in self.ACCEPTABLE_MODES
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000430 super(Results.File, self).__init__(root, path, tainted, size, 1)
maruel@chromium.org538141b2013-06-03 20:57:17 +0000431 self.mode = mode
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000432
433 def _clone(self, new_root, new_path, tainted):
434 """Clones itself keeping meta-data."""
435 # Keep the self.size and self._real_path caches for performance reason. It
436 # is also important when the file becomes tainted (with a variable instead
437 # of the real path) since self.path is not an on-disk path anymore so
438 # out._size cannot be updated.
maruel@chromium.org538141b2013-06-03 20:57:17 +0000439 out = self.__class__(new_root, new_path, tainted, self.size, self.mode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000440 out._real_path = self._real_path
441 return out
442
maruel@chromium.org538141b2013-06-03 20:57:17 +0000443 def flatten(self):
444 out = super(Results.File, self).flatten()
445 out['mode'] = self.mode
446 return out
447
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000448 class Directory(_TouchedObject):
maruel@chromium.org538141b2013-06-03 20:57:17 +0000449 """A directory of files. Must exist.
450
451 For a Directory instance, self.size is not a cache, it's an actual value
452 that is never modified and represents the total size of the files contained
453 in this directory. It is possible that the directory is empty so that
454 size==0; this happens if there's only an invalid symlink in it.
455 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000456 def __init__(self, root, path, tainted, size, nb_files):
457 """path='.' is a valid value and must be handled appropriately."""
458 assert not path.endswith(os.path.sep), path
459 super(Results.Directory, self).__init__(
460 root, path + os.path.sep, tainted, size, nb_files)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000461
462 def flatten(self):
463 out = super(Results.Directory, self).flatten()
464 out['nb_files'] = self.nb_files
465 return out
466
467 def _clone(self, new_root, new_path, tainted):
468 """Clones itself keeping meta-data."""
469 out = self.__class__(
470 new_root,
471 new_path.rstrip(os.path.sep),
472 tainted,
473 self.size,
474 self.nb_files)
475 out._real_path = self._real_path
476 return out
477
478 class Process(object):
479 """A process that was traced.
480
481 Contains references to the files accessed by this process and its children.
482 """
483 def __init__(self, pid, files, executable, command, initial_cwd, children):
484 logging.debug('Process(%s, %d, ...)' % (pid, len(files)))
485 self.pid = pid
486 self.files = sorted(files, key=lambda x: x.path)
487 self.children = children
488 self.executable = executable
489 self.command = command
490 self.initial_cwd = initial_cwd
491
492 # Check internal consistency.
493 assert len(set(f.path for f in self.files)) == len(self.files), sorted(
494 f.path for f in self.files)
495 assert isinstance(self.children, list)
496 assert isinstance(self.files, list)
497
498 @property
499 def all(self):
500 for child in self.children:
501 for i in child.all:
502 yield i
503 yield self
504
505 def flatten(self):
506 return {
507 'children': [c.flatten() for c in self.children],
508 'command': self.command,
509 'executable': self.executable,
510 'files': [f.flatten() for f in self.files],
511 'initial_cwd': self.initial_cwd,
512 'pid': self.pid,
513 }
514
515 def strip_root(self, root):
maruel@chromium.org561d4b22013-09-26 21:08:08 +0000516 assert file_path.isabs(root) and root.endswith(os.path.sep), root
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000517 # Loads the files after since they are constructed as objects.
518 out = self.__class__(
519 self.pid,
520 filter(None, (f.strip_root(root) for f in self.files)),
521 self.executable,
522 self.command,
523 self.initial_cwd,
524 [c.strip_root(root) for c in self.children])
525 logging.debug(
526 'strip_root(%s) %d -> %d' % (root, len(self.files), len(out.files)))
527 return out
528
529 def __init__(self, process):
530 self.process = process
531 # Cache.
532 self._files = None
533
534 def flatten(self):
535 return {
536 'root': self.process.flatten(),
537 }
538
539 @property
540 def files(self):
541 if self._files is None:
542 self._files = sorted(
543 sum((p.files for p in self.process.all), []),
544 key=lambda x: x.path)
545 return self._files
546
547 @property
548 def existent(self):
549 return [f for f in self.files if f.existent]
550
551 @property
552 def non_existent(self):
553 return [f for f in self.files if not f.existent]
554
555 def strip_root(self, root):
556 """Returns a clone with all the files outside the directory |root| removed
557 and converts all the path to be relative paths.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000558
559 It keeps files accessible through the |root| directory or that have been
560 accessed through any symlink which points to the same directory.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000561 """
562 # Resolve any symlink
563 root = os.path.realpath(root)
maruel@chromium.org561d4b22013-09-26 21:08:08 +0000564 root = (
565 file_path.get_native_path_case(root).rstrip(os.path.sep) + os.path.sep)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000566 logging.debug('strip_root(%s)' % root)
567 return Results(self.process.strip_root(root))
568
569
570class ApiBase(object):
571 """OS-agnostic API to trace a process and its children."""
572 class Context(object):
573 """Processes one log line at a time and keeps the list of traced processes.
574
575 The parsing is complicated by the fact that logs are traced out of order for
576 strace but in-order for dtrace and logman. In addition, on Windows it is
577 very frequent that processids are reused so a flat list cannot be used. But
578 at the same time, it is impossible to faithfully construct a graph when the
579 logs are processed out of order. So both a tree and a flat mapping are used,
580 the tree is the real process tree, while the flat mapping stores the last
581 valid process for the corresponding processid. For the strace case, the
582 tree's head is guessed at the last moment.
583 """
584 class Process(object):
585 """Keeps context for one traced child process.
586
587 Logs all the files this process touched. Ignores directories.
588 """
589 def __init__(self, blacklist, pid, initial_cwd):
590 # Check internal consistency.
591 assert isinstance(pid, int), repr(pid)
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000592 assert_is_renderable(initial_cwd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000593 self.pid = pid
594 # children are Process instances.
595 self.children = []
596 self.initial_cwd = initial_cwd
597 self.cwd = None
maruel@chromium.org538141b2013-06-03 20:57:17 +0000598 self.files = {}
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000599 self.executable = None
600 self.command = None
601 self._blacklist = blacklist
602
603 def to_results_process(self):
604 """Resolves file case sensitivity and or late-bound strings."""
605 # When resolving files, it's normal to get dupe because a file could be
606 # opened multiple times with different case. Resolve the deduplication
607 # here.
608 def fix_path(x):
609 """Returns the native file path case.
610
611 Converts late-bound strings.
612 """
613 if not x:
614 # Do not convert None instance to 'None'.
615 return x
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000616 x = render(x)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000617 if os.path.isabs(x):
618 # If the path is not absolute, which tends to happen occasionally on
619 # Windows, it is not possible to get the native path case so ignore
620 # that trace. It mostly happens for 'executable' value.
maruel@chromium.org561d4b22013-09-26 21:08:08 +0000621 x = file_path.get_native_path_case(x)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000622 return x
623
maruel@chromium.org538141b2013-06-03 20:57:17 +0000624 def fix_and_blacklist_path(x, m):
625 """Receives a tuple (filepath, mode) and processes filepath."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000626 x = fix_path(x)
627 if not x:
628 return
629 # The blacklist needs to be reapplied, since path casing could
630 # influence blacklisting.
631 if self._blacklist(x):
632 return
maruel@chromium.org538141b2013-06-03 20:57:17 +0000633 # Filters out directories. Some may have passed through.
634 if os.path.isdir(x):
635 return
636 return x, m
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000637
maruel@chromium.org538141b2013-06-03 20:57:17 +0000638 # Renders all the files as strings, as some could be RelativePath
639 # instances. It is important to do it first since there could still be
640 # multiple entries with the same path but different modes.
641 rendered = (
642 fix_and_blacklist_path(f, m) for f, m in self.files.iteritems())
643 files = sorted(
644 (f for f in rendered if f),
645 key=lambda x: (x[0], Results.File.ACCEPTABLE_MODES.index(x[1])))
646 # Then converting into a dict will automatically clean up lesser
647 # important values.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000648 files = [
maruel@chromium.org538141b2013-06-03 20:57:17 +0000649 Results.File(None, f, False, None, m)
650 for f, m in dict(files).iteritems()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000651 ]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000652 return Results.Process(
653 self.pid,
654 files,
655 fix_path(self.executable),
656 self.command,
657 fix_path(self.initial_cwd),
658 [c.to_results_process() for c in self.children])
659
maruel@chromium.org538141b2013-06-03 20:57:17 +0000660 def add_file(self, filepath, mode):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000661 """Adds a file if it passes the blacklist."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000662 if self._blacklist(render(filepath)):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000663 return
maruel@chromium.org538141b2013-06-03 20:57:17 +0000664 logging.debug('add_file(%d, %s, %s)', self.pid, filepath, mode)
665 # Note that filepath and not render(filepath) is added. It is because
666 # filepath could be something else than a string, like a RelativePath
667 # instance for dtrace logs.
668 modes = Results.File.ACCEPTABLE_MODES
669 old_mode = self.files.setdefault(filepath, mode)
670 if old_mode != mode and modes.index(old_mode) < modes.index(mode):
671 # Take the highest value.
672 self.files[filepath] = mode
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000673
674 def __init__(self, blacklist):
675 self.blacklist = blacklist
676 # Initial process.
677 self.root_process = None
678 # dict to accelerate process lookup, to not have to lookup the whole graph
679 # each time.
680 self._process_lookup = {}
681
682 class Tracer(object):
683 """During it's lifetime, the tracing subsystem is enabled."""
684 def __init__(self, logname):
685 self._logname = logname
maruel@chromium.org808e3f22013-05-16 16:42:54 +0000686 self._lock = threading.RLock()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000687 self._traces = []
688 self._initialized = True
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +0000689 self._scripts_to_cleanup = []
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000690
691 def trace(self, cmd, cwd, tracename, output):
692 """Runs the OS-specific trace program on an executable.
693
694 Arguments:
695 - cmd: The command (a list) to run.
696 - cwd: Current directory to start the child process in.
697 - tracename: Name of the trace in the logname file.
698 - output: If False, redirects output to PIPEs.
699
700 Returns a tuple (resultcode, output) and updates the internal trace
701 entries.
702 """
703 # The implementation adds an item to self._traces.
704 raise NotImplementedError(self.__class__.__name__)
705
706 def close(self, _timeout=None):
707 """Saves the meta-data in the logname file.
708
709 For kernel-based tracing, stops the tracing subsystem.
710
711 Must not be used manually when using 'with' construct.
712 """
713 with self._lock:
maruel@chromium.org808e3f22013-05-16 16:42:54 +0000714 if not self._initialized:
715 raise TracingFailure(
716 'Called %s.close() on an unitialized object' %
717 self.__class__.__name__,
718 None, None, None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000719 try:
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +0000720 while self._scripts_to_cleanup:
721 try:
722 os.remove(self._scripts_to_cleanup.pop())
723 except OSError as e:
724 logging.error('Failed to delete a temporary script: %s', e)
Marc-Antoine Ruelde011802013-11-12 15:19:47 -0500725 tools.write_json(self._logname, self._gen_logdata(), False)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000726 finally:
727 self._initialized = False
728
729 def post_process_log(self):
730 """Post-processes the log so it becomes faster to load afterward.
731
732 Must not be used manually when using 'with' construct.
733 """
734 assert not self._initialized, 'Must stop tracing first.'
735
maruel@chromium.org808e3f22013-05-16 16:42:54 +0000736 def _gen_logdata(self):
737 """Returns the data to be saved in the trace file."""
738 return {
739 'traces': self._traces,
740 }
741
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000742 def __enter__(self):
743 """Enables 'with' statement."""
744 return self
745
746 def __exit__(self, exc_type, exc_value, traceback):
747 """Enables 'with' statement."""
748 self.close()
749 # If an exception was thrown, do not process logs.
750 if not exc_type:
751 self.post_process_log()
752
753 def get_tracer(self, logname):
754 """Returns an ApiBase.Tracer instance.
755
756 Initializes the tracing subsystem, which is a requirement for kernel-based
757 tracers. Only one tracer instance should be live at a time!
758
759 logname is the filepath to the json file that will contain the meta-data
760 about the logs.
761 """
762 return self.Tracer(logname)
763
764 @staticmethod
765 def clean_trace(logname):
766 """Deletes an old log."""
767 raise NotImplementedError()
768
769 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +0000770 def parse_log(cls, logname, blacklist, trace_name):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000771 """Processes trace logs and returns the files opened and the files that do
772 not exist.
773
774 It does not track directories.
775
maruel@chromium.orgec74ff82012-10-29 18:14:47 +0000776 Arguments:
777 - logname: must be an absolute path.
778 - blacklist: must be a lambda.
779 - trace_name: optional trace to read, defaults to reading all traces.
maruel@chromium.org8b056ba2012-10-16 14:04:49 +0000780
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000781 Most of the time, files that do not exist are temporary test files that
782 should be put in /tmp instead. See http://crbug.com/116251.
783
784 Returns a list of dict with keys:
785 - results: A Results instance.
786 - trace: The corresponding tracename parameter provided to
787 get_tracer().trace().
788 - output: Output gathered during execution, if get_tracer().trace(...,
789 output=False) was used.
790 """
791 raise NotImplementedError(cls.__class__.__name__)
792
793
794class Strace(ApiBase):
795 """strace implies linux."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000796 @staticmethod
797 def load_filename(filename):
798 """Parses a filename in a log."""
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000799 # TODO(maruel): Be compatible with strace -x.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000800 assert isinstance(filename, str)
801 out = ''
802 i = 0
803 while i < len(filename):
804 c = filename[i]
805 if c == '\\':
806 out += chr(int(filename[i+1:i+4], 8))
807 i += 4
808 else:
809 out += c
810 i += 1
811 # TODO(maruel): That's not necessarily true that the current code page is
812 # utf-8.
813 return out.decode('utf-8')
814
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000815 class Context(ApiBase.Context):
816 """Processes a strace log line and keeps the list of existent and non
817 existent files accessed.
818
819 Ignores directories.
820
821 Uses late-binding to processes the cwd of each process. The problem is that
822 strace generates one log file per process it traced but doesn't give any
maruel@chromium.org2909b7c2013-05-29 18:52:59 +0000823 information about which process was started when and by who. So process the
824 logs out of order and use late binding with RelativePath to be able to
825 deduce the initial directory of each process once all the logs are parsed.
826
827 TODO(maruel): Use the script even in the non-sudo case, so log parsing can
828 be done in two phase: first find the root process, then process the child
829 processes in order. With that, it should be possible to not use RelativePath
830 anymore. This would significantly simplify the code!
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000831 """
832 class Process(ApiBase.Context.Process):
833 """Represents the state of a process.
834
835 Contains all the information retrieved from the pid-specific log.
836 """
837 # Function names are using ([a-z_0-9]+)
838 # This is the most common format. function(args) = result
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000839 RE_HEADER = re.compile(r'^([a-z_0-9]+)\((.*?)\)\s+= (.+)$')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000840 # An interrupted function call, only grab the minimal header.
841 RE_UNFINISHED = re.compile(r'^([^\(]+)(.*) \<unfinished \.\.\.\>$')
842 # A resumed function call.
843 RE_RESUMED = re.compile(r'^<\.\.\. ([^ ]+) resumed> (.+)$')
844 # A process received a signal.
845 RE_SIGNAL = re.compile(r'^--- SIG[A-Z]+ .+ ---')
846 # A process didn't handle a signal. Ignore any junk appearing before,
847 # because the process was forcibly killed so it won't open any new file.
848 RE_KILLED = re.compile(
849 r'^.*\+\+\+ killed by ([A-Z]+)( \(core dumped\))? \+\+\+$')
850 # The process has exited.
851 RE_PROCESS_EXITED = re.compile(r'^\+\+\+ exited with (\d+) \+\+\+')
852 # A call was canceled. Ignore any prefix.
853 RE_UNAVAILABLE = re.compile(r'^.*\)\s*= \? <unavailable>$')
854 # Happens when strace fails to even get the function name.
855 UNNAMED_FUNCTION = '????'
856
857 # Corner-case in python, a class member function decorator must not be
858 # @staticmethod.
859 def parse_args(regexp, expect_zero): # pylint: disable=E0213
860 """Automatically convert the str 'args' into a list of processed
861 arguments.
862
863 Arguments:
864 - regexp is used to parse args.
865 - expect_zero: one of True, False or None.
866 - True: will check for result.startswith('0') first and will ignore
867 the trace line completely otherwise. This is important because for
868 many functions, the regexp will not process if the call failed.
869 - False: will check for not result.startswith(('?', '-1')) for the
870 same reason than with True.
871 - None: ignore result.
872 """
873 def meta_hook(function):
874 assert function.__name__.startswith('handle_')
875 def hook(self, args, result):
876 if expect_zero is True and not result.startswith('0'):
877 return
878 if expect_zero is False and result.startswith(('?', '-1')):
879 return
880 match = re.match(regexp, args)
881 if not match:
882 raise TracingFailure(
883 'Failed to parse %s(%s) = %s' %
884 (function.__name__[len('handle_'):], args, result),
885 None, None, None)
886 return function(self, match.groups(), result)
887 return hook
888 return meta_hook
889
890 class RelativePath(object):
891 """A late-bound relative path."""
892 def __init__(self, parent, value):
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000893 assert_is_renderable(parent)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000894 self.parent = parent
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000895 assert (
896 value is None or
897 (isinstance(value, unicode) and not os.path.isabs(value)))
898 self.value = value
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000899 if self.value:
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000900 # TODO(maruel): On POSIX, '\\' is a valid character so remove this
901 # assert.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000902 assert '\\' not in self.value, value
903 assert '\\' not in self.value, (repr(value), repr(self.value))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000904
905 def render(self):
906 """Returns the current directory this instance is representing.
907
908 This function is used to return the late-bound value.
909 """
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000910 assert self.parent is not None
911 parent = render(self.parent)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000912 if self.value:
913 return os.path.normpath(os.path.join(parent, self.value))
914 return parent
915
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000916 def __init__(self, root, pid):
917 """Keeps enough information to be able to guess the original process
918 root.
919
920 strace doesn't store which process was the initial process. So more
921 information needs to be kept so the graph can be reconstructed from the
922 flat map.
923 """
924 logging.info('%s(%d)' % (self.__class__.__name__, pid))
925 super(Strace.Context.Process, self).__init__(root.blacklist, pid, None)
926 assert isinstance(root, ApiBase.Context)
927 self._root = weakref.ref(root)
928 # The dict key is the function name of the pending call, like 'open'
929 # or 'execve'.
930 self._pending_calls = {}
931 self._line_number = 0
932 # Current directory when the process started.
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000933 if isinstance(self._root(), unicode):
934 self.initial_cwd = self._root()
935 else:
936 self.initial_cwd = self.RelativePath(self._root(), None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000937 self.parentid = None
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +0000938 self._done = False
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000939
940 def get_cwd(self):
941 """Returns the best known value of cwd."""
942 return self.cwd or self.initial_cwd
943
944 def render(self):
945 """Returns the string value of the RelativePath() object.
946
947 Used by RelativePath. Returns the initial directory and not the
948 current one since the current directory 'cwd' validity is time-limited.
949
950 The validity is only guaranteed once all the logs are processed.
951 """
952 return self.initial_cwd.render()
953
954 def on_line(self, line):
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000955 assert isinstance(line, str)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000956 self._line_number += 1
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000957 try:
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000958 if self._done:
959 raise TracingFailure(
960 'Found a trace for a terminated process or corrupted log',
961 None, None, None)
962
963 if self.RE_SIGNAL.match(line):
964 # Ignore signals.
965 return
966
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000967 match = self.RE_KILLED.match(line)
968 if match:
969 # Converts a '+++ killed by Foo +++' trace into an exit_group().
970 self.handle_exit_group(match.group(1), None)
971 return
972
973 match = self.RE_PROCESS_EXITED.match(line)
974 if match:
975 # Converts a '+++ exited with 1 +++' trace into an exit_group()
976 self.handle_exit_group(match.group(1), None)
977 return
978
979 match = self.RE_UNFINISHED.match(line)
980 if match:
981 if match.group(1) in self._pending_calls:
982 raise TracingFailure(
983 'Found two unfinished calls for the same function',
984 None, None, None,
985 self._pending_calls)
986 self._pending_calls[match.group(1)] = (
987 match.group(1) + match.group(2))
988 return
989
990 match = self.RE_UNAVAILABLE.match(line)
991 if match:
992 # This usually means a process was killed and a pending call was
993 # canceled.
994 # TODO(maruel): Look up the last exit_group() trace just above and
995 # make sure any self._pending_calls[anything] is properly flushed.
996 return
997
998 match = self.RE_RESUMED.match(line)
999 if match:
1000 if match.group(1) not in self._pending_calls:
1001 raise TracingFailure(
1002 'Found a resumed call that was not logged as unfinished',
1003 None, None, None,
1004 self._pending_calls)
1005 pending = self._pending_calls.pop(match.group(1))
1006 # Reconstruct the line.
1007 line = pending + match.group(2)
1008
1009 match = self.RE_HEADER.match(line)
1010 if not match:
maruel@chromium.orgbcfc9e12013-03-19 18:47:17 +00001011 # The line is corrupted. It happens occasionally when a process is
1012 # killed forcibly with activity going on. Assume the process died.
1013 # No other line can be processed afterward.
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001014 logging.debug('%d is done: %s', self.pid, line)
maruel@chromium.orgbcfc9e12013-03-19 18:47:17 +00001015 self._done = True
1016 return
1017
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001018 if match.group(1) == self.UNNAMED_FUNCTION:
1019 return
1020
1021 # It's a valid line, handle it.
1022 handler = getattr(self, 'handle_%s' % match.group(1), None)
1023 if not handler:
1024 self._handle_unknown(match.group(1), match.group(2), match.group(3))
1025 return handler(match.group(2), match.group(3))
1026 except TracingFailure, e:
1027 # Hack in the values since the handler could be a static function.
1028 e.pid = self.pid
1029 e.line = line
1030 e.line_number = self._line_number
1031 # Re-raise the modified exception.
1032 raise
1033 except (KeyError, NotImplementedError, ValueError), e:
1034 raise TracingFailure(
1035 'Trace generated a %s exception: %s' % (
1036 e.__class__.__name__, str(e)),
1037 self.pid,
1038 self._line_number,
1039 line,
1040 e)
1041
1042 @parse_args(r'^\"(.+?)\", [FKORWX_|]+$', True)
1043 def handle_access(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001044 self._handle_file(args[0], Results.File.TOUCHED)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001045
1046 @parse_args(r'^\"(.+?)\"$', True)
1047 def handle_chdir(self, args, _result):
1048 """Updates cwd."""
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001049 self.cwd = self._mangle(args[0])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001050 logging.debug('handle_chdir(%d, %s)' % (self.pid, self.cwd))
1051
maruel@chromium.org538141b2013-06-03 20:57:17 +00001052 @parse_args(r'^\"(.+?)\", (\d+), (\d+)$', False)
1053 def handle_chown(self, args, _result):
1054 # TODO(maruel): Look at result?
1055 self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org55550722013-05-28 14:43:14 +00001056
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001057 def handle_clone(self, _args, result):
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001058 self._handling_forking('clone', result)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001059
1060 def handle_close(self, _args, _result):
1061 pass
1062
maruel@chromium.org538141b2013-06-03 20:57:17 +00001063 @parse_args(r'^\"(.+?)\", (\d+)$', False)
1064 def handle_chmod(self, args, _result):
1065 self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001066
maruel@chromium.org538141b2013-06-03 20:57:17 +00001067 @parse_args(r'^\"(.+?)\", (\d+)$', False)
1068 def handle_creat(self, args, _result):
1069 self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001070
1071 @parse_args(r'^\"(.+?)\", \[(.+)\], \[\/\* \d+ vars? \*\/\]$', True)
1072 def handle_execve(self, args, _result):
1073 # Even if in practice execve() doesn't returns when it succeeds, strace
1074 # still prints '0' as the result.
1075 filepath = args[0]
maruel@chromium.org538141b2013-06-03 20:57:17 +00001076 self._handle_file(filepath, Results.File.READ)
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001077 self.executable = self._mangle(filepath)
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001078 try:
1079 self.command = strace_process_quoted_arguments(args[1])
1080 except ValueError as e:
1081 raise TracingFailure(
1082 'Failed to process command line argument:\n%s' % e.args[0],
1083 None, None, None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001084
1085 def handle_exit_group(self, _args, _result):
1086 """Removes cwd."""
1087 self.cwd = None
1088
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001089 @parse_args(r'^(\d+|AT_FDCWD), \"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', True)
1090 def handle_faccessat(self, args, _results):
1091 if args[0] == 'AT_FDCWD':
maruel@chromium.org538141b2013-06-03 20:57:17 +00001092 self._handle_file(args[1], Results.File.TOUCHED)
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001093 else:
1094 raise Exception('Relative faccess not implemented.')
1095
maruel@chromium.org0781f322013-05-28 19:45:49 +00001096 def handle_fallocate(self, _args, result):
1097 pass
1098
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001099 def handle_fork(self, args, result):
1100 self._handle_unknown('fork', args, result)
1101
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001102 def handle_futex(self, _args, _result):
1103 pass
1104
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001105 @parse_args(r'^\"(.+?)\", (\d+)$', False)
1106 def handle_getcwd(self, args, _result):
1107 if os.path.isabs(args[0]):
1108 logging.debug('handle_chdir(%d, %s)' % (self.pid, self.cwd))
1109 if not isinstance(self.cwd, unicode):
1110 # Take the occasion to reset the path.
1111 self.cwd = self._mangle(args[0])
1112 else:
1113 # It should always match.
1114 assert self.cwd == Strace.load_filename(args[0]), (
1115 self.cwd, args[0])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001116
1117 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1118 def handle_link(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001119 self._handle_file(args[0], Results.File.READ)
1120 self._handle_file(args[1], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001121
1122 @parse_args(r'\"(.+?)\", \{.+?, \.\.\.\}', True)
1123 def handle_lstat(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001124 self._handle_file(args[0], Results.File.TOUCHED)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001125
1126 def handle_mkdir(self, _args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001127 # We track content, not directories.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001128 pass
1129
1130 @parse_args(r'^\"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', False)
1131 def handle_open(self, args, _result):
1132 if 'O_DIRECTORY' in args[1]:
1133 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00001134 self._handle_file(
1135 args[0],
1136 Results.File.READ if 'O_RDONLY' in args[1] else Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001137
1138 @parse_args(r'^(\d+|AT_FDCWD), \"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', False)
1139 def handle_openat(self, args, _result):
1140 if 'O_DIRECTORY' in args[2]:
1141 return
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001142 if args[0] == 'AT_FDCWD':
maruel@chromium.org538141b2013-06-03 20:57:17 +00001143 self._handle_file(
1144 args[1],
1145 Results.File.READ if 'O_RDONLY' in args[2]
1146 else Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001147 else:
1148 # TODO(maruel): Implement relative open if necessary instead of the
1149 # AT_FDCWD flag, let's hope not since this means tracking all active
1150 # directory handles.
maruel@chromium.org538141b2013-06-03 20:57:17 +00001151 raise NotImplementedError('Relative open via openat not implemented.')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001152
1153 @parse_args(r'^\"(.+?)\", \".+?\"(\.\.\.)?, \d+$', False)
1154 def handle_readlink(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001155 self._handle_file(args[0], Results.File.READ)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001156
1157 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1158 def handle_rename(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001159 self._handle_file(args[0], Results.File.READ)
1160 self._handle_file(args[1], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001161
1162 def handle_rmdir(self, _args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001163 # We track content, not directories.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001164 pass
1165
1166 def handle_setxattr(self, _args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001167 # TODO(maruel): self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001168 pass
1169
1170 @parse_args(r'\"(.+?)\", \{.+?, \.\.\.\}', True)
1171 def handle_stat(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001172 self._handle_file(args[0], Results.File.TOUCHED)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001173
maruel@chromium.org538141b2013-06-03 20:57:17 +00001174 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1175 def handle_symlink(self, args, _result):
1176 self._handle_file(args[0], Results.File.TOUCHED)
1177 self._handle_file(args[1], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001178
csharp@chromium.orgfa672922012-11-02 17:43:31 +00001179 @parse_args(r'^\"(.+?)\", \d+', True)
1180 def handle_truncate(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001181 self._handle_file(args[0], Results.File.WRITE)
csharp@chromium.orgfa672922012-11-02 17:43:31 +00001182
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001183 def handle_unlink(self, _args, _result):
1184 # In theory, the file had to be created anyway.
1185 pass
1186
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001187 def handle_unlinkat(self, _args, _result):
1188 # In theory, the file had to be created anyway.
1189 pass
1190
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001191 def handle_statfs(self, _args, _result):
1192 pass
1193
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001194 def handle_utimensat(self, _args, _result):
1195 pass
1196
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001197 def handle_vfork(self, _args, result):
1198 self._handling_forking('vfork', result)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001199
1200 @staticmethod
1201 def _handle_unknown(function, args, result):
1202 raise TracingFailure(
1203 'Unexpected/unimplemented trace %s(%s)= %s' %
1204 (function, args, result),
1205 None, None, None)
1206
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001207 def _handling_forking(self, name, result):
1208 """Transfers cwd."""
1209 if result.startswith(('?', '-1')):
1210 # The call failed.
1211 return
1212 # Update the other process right away.
1213 childpid = int(result)
1214 child = self._root().get_or_set_proc(childpid)
1215 if child.parentid is not None or childpid in self.children:
1216 raise TracingFailure(
1217 'Found internal inconsitency in process lifetime detection '
1218 'during a %s() call' % name,
1219 None, None, None)
1220
1221 # Copy the cwd object.
1222 child.initial_cwd = self.get_cwd()
1223 child.parentid = self.pid
1224 # It is necessary because the logs are processed out of order.
1225 self.children.append(child)
1226
maruel@chromium.org538141b2013-06-03 20:57:17 +00001227 def _handle_file(self, filepath, mode):
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001228 filepath = self._mangle(filepath)
maruel@chromium.org538141b2013-06-03 20:57:17 +00001229 self.add_file(filepath, mode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001230
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001231 def _mangle(self, filepath):
1232 """Decodes a filepath found in the log and convert it to a late-bound
1233 path if necessary.
1234
1235 |filepath| is an strace 'encoded' string and the returned value is
1236 either an unicode string if the path was absolute or a late bound path
1237 otherwise.
1238 """
1239 filepath = Strace.load_filename(filepath)
1240 if os.path.isabs(filepath):
1241 return filepath
1242 else:
1243 if isinstance(self.get_cwd(), unicode):
1244 return os.path.normpath(os.path.join(self.get_cwd(), filepath))
1245 return self.RelativePath(self.get_cwd(), filepath)
1246
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001247 def __init__(self, blacklist, root_pid, initial_cwd):
1248 """|root_pid| may be None when the root process is not known.
1249
1250 In that case, a search is done after reading all the logs to figure out
1251 the root process.
1252 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001253 super(Strace.Context, self).__init__(blacklist)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001254 assert_is_renderable(initial_cwd)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001255 self.root_pid = root_pid
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001256 self.initial_cwd = initial_cwd
1257
1258 def render(self):
1259 """Returns the string value of the initial cwd of the root process.
1260
1261 Used by RelativePath.
1262 """
1263 return self.initial_cwd
1264
1265 def on_line(self, pid, line):
1266 """Transfers control into the Process.on_line() function."""
1267 self.get_or_set_proc(pid).on_line(line.strip())
1268
1269 def to_results(self):
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001270 """If necessary, finds back the root process and verify consistency."""
1271 if not self.root_pid:
1272 # The non-sudo case. The traced process was started by strace itself,
1273 # so the pid of the traced process is not known.
1274 root = [p for p in self._process_lookup.itervalues() if not p.parentid]
1275 if len(root) == 1:
1276 self.root_process = root[0]
1277 # Save it for later.
1278 self.root_pid = self.root_process.pid
1279 else:
1280 # The sudo case. The traced process was started manually so its pid is
1281 # known.
1282 self.root_process = self._process_lookup.get(self.root_pid)
1283 if not self.root_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001284 raise TracingFailure(
1285 'Found internal inconsitency in process lifetime detection '
1286 'while finding the root process',
1287 None,
1288 None,
1289 None,
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001290 self.root_pid,
1291 sorted(self._process_lookup))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001292 process = self.root_process.to_results_process()
1293 if sorted(self._process_lookup) != sorted(p.pid for p in process.all):
1294 raise TracingFailure(
1295 'Found internal inconsitency in process lifetime detection '
1296 'while looking for len(tree) == len(list)',
1297 None,
1298 None,
1299 None,
1300 sorted(self._process_lookup),
1301 sorted(p.pid for p in process.all))
1302 return Results(process)
1303
1304 def get_or_set_proc(self, pid):
1305 """Returns the Context.Process instance for this pid or creates a new one.
1306 """
1307 if not pid or not isinstance(pid, int):
1308 raise TracingFailure(
1309 'Unpexpected value for pid: %r' % pid,
1310 pid,
1311 None,
1312 None,
1313 pid)
1314 if pid not in self._process_lookup:
1315 self._process_lookup[pid] = self.Process(self, pid)
1316 return self._process_lookup[pid]
1317
1318 @classmethod
1319 def traces(cls):
1320 """Returns the list of all handled traces to pass this as an argument to
1321 strace.
1322 """
1323 prefix = 'handle_'
1324 return [i[len(prefix):] for i in dir(cls.Process) if i.startswith(prefix)]
1325
1326 class Tracer(ApiBase.Tracer):
1327 MAX_LEN = 256
1328
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001329 def __init__(self, logname, use_sudo):
1330 super(Strace.Tracer, self).__init__(logname)
1331 self.use_sudo = use_sudo
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001332 if use_sudo:
1333 # TODO(maruel): Use the jump script systematically to make it easy to
1334 # figure out the root process, so RelativePath is not necessary anymore.
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00001335 self._child_script = create_exec_thunk()
1336 self._scripts_to_cleanup.append(self._child_script)
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001337
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001338 def trace(self, cmd, cwd, tracename, output):
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001339 """Runs strace on an executable.
1340
1341 When use_sudo=True, it is a 3-phases process: start the thunk, start
1342 sudo strace with the pid of the thunk and then have the thunk os.execve()
1343 the process to trace.
1344 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001345 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
1346 assert os.path.isabs(cmd[0]), cmd[0]
1347 assert os.path.isabs(cwd), cwd
1348 assert os.path.normpath(cwd) == cwd, cwd
1349 with self._lock:
1350 if not self._initialized:
1351 raise TracingFailure(
1352 'Called Tracer.trace() on an unitialized object',
1353 None, None, None, tracename)
1354 assert tracename not in (i['trace'] for i in self._traces)
1355 stdout = stderr = None
1356 if output:
1357 stdout = subprocess.PIPE
1358 stderr = subprocess.STDOUT
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001359
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001360 # Ensure all file related APIs are hooked.
1361 traces = ','.join(Strace.Context.traces() + ['file'])
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001362 flags = [
maruel@chromium.orga5cfdc32013-05-28 14:46:15 +00001363 # Each child process has its own trace file. It is necessary because
1364 # strace may generate corrupted log file if multiple processes are
1365 # heavily doing syscalls simultaneously.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001366 '-ff',
maruel@chromium.orga5cfdc32013-05-28 14:46:15 +00001367 # Reduce whitespace usage.
1368 '-a1',
1369 # hex encode non-ascii strings.
1370 # TODO(maruel): '-x',
1371 # TODO(maruel): '-ttt',
1372 # Signals are unnecessary noise here. Note the parser can cope with them
1373 # but reduce the unnecessary output.
1374 '-esignal=none',
1375 # Print as much data as wanted.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001376 '-s', '%d' % self.MAX_LEN,
1377 '-e', 'trace=%s' % traces,
1378 '-o', self._logname + '.' + tracename,
1379 ]
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001380
1381 if self.use_sudo:
1382 pipe_r, pipe_w = os.pipe()
1383 # Start the child process paused.
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00001384 target_cmd = [sys.executable, self._child_script, str(pipe_r)] + cmd
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001385 logging.debug(' '.join(target_cmd))
1386 child_proc = subprocess.Popen(
1387 target_cmd,
1388 stdin=subprocess.PIPE,
1389 stdout=stdout,
1390 stderr=stderr,
1391 cwd=cwd)
1392
1393 # TODO(maruel): both processes must use the same UID for it to work
1394 # without sudo. Look why -p is failing at the moment without sudo.
1395 trace_cmd = [
1396 'sudo',
1397 'strace',
1398 '-p', str(child_proc.pid),
1399 ] + flags
1400 logging.debug(' '.join(trace_cmd))
1401 strace_proc = subprocess.Popen(
1402 trace_cmd,
1403 cwd=cwd,
1404 stdin=subprocess.PIPE,
1405 stdout=subprocess.PIPE,
1406 stderr=subprocess.PIPE)
1407
1408 line = strace_proc.stderr.readline()
1409 if not re.match(r'^Process \d+ attached \- interrupt to quit$', line):
1410 # TODO(maruel): Raise an exception.
1411 assert False, line
1412
1413 # Now fire the child process.
1414 os.write(pipe_w, 'x')
1415
1416 out = child_proc.communicate()[0]
1417 strace_out = strace_proc.communicate()[0]
1418
1419 # TODO(maruel): if strace_proc.returncode: Add an exception.
1420 saved_out = strace_out if strace_proc.returncode else out
1421 root_pid = child_proc.pid
1422 else:
1423 # Non-sudo case.
1424 trace_cmd = [
1425 'strace',
1426 ] + flags + cmd
1427 logging.debug(' '.join(trace_cmd))
1428 child_proc = subprocess.Popen(
1429 trace_cmd,
1430 cwd=cwd,
1431 stdin=subprocess.PIPE,
1432 stdout=stdout,
1433 stderr=stderr)
1434 out = child_proc.communicate()[0]
1435 # TODO(maruel): Walk the logs and figure out the root process would
1436 # simplify parsing the logs a *lot*.
1437 saved_out = out
1438 # The trace reader will have to figure out.
1439 root_pid = None
1440
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001441 with self._lock:
1442 assert tracename not in (i['trace'] for i in self._traces)
1443 self._traces.append(
1444 {
1445 'cmd': cmd,
1446 'cwd': cwd,
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001447 'output': saved_out,
1448 'pid': root_pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001449 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001450 })
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001451 return child_proc.returncode, out
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001452
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001453 def __init__(self, use_sudo=None):
1454 super(Strace, self).__init__()
1455 self.use_sudo = use_sudo
1456
1457 def get_tracer(self, logname):
1458 return self.Tracer(logname, self.use_sudo)
1459
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001460 @staticmethod
1461 def clean_trace(logname):
1462 if os.path.isfile(logname):
1463 os.remove(logname)
1464 # Also delete any pid specific file from previous traces.
1465 for i in glob.iglob(logname + '.*'):
1466 if i.rsplit('.', 1)[1].isdigit():
1467 os.remove(i)
1468
1469 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001470 def parse_log(cls, logname, blacklist, trace_name):
1471 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00001472 assert os.path.isabs(logname)
Marc-Antoine Ruelde011802013-11-12 15:19:47 -05001473 data = tools.read_json(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001474 out = []
1475 for item in data['traces']:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001476 if trace_name and item['trace'] != trace_name:
1477 continue
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001478 result = {
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001479 'output': item['output'],
maruel@chromium.org964ddde2012-10-29 18:13:46 +00001480 'trace': item['trace'],
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001481 }
1482 try:
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001483 context = cls.Context(blacklist, item['pid'], item['cwd'])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001484 for pidfile in glob.iglob('%s.%s.*' % (logname, item['trace'])):
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001485 logging.debug('Reading %s', pidfile)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001486 pid = pidfile.rsplit('.', 1)[1]
1487 if pid.isdigit():
1488 pid = int(pid)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001489 found_line = False
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001490 for line in open(pidfile, 'rb'):
1491 context.on_line(pid, line)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001492 found_line = True
1493 if not found_line:
1494 # Ensures that a completely empty trace still creates the
1495 # corresponding Process instance by logging a dummy line.
1496 context.on_line(pid, '')
1497 else:
1498 logging.warning('Found unexpected file %s', pidfile)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001499 result['results'] = context.to_results()
maruel@chromium.org424f5692012-10-15 15:42:38 +00001500 except TracingFailure:
1501 result['exception'] = sys.exc_info()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001502 out.append(result)
1503 return out
1504
1505
1506class Dtrace(ApiBase):
1507 """Uses DTrace framework through dtrace. Requires root access.
1508
1509 Implies Mac OSX.
1510
1511 dtruss can't be used because it has compatibility issues with python.
1512
1513 Also, the pid->cwd handling needs to be done manually since OSX has no way to
1514 get the absolute path of the 'cwd' dtrace variable from the probe.
1515
1516 Also, OSX doesn't populate curpsinfo->pr_psargs properly, see
1517 https://discussions.apple.com/thread/1980539. So resort to handling execve()
1518 manually.
1519
1520 errno is not printed in the log since this implementation currently only cares
1521 about files that were successfully opened.
1522 """
1523 class Context(ApiBase.Context):
1524 # Format: index pid function(args)
1525 RE_HEADER = re.compile(r'^\d+ (\d+) ([a-zA-Z_\-]+)\((.*?)\)$')
1526
1527 # Arguments parsing.
1528 RE_DTRACE_BEGIN = re.compile(r'^\"(.+?)\"$')
1529 RE_CHDIR = re.compile(r'^\"(.+?)\"$')
1530 RE_EXECVE = re.compile(r'^\"(.+?)\", \[(\d+), (.+)\]$')
1531 RE_OPEN = re.compile(r'^\"(.+?)\", (0x[0-9a-z]+), (0x[0-9a-z]+)$')
1532 RE_PROC_START = re.compile(r'^(\d+), \"(.+?)\", (\d+)$')
1533 RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$')
1534
maruel@chromium.orgac361162013-06-04 15:54:06 +00001535 # O_DIRECTORY is not defined on Windows and dtrace doesn't exist on Windows.
1536 O_DIRECTORY = os.O_DIRECTORY if hasattr(os, 'O_DIRECTORY') else None
maruel@chromium.org538141b2013-06-03 20:57:17 +00001537 O_RDWR = os.O_RDWR
1538 O_WRONLY = os.O_WRONLY
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001539
1540 class Process(ApiBase.Context.Process):
1541 def __init__(self, *args):
1542 super(Dtrace.Context.Process, self).__init__(*args)
1543 self.cwd = self.initial_cwd
1544
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001545 def __init__(self, blacklist, thunk_pid, initial_cwd):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001546 logging.info(
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001547 '%s(%d, %s)' % (self.__class__.__name__, thunk_pid, initial_cwd))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001548 super(Dtrace.Context, self).__init__(blacklist)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001549 assert isinstance(initial_cwd, unicode), initial_cwd
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001550 # Process ID of the temporary script created by create_subprocess_thunk().
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001551 self._thunk_pid = thunk_pid
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001552 self._initial_cwd = initial_cwd
1553 self._line_number = 0
1554
1555 def on_line(self, line):
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001556 assert isinstance(line, unicode), line
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001557 self._line_number += 1
1558 match = self.RE_HEADER.match(line)
1559 if not match:
1560 raise TracingFailure(
1561 'Found malformed line: %s' % line,
1562 None,
1563 self._line_number,
1564 line)
1565 fn = getattr(
1566 self,
1567 'handle_%s' % match.group(2).replace('-', '_'),
1568 self._handle_ignored)
1569 # It is guaranteed to succeed because of the regexp. Or at least I thought
1570 # it would.
1571 pid = int(match.group(1))
1572 try:
1573 return fn(pid, match.group(3))
1574 except TracingFailure, e:
1575 # Hack in the values since the handler could be a static function.
1576 e.pid = pid
1577 e.line = line
1578 e.line_number = self._line_number
1579 # Re-raise the modified exception.
1580 raise
1581 except (KeyError, NotImplementedError, ValueError), e:
1582 raise TracingFailure(
1583 'Trace generated a %s exception: %s' % (
1584 e.__class__.__name__, str(e)),
1585 pid,
1586 self._line_number,
1587 line,
1588 e)
1589
1590 def to_results(self):
1591 process = self.root_process.to_results_process()
1592 # Internal concistency check.
1593 if sorted(self._process_lookup) != sorted(p.pid for p in process.all):
1594 raise TracingFailure(
1595 'Found internal inconsitency in process lifetime detection '
1596 'while looking for len(tree) == len(list)',
1597 None,
1598 None,
1599 None,
1600 sorted(self._process_lookup),
1601 sorted(p.pid for p in process.all))
1602 return Results(process)
1603
1604 def handle_dtrace_BEGIN(self, _pid, args):
1605 if not self.RE_DTRACE_BEGIN.match(args):
1606 raise TracingFailure(
1607 'Found internal inconsitency in dtrace_BEGIN log line',
1608 None, None, None)
1609
1610 def handle_proc_start(self, pid, args):
1611 """Transfers cwd.
1612
1613 The dtrace script already takes care of only tracing the processes that
1614 are child of the traced processes so there is no need to verify the
1615 process hierarchy.
1616 """
1617 if pid in self._process_lookup:
1618 raise TracingFailure(
1619 'Found internal inconsitency in proc_start: %d started two times' %
1620 pid,
1621 None, None, None)
1622 match = self.RE_PROC_START.match(args)
1623 if not match:
1624 raise TracingFailure(
1625 'Failed to parse arguments: %s' % args,
1626 None, None, None)
1627 ppid = int(match.group(1))
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001628 if ppid == self._thunk_pid and not self.root_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001629 proc = self.root_process = self.Process(
1630 self.blacklist, pid, self._initial_cwd)
1631 elif ppid in self._process_lookup:
1632 proc = self.Process(self.blacklist, pid, self._process_lookup[ppid].cwd)
1633 self._process_lookup[ppid].children.append(proc)
1634 else:
1635 # Another process tree, ignore.
1636 return
1637 self._process_lookup[pid] = proc
1638 logging.debug(
1639 'New child: %s -> %d cwd:%s' %
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001640 (ppid, pid, render(proc.initial_cwd)))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001641
1642 def handle_proc_exit(self, pid, _args):
1643 """Removes cwd."""
1644 if pid in self._process_lookup:
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001645 # self._thunk_pid is not traced itself and other traces run neither.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001646 self._process_lookup[pid].cwd = None
1647
1648 def handle_execve(self, pid, args):
1649 """Sets the process' executable.
1650
1651 TODO(maruel): Read command line arguments. See
1652 https://discussions.apple.com/thread/1980539 for an example.
1653 https://gist.github.com/1242279
1654
1655 Will have to put the answer at http://stackoverflow.com/questions/7556249.
1656 :)
1657 """
1658 if not pid in self._process_lookup:
1659 # Another process tree, ignore.
1660 return
1661 match = self.RE_EXECVE.match(args)
1662 if not match:
1663 raise TracingFailure(
1664 'Failed to parse arguments: %r' % args,
1665 None, None, None)
1666 proc = self._process_lookup[pid]
1667 proc.executable = match.group(1)
maruel@chromium.org538141b2013-06-03 20:57:17 +00001668 self._handle_file(pid, proc.executable, Results.File.READ)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001669 proc.command = self.process_escaped_arguments(match.group(3))
1670 if int(match.group(2)) != len(proc.command):
1671 raise TracingFailure(
1672 'Failed to parse execve() arguments: %s' % args,
1673 None, None, None)
1674
1675 def handle_chdir(self, pid, args):
1676 """Updates cwd."""
1677 if pid not in self._process_lookup:
1678 # Another process tree, ignore.
1679 return
1680 cwd = self.RE_CHDIR.match(args).group(1)
1681 if not cwd.startswith('/'):
1682 cwd2 = os.path.join(self._process_lookup[pid].cwd, cwd)
1683 logging.debug('handle_chdir(%d, %s) -> %s' % (pid, cwd, cwd2))
1684 else:
1685 logging.debug('handle_chdir(%d, %s)' % (pid, cwd))
1686 cwd2 = cwd
1687 self._process_lookup[pid].cwd = cwd2
1688
1689 def handle_open_nocancel(self, pid, args):
1690 """Redirects to handle_open()."""
1691 return self.handle_open(pid, args)
1692
1693 def handle_open(self, pid, args):
1694 if pid not in self._process_lookup:
1695 # Another process tree, ignore.
1696 return
1697 match = self.RE_OPEN.match(args)
1698 if not match:
1699 raise TracingFailure(
1700 'Failed to parse arguments: %s' % args,
1701 None, None, None)
1702 flag = int(match.group(2), 16)
1703 if self.O_DIRECTORY & flag == self.O_DIRECTORY:
1704 # Ignore directories.
1705 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00001706 self._handle_file(
1707 pid,
1708 match.group(1),
1709 Results.File.READ if not ((self.O_RDWR | self.O_WRONLY) & flag)
1710 else Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001711
1712 def handle_rename(self, pid, args):
1713 if pid not in self._process_lookup:
1714 # Another process tree, ignore.
1715 return
1716 match = self.RE_RENAME.match(args)
1717 if not match:
1718 raise TracingFailure(
1719 'Failed to parse arguments: %s' % args,
1720 None, None, None)
maruel@chromium.org538141b2013-06-03 20:57:17 +00001721 self._handle_file(pid, match.group(1), Results.File.READ)
1722 self._handle_file(pid, match.group(2), Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001723
maruel@chromium.org538141b2013-06-03 20:57:17 +00001724 def _handle_file(self, pid, filepath, mode):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001725 if not filepath.startswith('/'):
1726 filepath = os.path.join(self._process_lookup[pid].cwd, filepath)
1727 # We can get '..' in the path.
1728 filepath = os.path.normpath(filepath)
1729 # Sadly, still need to filter out directories here;
1730 # saw open_nocancel(".", 0, 0) = 0 lines.
1731 if os.path.isdir(filepath):
1732 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00001733 self._process_lookup[pid].add_file(filepath, mode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001734
1735 def handle_ftruncate(self, pid, args):
1736 """Just used as a signal to kill dtrace, ignoring."""
1737 pass
1738
1739 @staticmethod
1740 def _handle_ignored(pid, args):
1741 """Is called for all the event traces that are not handled."""
1742 raise NotImplementedError('Please implement me')
1743
1744 @staticmethod
1745 def process_escaped_arguments(text):
1746 """Extracts escaped arguments on a string and return the arguments as a
1747 list.
1748
1749 Implemented as an automaton.
1750
1751 Example:
1752 With text = '\\001python2.7\\001-c\\001print(\\"hi\\")\\0', the
1753 function will return ['python2.7', '-c', 'print("hi")]
1754 """
1755 if not text.endswith('\\0'):
1756 raise ValueError('String is not null terminated: %r' % text, text)
1757 text = text[:-2]
1758
1759 def unescape(x):
1760 """Replaces '\\' with '\' and '\?' (where ? is anything) with ?."""
1761 out = []
1762 escaped = False
1763 for i in x:
1764 if i == '\\' and not escaped:
1765 escaped = True
1766 continue
1767 escaped = False
1768 out.append(i)
1769 return ''.join(out)
1770
1771 return [unescape(i) for i in text.split('\\001')]
1772
1773 class Tracer(ApiBase.Tracer):
1774 # pylint: disable=C0301
1775 #
1776 # To understand the following code, you'll want to take a look at:
1777 # http://developers.sun.com/solaris/articles/dtrace_quickref/dtrace_quickref.html
1778 # https://wikis.oracle.com/display/DTrace/Variables
1779 # http://docs.oracle.com/cd/E19205-01/820-4221/
1780 #
1781 # 0. Dump all the valid probes into a text file. It is important, you
1782 # want to redirect into a file and you don't want to constantly 'sudo'.
1783 # $ sudo dtrace -l > probes.txt
1784 #
1785 # 1. Count the number of probes:
1786 # $ wc -l probes.txt
1787 # 81823 # On OSX 10.7, including 1 header line.
1788 #
1789 # 2. List providers, intentionally skipping all the 'syspolicy10925' and the
1790 # likes and skipping the header with NR>1:
1791 # $ awk 'NR>1 { print $2 }' probes.txt | sort | uniq | grep -v '[[:digit:]]'
1792 # dtrace
1793 # fbt
1794 # io
1795 # ip
1796 # lockstat
1797 # mach_trap
1798 # proc
1799 # profile
1800 # sched
1801 # syscall
1802 # tcp
1803 # vminfo
1804 #
1805 # 3. List of valid probes:
1806 # $ grep syscall probes.txt | less
1807 # or use dtrace directly:
1808 # $ sudo dtrace -l -P syscall | less
1809 #
1810 # trackedpid is an associative array where its value can be 0, 1 or 2.
1811 # 0 is for untracked processes and is the default value for items not
1812 # in the associative array.
1813 # 1 is for tracked processes.
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001814 # 2 is for the script created by create_subprocess_thunk() only. It is not
1815 # tracked itself but all its decendants are.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001816 #
1817 # The script will kill itself only once waiting_to_die == 1 and
1818 # current_processes == 0, so that both getlogin() was called and that
1819 # all traced processes exited.
1820 #
1821 # TODO(maruel): Use cacheable predicates. See
1822 # https://wikis.oracle.com/display/DTrace/Performance+Considerations
1823 D_CODE = """
1824 dtrace:::BEGIN {
1825 waiting_to_die = 0;
1826 current_processes = 0;
1827 logindex = 0;
1828 printf("%d %d %s_%s(\\"%s\\")\\n",
1829 logindex, PID, probeprov, probename, SCRIPT);
1830 logindex++;
1831 }
1832
1833 proc:::start /trackedpid[ppid]/ {
1834 trackedpid[pid] = 1;
1835 current_processes += 1;
1836 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
1837 logindex, pid, probeprov, probename,
1838 ppid,
1839 execname,
1840 current_processes);
1841 logindex++;
1842 }
1843 /* Should use SCRIPT but there is no access to this variable at that
1844 * point. */
1845 proc:::start /ppid == PID && execname == "Python"/ {
1846 trackedpid[pid] = 2;
1847 current_processes += 1;
1848 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
1849 logindex, pid, probeprov, probename,
1850 ppid,
1851 execname,
1852 current_processes);
1853 logindex++;
1854 }
1855 proc:::exit /trackedpid[pid] &&
1856 current_processes == 1 &&
1857 waiting_to_die == 1/ {
1858 trackedpid[pid] = 0;
1859 current_processes -= 1;
1860 printf("%d %d %s_%s(%d)\\n",
1861 logindex, pid, probeprov, probename,
1862 current_processes);
1863 logindex++;
1864 exit(0);
1865 }
1866 proc:::exit /trackedpid[pid]/ {
1867 trackedpid[pid] = 0;
1868 current_processes -= 1;
1869 printf("%d %d %s_%s(%d)\\n",
1870 logindex, pid, probeprov, probename,
1871 current_processes);
1872 logindex++;
1873 }
1874
1875 /* Use an arcane function to detect when we need to die */
1876 syscall::ftruncate:entry /pid == PID && arg0 == FILE_ID/ {
1877 waiting_to_die = 1;
1878 printf("%d %d %s()\\n", logindex, pid, probefunc);
1879 logindex++;
1880 }
1881 syscall::ftruncate:entry /
1882 pid == PID && arg0 == FILE_ID && current_processes == 0/ {
1883 exit(0);
1884 }
1885
1886 syscall::open*:entry /trackedpid[pid] == 1/ {
1887 self->open_arg0 = arg0;
1888 self->open_arg1 = arg1;
1889 self->open_arg2 = arg2;
1890 }
1891 syscall::open*:return /trackedpid[pid] == 1 && errno == 0/ {
1892 this->open_arg0 = copyinstr(self->open_arg0);
1893 printf("%d %d %s(\\"%s\\", 0x%x, 0x%x)\\n",
1894 logindex, pid, probefunc,
1895 this->open_arg0,
1896 self->open_arg1,
1897 self->open_arg2);
1898 logindex++;
1899 this->open_arg0 = 0;
1900 }
1901 syscall::open*:return /trackedpid[pid] == 1/ {
1902 self->open_arg0 = 0;
1903 self->open_arg1 = 0;
1904 self->open_arg2 = 0;
1905 }
1906
1907 syscall::rename:entry /trackedpid[pid] == 1/ {
1908 self->rename_arg0 = arg0;
1909 self->rename_arg1 = arg1;
1910 }
1911 syscall::rename:return /trackedpid[pid] == 1 && errno == 0/ {
1912 this->rename_arg0 = copyinstr(self->rename_arg0);
1913 this->rename_arg1 = copyinstr(self->rename_arg1);
1914 printf("%d %d %s(\\"%s\\", \\"%s\\")\\n",
1915 logindex, pid, probefunc,
1916 this->rename_arg0,
1917 this->rename_arg1);
1918 logindex++;
1919 this->rename_arg0 = 0;
1920 this->rename_arg1 = 0;
1921 }
1922 syscall::rename:return /trackedpid[pid] == 1/ {
1923 self->rename_arg0 = 0;
1924 self->rename_arg1 = 0;
1925 }
1926
1927 /* Track chdir, it's painful because it is only receiving relative path.
1928 */
1929 syscall::chdir:entry /trackedpid[pid] == 1/ {
1930 self->chdir_arg0 = arg0;
1931 }
1932 syscall::chdir:return /trackedpid[pid] == 1 && errno == 0/ {
1933 this->chdir_arg0 = copyinstr(self->chdir_arg0);
1934 printf("%d %d %s(\\"%s\\")\\n",
1935 logindex, pid, probefunc,
1936 this->chdir_arg0);
1937 logindex++;
1938 this->chdir_arg0 = 0;
1939 }
1940 syscall::chdir:return /trackedpid[pid] == 1/ {
1941 self->chdir_arg0 = 0;
1942 }
1943 """
1944
1945 # execve-specific code, tends to throw a lot of exceptions.
1946 D_CODE_EXECVE = """
1947 /* Finally what we care about! */
1948 syscall::exec*:entry /trackedpid[pid]/ {
1949 self->exec_arg0 = copyinstr(arg0);
1950 /* Incrementally probe for a NULL in the argv parameter of execve() to
1951 * figure out argc. */
1952 /* TODO(maruel): Skip the remaining copyin() when a NULL pointer was
1953 * found. */
1954 self->exec_argc = 0;
1955 /* Probe for argc==1 */
1956 this->exec_argv = (user_addr_t*)copyin(
1957 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
1958 self->exec_argc = this->exec_argv[self->exec_argc] ?
1959 (self->exec_argc + 1) : self->exec_argc;
1960
1961 /* Probe for argc==2 */
1962 this->exec_argv = (user_addr_t*)copyin(
1963 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
1964 self->exec_argc = this->exec_argv[self->exec_argc] ?
1965 (self->exec_argc + 1) : self->exec_argc;
1966
1967 /* Probe for argc==3 */
1968 this->exec_argv = (user_addr_t*)copyin(
1969 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
1970 self->exec_argc = this->exec_argv[self->exec_argc] ?
1971 (self->exec_argc + 1) : self->exec_argc;
1972
1973 /* Probe for argc==4 */
1974 this->exec_argv = (user_addr_t*)copyin(
1975 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
1976 self->exec_argc = this->exec_argv[self->exec_argc] ?
1977 (self->exec_argc + 1) : self->exec_argc;
1978
1979 /* Copy the inputs strings since there is no guarantee they'll be
1980 * present after the call completed. */
1981 self->exec_argv0 = (self->exec_argc > 0) ?
1982 copyinstr(this->exec_argv[0]) : "";
1983 self->exec_argv1 = (self->exec_argc > 1) ?
1984 copyinstr(this->exec_argv[1]) : "";
1985 self->exec_argv2 = (self->exec_argc > 2) ?
1986 copyinstr(this->exec_argv[2]) : "";
1987 self->exec_argv3 = (self->exec_argc > 3) ?
1988 copyinstr(this->exec_argv[3]) : "";
1989 this->exec_argv = 0;
1990 }
1991 syscall::exec*:return /trackedpid[pid] && errno == 0/ {
1992 /* We need to join strings here, as using multiple printf() would
1993 * cause tearing when multiple threads/processes are traced.
1994 * Since it is impossible to escape a string and join it to another one,
1995 * like sprintf("%s%S", previous, more), use hackery.
1996 * Each of the elements are split with a \\1. \\0 cannot be used because
1997 * it is simply ignored. This will conflict with any program putting a
1998 * \\1 in their execve() string but this should be "rare enough" */
1999 this->args = "";
2000 /* Process exec_argv[0] */
2001 this->args = strjoin(
2002 this->args, (self->exec_argc > 0) ? self->exec_argv0 : "");
2003
2004 /* Process exec_argv[1] */
2005 this->args = strjoin(
2006 this->args, (self->exec_argc > 1) ? "\\1" : "");
2007 this->args = strjoin(
2008 this->args, (self->exec_argc > 1) ? self->exec_argv1 : "");
2009
2010 /* Process exec_argv[2] */
2011 this->args = strjoin(
2012 this->args, (self->exec_argc > 2) ? "\\1" : "");
2013 this->args = strjoin(
2014 this->args, (self->exec_argc > 2) ? self->exec_argv2 : "");
2015
2016 /* Process exec_argv[3] */
2017 this->args = strjoin(
2018 this->args, (self->exec_argc > 3) ? "\\1" : "");
2019 this->args = strjoin(
2020 this->args, (self->exec_argc > 3) ? self->exec_argv3 : "");
2021
2022 /* Prints self->exec_argc to permits verifying the internal
2023 * consistency since this code is quite fishy. */
2024 printf("%d %d %s(\\"%s\\", [%d, %S])\\n",
2025 logindex, pid, probefunc,
2026 self->exec_arg0,
2027 self->exec_argc,
2028 this->args);
2029 logindex++;
2030 this->args = 0;
2031 }
2032 syscall::exec*:return /trackedpid[pid]/ {
2033 self->exec_arg0 = 0;
2034 self->exec_argc = 0;
2035 self->exec_argv0 = 0;
2036 self->exec_argv1 = 0;
2037 self->exec_argv2 = 0;
2038 self->exec_argv3 = 0;
2039 }
2040 """
2041
2042 # Code currently not used.
2043 D_EXTRANEOUS = """
2044 /* This is a good learning experience, since it traces a lot of things
2045 * related to the process and child processes.
2046 * Warning: it generates a gigantic log. For example, tracing
2047 * "data/trace_inputs/child1.py --child" generates a 2mb log and takes
2048 * several minutes to execute.
2049 */
2050 /*
2051 mach_trap::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2052 printf("%d %d %s_%s() = %d\\n",
2053 logindex, pid, probeprov, probefunc, errno);
2054 logindex++;
2055 }
2056 proc::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2057 printf("%d %d %s_%s() = %d\\n",
2058 logindex, pid, probeprov, probefunc, errno);
2059 logindex++;
2060 }
2061 sched::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2062 printf("%d %d %s_%s() = %d\\n",
2063 logindex, pid, probeprov, probefunc, errno);
2064 logindex++;
2065 }
2066 syscall::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2067 printf("%d %d %s_%s() = %d\\n",
2068 logindex, pid, probeprov, probefunc, errno);
2069 logindex++;
2070 }
2071 vminfo::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2072 printf("%d %d %s_%s() = %d\\n",
2073 logindex, pid, probeprov, probefunc, errno);
2074 logindex++;
2075 }
2076 */
2077 /* TODO(maruel): *stat* functions and friends
2078 syscall::access:return,
2079 syscall::chdir:return,
2080 syscall::chflags:return,
2081 syscall::chown:return,
2082 syscall::chroot:return,
2083 syscall::getattrlist:return,
2084 syscall::getxattr:return,
2085 syscall::lchown:return,
2086 syscall::lstat64:return,
2087 syscall::lstat:return,
2088 syscall::mkdir:return,
2089 syscall::pathconf:return,
2090 syscall::readlink:return,
2091 syscall::removexattr:return,
2092 syscall::setxattr:return,
2093 syscall::stat64:return,
2094 syscall::stat:return,
2095 syscall::truncate:return,
2096 syscall::unlink:return,
2097 syscall::utimes:return,
2098 */
2099 """
2100
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002101 def __init__(self, logname, use_sudo):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002102 """Starts the log collection with dtrace.
2103
2104 Requires root access or chmod 4555 on dtrace. dtrace is asynchronous so
2105 this needs to wait for dtrace to be "warmed up".
2106 """
2107 super(Dtrace.Tracer, self).__init__(logname)
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002108 # This script is used as a signal to figure out the root process.
2109 self._signal_script = create_subprocess_thunk()
2110 self._scripts_to_cleanup.append(self._signal_script)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002111 # This unique dummy temp file is used to signal the dtrace script that it
2112 # should stop as soon as all the child processes are done. A bit hackish
2113 # but works fine enough.
2114 self._dummy_file_id, self._dummy_file_name = tempfile.mkstemp(
2115 prefix='trace_signal_file')
2116
maruel@chromium.org9b2112a2013-04-05 18:01:02 +00002117 dtrace_path = '/usr/sbin/dtrace'
2118 if not os.path.isfile(dtrace_path):
2119 dtrace_path = 'dtrace'
2120 elif use_sudo is None and (os.stat(dtrace_path).st_mode & stat.S_ISUID):
2121 # No need to sudo. For those following at home, don't do that.
2122 use_sudo = False
2123
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002124 # Note: do not use the -p flag. It's useless if the initial process quits
2125 # too fast, resulting in missing traces from the grand-children. The D
2126 # code manages the dtrace lifetime itself.
2127 trace_cmd = [
maruel@chromium.org9b2112a2013-04-05 18:01:02 +00002128 dtrace_path,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002129 # Use a larger buffer if getting 'out of scratch space' errors.
2130 # Ref: https://wikis.oracle.com/display/DTrace/Options+and+Tunables
2131 '-b', '10m',
2132 '-x', 'dynvarsize=10m',
2133 #'-x', 'dtrace_global_maxsize=1m',
2134 '-x', 'evaltime=exec',
2135 '-o', '/dev/stderr',
2136 '-q',
2137 '-n', self._get_dtrace_code(),
2138 ]
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002139 if use_sudo is not False:
2140 trace_cmd.insert(0, 'sudo')
2141
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002142 with open(self._logname + '.log', 'wb') as logfile:
2143 self._dtrace = subprocess.Popen(
2144 trace_cmd, stdout=logfile, stderr=subprocess.STDOUT)
2145 logging.debug('Started dtrace pid: %d' % self._dtrace.pid)
2146
2147 # Reads until one line is printed, which signifies dtrace is up and ready.
2148 with open(self._logname + '.log', 'rb') as logfile:
2149 while 'dtrace_BEGIN' not in logfile.readline():
2150 if self._dtrace.poll() is not None:
2151 # Do a busy wait. :/
2152 break
2153 logging.debug('dtrace started')
2154
2155 def _get_dtrace_code(self):
2156 """Setups the D code to implement child process tracking.
2157
2158 Injects the cookie in the script so it knows when to stop.
2159
2160 The script will detect any instance of the script created with
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002161 create_subprocess_thunk() and will start tracing it.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002162 """
maruel@chromium.orgbe75f282012-11-02 19:34:37 +00002163 out = (
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002164 'inline int PID = %d;\n'
2165 'inline string SCRIPT = "%s";\n'
2166 'inline int FILE_ID = %d;\n'
2167 '\n'
2168 '%s') % (
2169 os.getpid(),
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002170 self._signal_script,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002171 self._dummy_file_id,
maruel@chromium.orgbe75f282012-11-02 19:34:37 +00002172 self.D_CODE)
2173 if os.environ.get('TRACE_INPUTS_DTRACE_ENABLE_EXECVE') == '1':
2174 # Do not enable by default since it tends to spew dtrace: error lines
2175 # because the execve() parameters are not in valid memory at the time of
2176 # logging.
2177 # TODO(maruel): Find a way to make this reliable since it's useful but
2178 # only works in limited/trivial uses cases for now.
2179 out += self.D_CODE_EXECVE
2180 return out
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002181
2182 def trace(self, cmd, cwd, tracename, output):
2183 """Runs dtrace on an executable.
2184
2185 This dtruss is broken when it starts the process itself or when tracing
2186 child processes, this code starts a wrapper process
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002187 generated with create_subprocess_thunk() which starts the executable to
2188 trace.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002189 """
2190 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
2191 assert os.path.isabs(cmd[0]), cmd[0]
2192 assert os.path.isabs(cwd), cwd
2193 assert os.path.normpath(cwd) == cwd, cwd
2194 with self._lock:
2195 if not self._initialized:
2196 raise TracingFailure(
2197 'Called Tracer.trace() on an unitialized object',
2198 None, None, None, tracename)
2199 assert tracename not in (i['trace'] for i in self._traces)
2200
2201 # Starts the script wrapper to start the child process. This signals the
2202 # dtrace script that this process is to be traced.
2203 stdout = stderr = None
2204 if output:
2205 stdout = subprocess.PIPE
2206 stderr = subprocess.STDOUT
2207 child_cmd = [
2208 sys.executable,
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002209 self._signal_script,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002210 tracename,
2211 ]
2212 # Call a dummy function so that dtrace knows I'm about to launch a process
2213 # that needs to be traced.
2214 # Yummy.
2215 child = subprocess.Popen(
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00002216 child_cmd + tools.fix_python_path(cmd),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002217 stdin=subprocess.PIPE,
2218 stdout=stdout,
2219 stderr=stderr,
2220 cwd=cwd)
2221 logging.debug('Started child pid: %d' % child.pid)
2222
2223 out = child.communicate()[0]
2224 # This doesn't mean tracing is done, one of the grand-child process may
2225 # still be alive. It will be tracked with the dtrace script.
2226
2227 with self._lock:
2228 assert tracename not in (i['trace'] for i in self._traces)
2229 self._traces.append(
2230 {
2231 'cmd': cmd,
2232 'cwd': cwd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002233 'pid': child.pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002234 'output': out,
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002235 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002236 })
2237 return child.returncode, out
2238
2239 def close(self, timeout=None):
2240 """Terminates dtrace."""
2241 logging.debug('close(%s)' % timeout)
2242 try:
2243 try:
2244 super(Dtrace.Tracer, self).close(timeout)
2245 # Signal dtrace that it should stop now.
maruel@chromium.orgf43e68b2012-10-15 20:23:10 +00002246 # ftruncate doesn't exist on Windows.
2247 os.ftruncate(self._dummy_file_id, 0) # pylint: disable=E1101
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002248 if timeout:
2249 start = time.time()
2250 # Use polling. :/
2251 while (self._dtrace.poll() is None and
2252 (time.time() - start) < timeout):
2253 time.sleep(0.1)
2254 self._dtrace.kill()
2255 self._dtrace.wait()
2256 finally:
2257 # Make sure to kill it in any case.
2258 if self._dtrace.poll() is None:
2259 try:
2260 self._dtrace.kill()
2261 self._dtrace.wait()
2262 except OSError:
2263 pass
2264
2265 if self._dtrace.returncode != 0:
2266 # Warn about any dtrace failure but basically ignore it.
2267 print 'dtrace failure: %s' % self._dtrace.returncode
2268 finally:
2269 os.close(self._dummy_file_id)
2270 os.remove(self._dummy_file_name)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002271
2272 def post_process_log(self):
2273 """Sorts the log back in order when each call occured.
2274
2275 dtrace doesn't save the buffer in strict order since it keeps one buffer
2276 per CPU.
2277 """
2278 super(Dtrace.Tracer, self).post_process_log()
2279 logname = self._logname + '.log'
2280 with open(logname, 'rb') as logfile:
2281 lines = [l for l in logfile if l.strip()]
2282 errors = [l for l in lines if l.startswith('dtrace:')]
2283 if errors:
2284 raise TracingFailure(
2285 'Found errors in the trace: %s' % '\n'.join(errors),
2286 None, None, None, logname)
2287 try:
2288 lines = sorted(lines, key=lambda l: int(l.split(' ', 1)[0]))
2289 except ValueError:
2290 raise TracingFailure(
2291 'Found errors in the trace: %s' % '\n'.join(
2292 l for l in lines if l.split(' ', 1)[0].isdigit()),
2293 None, None, None, logname)
2294 with open(logname, 'wb') as logfile:
2295 logfile.write(''.join(lines))
2296
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002297 def __init__(self, use_sudo=None):
2298 super(Dtrace, self).__init__()
2299 self.use_sudo = use_sudo
2300
2301 def get_tracer(self, logname):
2302 return self.Tracer(logname, self.use_sudo)
2303
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002304 @staticmethod
2305 def clean_trace(logname):
2306 for ext in ('', '.log'):
2307 if os.path.isfile(logname + ext):
2308 os.remove(logname + ext)
2309
2310 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00002311 def parse_log(cls, logname, blacklist, trace_name):
2312 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00002313 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002314
2315 def blacklist_more(filepath):
2316 # All the HFS metadata is in the form /.vol/...
2317 return blacklist(filepath) or re.match(r'^\/\.vol\/.+$', filepath)
2318
Marc-Antoine Ruelde011802013-11-12 15:19:47 -05002319 data = tools.read_json(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002320 out = []
2321 for item in data['traces']:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00002322 if trace_name and item['trace'] != trace_name:
2323 continue
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002324 result = {
2325 'output': item['output'],
2326 'trace': item['trace'],
2327 }
2328 try:
2329 context = cls.Context(blacklist_more, item['pid'], item['cwd'])
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002330 # It's fine to assume the file as UTF-8: OSX enforces the file names to
2331 # be valid UTF-8 and we control the log output.
2332 for line in codecs.open(logname + '.log', 'rb', encoding='utf-8'):
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002333 context.on_line(line)
2334 result['results'] = context.to_results()
2335 except TracingFailure:
2336 result['exception'] = sys.exc_info()
2337 out.append(result)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002338 return out
2339
2340
2341class LogmanTrace(ApiBase):
2342 """Uses the native Windows ETW based tracing functionality to trace a child
2343 process.
2344
2345 Caveat: this implementations doesn't track cwd or initial_cwd. It is because
2346 the Windows Kernel doesn't have a concept of 'current working directory' at
2347 all. A Win32 process has a map of current directories, one per drive letter
2348 and it is managed by the user mode kernel32.dll. In kernel, a file is always
2349 opened relative to another file_object or as an absolute path. All the current
2350 working directory logic is done in user mode.
2351 """
2352 class Context(ApiBase.Context):
2353 """Processes a ETW log line and keeps the list of existent and non
2354 existent files accessed.
2355
2356 Ignores directories.
2357 """
2358 # These indexes are for the stripped version in json.
2359 EVENT_NAME = 0
2360 TYPE = 1
2361 PID = 2
2362 TID = 3
2363 PROCESSOR_ID = 4
2364 TIMESTAMP = 5
2365 USER_DATA = 6
2366
2367 class Process(ApiBase.Context.Process):
2368 def __init__(self, *args):
2369 super(LogmanTrace.Context.Process, self).__init__(*args)
2370 # Handle file objects that succeeded.
2371 self.file_objects = {}
2372
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002373 def __init__(self, blacklist, thunk_pid, trace_name, thunk_cmd):
2374 logging.info(
2375 '%s(%d, %s, %s)', self.__class__.__name__, thunk_pid, trace_name,
2376 thunk_cmd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002377 super(LogmanTrace.Context, self).__init__(blacklist)
maruel@chromium.org561d4b22013-09-26 21:08:08 +00002378 self._drive_map = file_path.DosDriveMap()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002379 # Threads mapping to the corresponding process id.
2380 self._threads_active = {}
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002381 # Process ID of the tracer, e.g. the temporary script created by
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002382 # create_subprocess_thunk(). This is tricky because the process id may
2383 # have been reused.
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002384 self._thunk_pid = thunk_pid
2385 self._thunk_cmd = thunk_cmd
2386 self._trace_name = trace_name
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002387 self._line_number = 0
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002388 self._thunk_process = None
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002389
2390 def on_line(self, line):
2391 """Processes a json Event line."""
2392 self._line_number += 1
2393 try:
2394 # By Opcode
2395 handler = getattr(
2396 self,
2397 'handle_%s_%s' % (line[self.EVENT_NAME], line[self.TYPE]),
2398 None)
2399 if not handler:
2400 raise TracingFailure(
2401 'Unexpected event %s_%s' % (
2402 line[self.EVENT_NAME], line[self.TYPE]),
2403 None, None, None)
2404 handler(line)
2405 except TracingFailure, e:
2406 # Hack in the values since the handler could be a static function.
2407 e.pid = line[self.PID]
2408 e.line = line
2409 e.line_number = self._line_number
2410 # Re-raise the modified exception.
2411 raise
2412 except (KeyError, NotImplementedError, ValueError), e:
2413 raise TracingFailure(
2414 'Trace generated a %s exception: %s' % (
2415 e.__class__.__name__, str(e)),
2416 line[self.PID],
2417 self._line_number,
2418 line,
2419 e)
2420
2421 def to_results(self):
2422 if not self.root_process:
2423 raise TracingFailure(
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002424 'Failed to detect the initial process %d' % self._thunk_pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002425 None, None, None)
2426 process = self.root_process.to_results_process()
2427 return Results(process)
2428
2429 def _thread_to_process(self, tid):
2430 """Finds the process from the thread id."""
2431 tid = int(tid, 16)
2432 pid = self._threads_active.get(tid)
2433 if not pid or not self._process_lookup.get(pid):
2434 return
2435 return self._process_lookup[pid]
2436
2437 @classmethod
2438 def handle_EventTrace_Header(cls, line):
2439 """Verifies no event was dropped, e.g. no buffer overrun occured."""
2440 BUFFER_SIZE = cls.USER_DATA
2441 #VERSION = cls.USER_DATA + 1
2442 #PROVIDER_VERSION = cls.USER_DATA + 2
2443 #NUMBER_OF_PROCESSORS = cls.USER_DATA + 3
2444 #END_TIME = cls.USER_DATA + 4
2445 #TIMER_RESOLUTION = cls.USER_DATA + 5
2446 #MAX_FILE_SIZE = cls.USER_DATA + 6
2447 #LOG_FILE_MODE = cls.USER_DATA + 7
2448 #BUFFERS_WRITTEN = cls.USER_DATA + 8
2449 #START_BUFFERS = cls.USER_DATA + 9
2450 #POINTER_SIZE = cls.USER_DATA + 10
2451 EVENTS_LOST = cls.USER_DATA + 11
2452 #CPU_SPEED = cls.USER_DATA + 12
2453 #LOGGER_NAME = cls.USER_DATA + 13
2454 #LOG_FILE_NAME = cls.USER_DATA + 14
2455 #BOOT_TIME = cls.USER_DATA + 15
2456 #PERF_FREQ = cls.USER_DATA + 16
2457 #START_TIME = cls.USER_DATA + 17
2458 #RESERVED_FLAGS = cls.USER_DATA + 18
2459 #BUFFERS_LOST = cls.USER_DATA + 19
2460 #SESSION_NAME_STRING = cls.USER_DATA + 20
2461 #LOG_FILE_NAME_STRING = cls.USER_DATA + 21
2462 if line[EVENTS_LOST] != '0':
2463 raise TracingFailure(
2464 ( '%s events were lost during trace, please increase the buffer '
2465 'size from %s') % (line[EVENTS_LOST], line[BUFFER_SIZE]),
2466 None, None, None)
2467
2468 def handle_FileIo_Cleanup(self, line):
2469 """General wisdom: if a file is closed, it's because it was opened.
2470
2471 Note that FileIo_Close is not used since if a file was opened properly but
2472 not closed before the process exits, only Cleanup will be logged.
2473 """
2474 #IRP = self.USER_DATA
2475 TTID = self.USER_DATA + 1 # Thread ID, that's what we want.
2476 FILE_OBJECT = self.USER_DATA + 2
2477 #FILE_KEY = self.USER_DATA + 3
2478 proc = self._thread_to_process(line[TTID])
2479 if not proc:
2480 # Not a process we care about.
2481 return
2482 file_object = line[FILE_OBJECT]
2483 if file_object in proc.file_objects:
maruel@chromium.org538141b2013-06-03 20:57:17 +00002484 filepath, access_type = proc.file_objects.pop(file_object)
2485 proc.add_file(filepath, access_type)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002486
2487 def handle_FileIo_Create(self, line):
2488 """Handles a file open.
2489
2490 All FileIo events are described at
2491 http://msdn.microsoft.com/library/windows/desktop/aa363884.aspx
2492 for some value of 'description'.
2493
2494 " (..) process and thread id values of the IO events (..) are not valid "
2495 http://msdn.microsoft.com/magazine/ee358703.aspx
2496
2497 The FileIo.Create event doesn't return if the CreateFile() call
2498 succeeded, so keep track of the file_object and check that it is
2499 eventually closed with FileIo_Cleanup.
2500 """
2501 #IRP = self.USER_DATA
2502 TTID = self.USER_DATA + 1 # Thread ID, that's what we want.
2503 FILE_OBJECT = self.USER_DATA + 2
2504 #CREATE_OPTIONS = self.USER_DATA + 3
2505 #FILE_ATTRIBUTES = self.USER_DATA + 4
maruel@chromium.org538141b2013-06-03 20:57:17 +00002506 #SHARE_ACCESS = self.USER_DATA + 5
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002507 OPEN_PATH = self.USER_DATA + 6
2508
2509 proc = self._thread_to_process(line[TTID])
2510 if not proc:
2511 # Not a process we care about.
2512 return
2513
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002514 raw_path = line[OPEN_PATH]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002515 # Ignore directories and bare drive right away.
2516 if raw_path.endswith(os.path.sep):
2517 return
2518 filepath = self._drive_map.to_win32(raw_path)
2519 # Ignore bare drive right away. Some may still fall through with format
2520 # like '\\?\X:'
2521 if len(filepath) == 2:
2522 return
2523 file_object = line[FILE_OBJECT]
2524 if os.path.isdir(filepath):
2525 # There is no O_DIRECTORY equivalent on Windows. The closed is
2526 # FILE_FLAG_BACKUP_SEMANTICS but it's not exactly right either. So
2527 # simply discard directories are they are found.
2528 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00002529 # Override any stale file object.
2530 # TODO(maruel): Figure out a way to detect if the file was opened for
2531 # reading or writting. Sadly CREATE_OPTIONS doesn't seem to be of any help
2532 # here. For now mark as None to make it clear we have no idea what it is
2533 # about.
2534 proc.file_objects[file_object] = (filepath, None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002535
2536 def handle_FileIo_Rename(self, line):
2537 # TODO(maruel): Handle?
2538 pass
2539
2540 def handle_Process_End(self, line):
2541 pid = line[self.PID]
2542 if self._process_lookup.get(pid):
2543 logging.info('Terminated: %d' % pid)
2544 self._process_lookup[pid] = None
2545 else:
2546 logging.debug('Terminated: %d' % pid)
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002547 if self._thunk_process and self._thunk_process.pid == pid:
2548 self._thunk_process = None
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002549
2550 def handle_Process_Start(self, line):
2551 """Handles a new child process started by PID."""
2552 #UNIQUE_PROCESS_KEY = self.USER_DATA
2553 PROCESS_ID = self.USER_DATA + 1
2554 #PARENT_PID = self.USER_DATA + 2
2555 #SESSION_ID = self.USER_DATA + 3
2556 #EXIT_STATUS = self.USER_DATA + 4
2557 #DIRECTORY_TABLE_BASE = self.USER_DATA + 5
2558 #USER_SID = self.USER_DATA + 6
2559 IMAGE_FILE_NAME = self.USER_DATA + 7
2560 COMMAND_LINE = self.USER_DATA + 8
2561
2562 ppid = line[self.PID]
2563 pid = int(line[PROCESS_ID], 16)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002564 command_line = CommandLineToArgvW(line[COMMAND_LINE])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002565 logging.debug(
2566 'New process %d->%d (%s) %s' %
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002567 (ppid, pid, line[IMAGE_FILE_NAME], command_line))
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002568
2569 if pid == self._thunk_pid:
2570 # Need to ignore processes we don't know about because the log is
2571 # system-wide. self._thunk_pid shall start only one process.
2572 # This is tricky though because Windows *loves* to reuse process id and
2573 # it happens often that the process ID of the thunk script created by
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002574 # create_subprocess_thunk() is reused. So just detecting the pid here is
2575 # not sufficient, we must confirm the command line.
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002576 if command_line[:len(self._thunk_cmd)] != self._thunk_cmd:
2577 logging.info(
2578 'Ignoring duplicate pid %d for %s: %s while searching for %s',
2579 pid, self._trace_name, command_line, self._thunk_cmd)
2580 return
2581
2582 # TODO(maruel): The check is quite weak. Add the thunk path.
2583 if self._thunk_process:
2584 raise TracingFailure(
2585 ( 'Parent process is _thunk_pid(%d) but thunk_process(%d) is '
2586 'already set') % (self._thunk_pid, self._thunk_process.pid),
2587 None, None, None)
2588 proc = self.Process(self.blacklist, pid, None)
2589 self._thunk_process = proc
2590 return
2591 elif ppid == self._thunk_pid and self._thunk_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002592 proc = self.Process(self.blacklist, pid, None)
2593 self.root_process = proc
2594 ppid = None
2595 elif self._process_lookup.get(ppid):
2596 proc = self.Process(self.blacklist, pid, None)
2597 self._process_lookup[ppid].children.append(proc)
2598 else:
2599 # Ignore
2600 return
2601 self._process_lookup[pid] = proc
2602
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002603 proc.command = command_line
2604 proc.executable = line[IMAGE_FILE_NAME]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002605 # proc.command[0] may be the absolute path of 'executable' but it may be
2606 # anything else too. If it happens that command[0] ends with executable,
2607 # use it, otherwise defaults to the base name.
2608 cmd0 = proc.command[0].lower()
2609 if not cmd0.endswith('.exe'):
2610 # TODO(maruel): That's not strictly true either.
2611 cmd0 += '.exe'
2612 if cmd0.endswith(proc.executable) and os.path.isfile(cmd0):
2613 # Fix the path.
2614 cmd0 = cmd0.replace('/', os.path.sep)
2615 cmd0 = os.path.normpath(cmd0)
maruel@chromium.org561d4b22013-09-26 21:08:08 +00002616 proc.executable = file_path.get_native_path_case(cmd0)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002617 logging.info(
2618 'New child: %s -> %d %s' % (ppid, pid, proc.executable))
2619
2620 def handle_Thread_End(self, line):
2621 """Has the same parameters as Thread_Start."""
2622 tid = int(line[self.TID], 16)
2623 self._threads_active.pop(tid, None)
2624
2625 def handle_Thread_Start(self, line):
2626 """Handles a new thread created.
2627
2628 Do not use self.PID here since a process' initial thread is created by
2629 the parent process.
2630 """
2631 PROCESS_ID = self.USER_DATA
2632 TTHREAD_ID = self.USER_DATA + 1
2633 #STACK_BASE = self.USER_DATA + 2
2634 #STACK_LIMIT = self.USER_DATA + 3
2635 #USER_STACK_BASE = self.USER_DATA + 4
2636 #USER_STACK_LIMIT = self.USER_DATA + 5
2637 #AFFINITY = self.USER_DATA + 6
2638 #WIN32_START_ADDR = self.USER_DATA + 7
2639 #TEB_BASE = self.USER_DATA + 8
2640 #SUB_PROCESS_TAG = self.USER_DATA + 9
2641 #BASE_PRIORITY = self.USER_DATA + 10
2642 #PAGE_PRIORITY = self.USER_DATA + 11
2643 #IO_PRIORITY = self.USER_DATA + 12
2644 #THREAD_FLAGS = self.USER_DATA + 13
2645 # Do not use self.PID here since a process' initial thread is created by
2646 # the parent process.
2647 pid = int(line[PROCESS_ID], 16)
2648 tid = int(line[TTHREAD_ID], 16)
2649 logging.debug('New thread pid:%d, tid:%d' % (pid, tid))
2650 self._threads_active[tid] = pid
2651
2652 @classmethod
2653 def supported_events(cls):
2654 """Returns all the procesed events."""
2655 out = []
2656 for member in dir(cls):
2657 match = re.match(r'^handle_([A-Za-z]+)_([A-Za-z]+)$', member)
2658 if match:
2659 out.append(match.groups())
2660 return out
2661
2662 class Tracer(ApiBase.Tracer):
2663 # The basic headers.
2664 EXPECTED_HEADER = [
2665 u'Event Name',
2666 u'Type',
2667 u'Event ID',
2668 u'Version',
2669 u'Channel',
2670 u'Level', # 5
2671 u'Opcode',
2672 u'Task',
2673 u'Keyword',
2674 u'PID',
2675 u'TID', # 10
2676 u'Processor Number',
2677 u'Instance ID',
2678 u'Parent Instance ID',
2679 u'Activity ID',
2680 u'Related Activity ID', # 15
2681 u'Clock-Time',
2682 u'Kernel(ms)', # Both have a resolution of ~15ms which makes them
2683 u'User(ms)', # pretty much useless.
2684 u'User Data', # Extra arguments that are event-specific.
2685 ]
2686 # Only the useful headers common to all entries are listed there. Any column
2687 # at 19 or higher is dependent on the specific event.
2688 EVENT_NAME = 0
2689 TYPE = 1
2690 PID = 9
2691 TID = 10
2692 PROCESSOR_ID = 11
2693 TIMESTAMP = 16
2694 NULL_GUID = '{00000000-0000-0000-0000-000000000000}'
2695 USER_DATA = 19
2696
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002697 class CsvReader(object):
2698 """CSV reader that reads files generated by tracerpt.exe.
2699
2700 csv.reader() fails to read them properly, it mangles file names quoted
2701 with "" with a comma in it.
2702 """
2703 # 0. Had a ',' or one of the following ' ' after a comma, next should
2704 # be ' ', '"' or string or ',' for an empty field.
2705 ( HAD_DELIMITER,
2706 # 1. Processing an unquoted field up to ','.
2707 IN_STR,
2708 # 2. Processing a new field starting with '"'.
2709 STARTING_STR_QUOTED,
2710 # 3. Second quote in a row at the start of a field. It could be either
2711 # '""foo""' or '""'. Who the hell thought it was a great idea to use
2712 # the same character for delimiting and escaping?
2713 STARTING_SECOND_QUOTE,
2714 # 4. A quote inside a quoted string where the previous character was
2715 # not a quote, so the string is not empty. Can be either: end of a
2716 # quoted string (a delimiter) or a quote escape. The next char must be
2717 # either '"' or ','.
2718 HAD_QUOTE_IN_QUOTED,
2719 # 5. Second quote inside a quoted string.
2720 HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED,
2721 # 6. Processing a field that started with '"'.
2722 IN_STR_QUOTED) = range(7)
2723
2724 def __init__(self, f):
2725 self.f = f
2726
2727 def __iter__(self):
2728 return self
2729
2730 def next(self):
2731 """Splits the line in fields."""
2732 line = self.f.readline()
2733 if not line:
2734 raise StopIteration()
2735 line = line.strip()
2736 fields = []
2737 state = self.HAD_DELIMITER
2738 for i, c in enumerate(line):
2739 if state == self.HAD_DELIMITER:
2740 if c == ',':
2741 # Empty field.
2742 fields.append('')
2743 elif c == ' ':
2744 # Ignore initial whitespaces
2745 pass
2746 elif c == '"':
2747 state = self.STARTING_STR_QUOTED
2748 fields.append('')
2749 else:
2750 # Start of a new field.
2751 state = self.IN_STR
2752 fields.append(c)
2753
2754 elif state == self.IN_STR:
2755 # Do not accept quote inside unquoted field.
2756 assert c != '"', (i, c, line, fields)
2757 if c == ',':
2758 fields[-1] = fields[-1].strip()
2759 state = self.HAD_DELIMITER
2760 else:
2761 fields[-1] = fields[-1] + c
2762
2763 elif state == self.STARTING_STR_QUOTED:
2764 if c == '"':
2765 # Do not store the character yet.
2766 state = self.STARTING_SECOND_QUOTE
2767 else:
2768 state = self.IN_STR_QUOTED
2769 fields[-1] = fields[-1] + c
2770
2771 elif state == self.STARTING_SECOND_QUOTE:
2772 if c == ',':
2773 # It was an empty field. '""' == ''.
2774 state = self.HAD_DELIMITER
2775 else:
2776 fields[-1] = fields[-1] + '"' + c
2777 state = self.IN_STR_QUOTED
2778
2779 elif state == self.HAD_QUOTE_IN_QUOTED:
2780 if c == ',':
2781 # End of the string.
2782 state = self.HAD_DELIMITER
2783 elif c == '"':
2784 state = self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED
2785 else:
2786 # The previous double-quote was just an unescaped quote.
2787 fields[-1] = fields[-1] + '"' + c
2788 state = self.IN_STR_QUOTED
2789
2790 elif state == self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED:
2791 if c == ',':
2792 # End of the string.
2793 state = self.HAD_DELIMITER
2794 fields[-1] = fields[-1] + '"'
2795 else:
maruel@chromium.org261d1352013-02-25 19:23:20 +00002796 # That's just how the logger rolls. Revert back to appending the
2797 # char and "guess" it was a quote in a double-quoted string.
2798 state = self.IN_STR_QUOTED
2799 fields[-1] = fields[-1] + '"' + c
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002800
2801 elif state == self.IN_STR_QUOTED:
2802 if c == '"':
2803 # Could be a delimiter or an escape.
2804 state = self.HAD_QUOTE_IN_QUOTED
2805 else:
2806 fields[-1] = fields[-1] + c
2807
2808 if state == self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED:
2809 fields[-1] = fields[-1] + '"'
2810 else:
2811 assert state in (
2812 # Terminated with a normal field.
2813 self.IN_STR,
2814 # Terminated with an empty field.
2815 self.STARTING_SECOND_QUOTE,
2816 # Terminated with a normal quoted field.
2817 self.HAD_QUOTE_IN_QUOTED), (
2818 line, state, fields)
2819 return fields
2820
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002821 def __init__(self, logname):
2822 """Starts the log collection.
2823
2824 Requires administrative access. logman.exe is synchronous so no need for a
2825 "warmup" call. 'Windows Kernel Trace' is *localized* so use its GUID
2826 instead. The GUID constant name is SystemTraceControlGuid. Lovely.
2827
2828 One can get the list of potentially interesting providers with:
2829 "logman query providers | findstr /i file"
2830 """
2831 super(LogmanTrace.Tracer, self).__init__(logname)
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002832 self._signal_script = create_subprocess_thunk()
2833 self._scripts_to_cleanup.append(self._signal_script)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002834 cmd_start = [
2835 'logman.exe',
2836 'start',
2837 'NT Kernel Logger',
2838 '-p', '{9e814aad-3204-11d2-9a82-006008a86939}',
2839 # splitio,fileiocompletion,syscall,file,cswitch,img
2840 '(process,fileio,thread)',
2841 '-o', self._logname + '.etl',
2842 '-ets', # Send directly to kernel
2843 # Values extracted out of thin air.
2844 # Event Trace Session buffer size in kb.
2845 '-bs', '10240',
2846 # Number of Event Trace Session buffers.
2847 '-nb', '16', '256',
2848 ]
2849 logging.debug('Running: %s' % cmd_start)
2850 try:
2851 subprocess.check_call(
2852 cmd_start,
2853 stdin=subprocess.PIPE,
2854 stdout=subprocess.PIPE,
2855 stderr=subprocess.STDOUT)
2856 except subprocess.CalledProcessError, e:
2857 if e.returncode == -2147024891:
2858 print >> sys.stderr, 'Please restart with an elevated admin prompt'
2859 elif e.returncode == -2144337737:
2860 print >> sys.stderr, (
2861 'A kernel trace was already running, stop it and try again')
2862 raise
2863
2864 def trace(self, cmd, cwd, tracename, output):
2865 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
2866 assert os.path.isabs(cmd[0]), cmd[0]
2867 assert os.path.isabs(cwd), cwd
2868 assert os.path.normpath(cwd) == cwd, cwd
2869 with self._lock:
2870 if not self._initialized:
2871 raise TracingFailure(
2872 'Called Tracer.trace() on an unitialized object',
2873 None, None, None, tracename)
2874 assert tracename not in (i['trace'] for i in self._traces)
2875
2876 # Use "logman -?" for help.
2877
2878 stdout = stderr = None
2879 if output:
2880 stdout = subprocess.PIPE
2881 stderr = subprocess.STDOUT
2882
2883 # Run the child process.
2884 logging.debug('Running: %s' % cmd)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002885 # Use the temporary script generated with create_subprocess_thunk() so we
2886 # have a clear pid owner. Since trace_inputs.py can be used as a library
2887 # and could trace multiple processes simultaneously, it makes it more
2888 # complex if the executable to be traced is executed directly here. It
2889 # also solves issues related to logman.exe that needs to be executed to
2890 # control the kernel trace.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002891 child_cmd = [
2892 sys.executable,
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002893 self._signal_script,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002894 tracename,
2895 ]
2896 child = subprocess.Popen(
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00002897 child_cmd + tools.fix_python_path(cmd),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002898 cwd=cwd,
2899 stdin=subprocess.PIPE,
2900 stdout=stdout,
2901 stderr=stderr)
2902 logging.debug('Started child pid: %d' % child.pid)
2903 out = child.communicate()[0]
2904 # This doesn't mean all the grand-children are done. Sadly, we don't have
2905 # a good way to determine that.
2906
2907 with self._lock:
2908 assert tracename not in (i['trace'] for i in self._traces)
2909 self._traces.append({
maruel@chromium.orgf43e68b2012-10-15 20:23:10 +00002910 'cmd': cmd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002911 'cwd': cwd,
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002912 'output': out,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002913 'pid': child.pid,
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002914 # Used to figure out the real process when process ids are reused.
2915 'thunk_cmd': child_cmd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002916 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002917 })
2918
2919 return child.returncode, out
2920
2921 def close(self, _timeout=None):
2922 """Stops the kernel log collection and converts the traces to text
2923 representation.
2924 """
2925 with self._lock:
maruel@chromium.org808e3f22013-05-16 16:42:54 +00002926 try:
2927 super(LogmanTrace.Tracer, self).close()
2928 finally:
2929 cmd_stop = [
2930 'logman.exe',
2931 'stop',
2932 'NT Kernel Logger',
2933 '-ets', # Sends the command directly to the kernel.
2934 ]
2935 logging.debug('Running: %s' % cmd_stop)
2936 subprocess.check_call(
2937 cmd_stop,
2938 stdin=subprocess.PIPE,
2939 stdout=subprocess.PIPE,
2940 stderr=subprocess.STDOUT)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002941
2942 def post_process_log(self):
2943 """Converts the .etl file into .csv then into .json."""
2944 super(LogmanTrace.Tracer, self).post_process_log()
2945 logformat = 'csv'
2946 self._convert_log(logformat)
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00002947 self._trim_log(logformat)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002948
maruel@chromium.org808e3f22013-05-16 16:42:54 +00002949 def _gen_logdata(self):
2950 return {
2951 'format': 'csv',
2952 'traces': self._traces,
2953 }
2954
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00002955 def _trim_log(self, logformat):
2956 """Reduces the amount of data in original log by generating a 'reduced'
2957 log.
2958 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002959 if logformat == 'csv_utf16':
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002960 file_handle = codecs.open(
2961 self._logname + '.' + logformat, 'r', encoding='utf-16')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002962
2963 elif logformat == 'csv':
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002964 assert sys.getfilesystemencoding() == 'mbcs'
2965 file_handle = codecs.open(
2966 self._logname + '.' + logformat, 'r',
2967 encoding=get_current_encoding())
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002968
2969 supported_events = LogmanTrace.Context.supported_events()
2970
2971 def trim(generator):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00002972 """Loads items from the generator and returns the interesting data.
2973
2974 It filters out any uninteresting line and reduce the amount of data in
2975 the trace.
2976 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002977 for index, line in enumerate(generator):
2978 if not index:
2979 if line != self.EXPECTED_HEADER:
2980 raise TracingFailure(
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002981 'Found malformed header: %s' % line,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002982 None, None, None)
2983 continue
2984 # As you can see, the CSV is full of useful non-redundant information:
2985 if (line[2] != '0' or # Event ID
2986 line[3] not in ('2', '3') or # Version
2987 line[4] != '0' or # Channel
2988 line[5] != '0' or # Level
2989 line[7] != '0' or # Task
2990 line[8] != '0x0000000000000000' or # Keyword
2991 line[12] != '' or # Instance ID
2992 line[13] != '' or # Parent Instance ID
2993 line[14] != self.NULL_GUID or # Activity ID
2994 line[15] != ''): # Related Activity ID
2995 raise TracingFailure(
2996 'Found unexpected values in line: %s' % ' '.join(line),
2997 None, None, None)
2998
2999 if (line[self.EVENT_NAME], line[self.TYPE]) not in supported_events:
3000 continue
3001
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003002 yield [
3003 line[self.EVENT_NAME],
3004 line[self.TYPE],
3005 line[self.PID],
3006 line[self.TID],
3007 line[self.PROCESSOR_ID],
3008 line[self.TIMESTAMP],
3009 ] + line[self.USER_DATA:]
3010
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003011 # must not convert the trim() call into a list, since it will use too much
3012 # memory for large trace. use a csv file as a workaround since the json
3013 # parser requires a complete in-memory file.
3014 with open('%s.preprocessed' % self._logname, 'wb') as f:
3015 # $ and * can't be used in file name on windows, reducing the likelihood
3016 # of having to escape a string.
3017 out = csv.writer(
3018 f, delimiter='$', quotechar='*', quoting=csv.QUOTE_MINIMAL)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003019 for line in trim(self.CsvReader(file_handle)):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003020 out.writerow([s.encode('utf-8') for s in line])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003021
3022 def _convert_log(self, logformat):
3023 """Converts the ETL trace to text representation.
3024
3025 Normally, 'csv' is sufficient. If complex scripts are used (like eastern
3026 languages), use 'csv_utf16'. If localization gets in the way, use 'xml'.
3027
3028 Arguments:
3029 - logformat: Text format to be generated, csv, csv_utf16 or xml.
3030
3031 Use "tracerpt -?" for help.
3032 """
3033 LOCALE_INVARIANT = 0x7F
3034 windll.kernel32.SetThreadLocale(LOCALE_INVARIANT)
3035 cmd_convert = [
3036 'tracerpt.exe',
3037 '-l', self._logname + '.etl',
3038 '-o', self._logname + '.' + logformat,
3039 '-gmt', # Use UTC
3040 '-y', # No prompt
3041 # Use -of XML to get the header of each items after column 19, e.g. all
3042 # the actual headers of 'User Data'.
3043 ]
3044
3045 if logformat == 'csv':
3046 # tracerpt localizes the 'Type' column, for major brainfuck
3047 # entertainment. I can't imagine any sane reason to do that.
3048 cmd_convert.extend(['-of', 'CSV'])
3049 elif logformat == 'csv_utf16':
3050 # This causes it to use UTF-16, which doubles the log size but ensures
3051 # the log is readable for non-ASCII characters.
3052 cmd_convert.extend(['-of', 'CSV', '-en', 'Unicode'])
3053 elif logformat == 'xml':
3054 cmd_convert.extend(['-of', 'XML'])
3055 else:
3056 raise ValueError('Unexpected log format \'%s\'' % logformat)
3057 logging.debug('Running: %s' % cmd_convert)
3058 # This can takes tens of minutes for large logs.
3059 # Redirects all output to stderr.
3060 subprocess.check_call(
3061 cmd_convert,
3062 stdin=subprocess.PIPE,
3063 stdout=sys.stderr,
3064 stderr=sys.stderr)
3065
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003066 def __init__(self, use_sudo=False): # pylint: disable=W0613
3067 super(LogmanTrace, self).__init__()
3068 # Ignore use_sudo. It's irrelevant on Windows but kept to simplify the API.
3069
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003070 @staticmethod
3071 def clean_trace(logname):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003072 for ext in ('', '.csv', '.etl', '.json', '.xml', '.preprocessed'):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003073 if os.path.isfile(logname + ext):
3074 os.remove(logname + ext)
3075
3076 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003077 def parse_log(cls, logname, blacklist, trace_name):
3078 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00003079 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003080
3081 def blacklist_more(filepath):
3082 # All the NTFS metadata is in the form x:\$EXTEND or stuff like that.
3083 return blacklist(filepath) or re.match(r'[A-Z]\:\\\$EXTEND', filepath)
3084
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003085 # Create a list of (Context, result_dict) tuples. This is necessary because
3086 # the csv file may be larger than the amount of available memory.
3087 contexes = [
3088 (
3089 cls.Context(
3090 blacklist_more, item['pid'], item['trace'], item['thunk_cmd']),
3091 {
3092 'output': item['output'],
3093 'trace': item['trace'],
3094 },
3095 )
Marc-Antoine Ruelde011802013-11-12 15:19:47 -05003096 for item in tools.read_json(logname)['traces']
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003097 if not trace_name or item['trace'] == trace_name
3098 ]
3099
3100 # The log may be too large to fit in memory and it is not efficient to read
3101 # it multiple times, so multiplex the contexes instead, which is slightly
3102 # more awkward.
3103 with open('%s.preprocessed' % logname, 'rb') as f:
3104 lines = csv.reader(
3105 f, delimiter='$', quotechar='*', quoting=csv.QUOTE_MINIMAL)
3106 for encoded in lines:
3107 line = [s.decode('utf-8') for s in encoded]
3108 # Convert the PID in-place from hex.
3109 line[cls.Context.PID] = int(line[cls.Context.PID], 16)
3110 for context in contexes:
3111 if 'exception' in context[1]:
3112 continue
3113 try:
3114 context[0].on_line(line)
3115 except TracingFailure:
3116 context[1]['exception'] = sys.exc_info()
3117
3118 for context in contexes:
3119 if 'exception' in context[1]:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003120 continue
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003121 context[1]['results'] = context[0].to_results()
3122
3123 return [context[1] for context in contexes]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003124
3125
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003126def get_api(**kwargs):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003127 """Returns the correct implementation for the current OS."""
3128 if sys.platform == 'cygwin':
3129 raise NotImplementedError(
3130 'Not implemented for cygwin, start the script from Win32 python')
3131 flavors = {
3132 'win32': LogmanTrace,
3133 'darwin': Dtrace,
3134 'sunos5': Dtrace,
3135 'freebsd7': Dtrace,
3136 'freebsd8': Dtrace,
3137 }
3138 # Defaults to strace.
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003139 return flavors.get(sys.platform, Strace)(**kwargs)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003140
3141
3142def extract_directories(root_dir, files, blacklist):
3143 """Detects if all the files in a directory are in |files| and if so, replace
3144 the individual files by a Results.Directory instance.
3145
3146 Takes a list of Results.File instances and returns a shorter list of
3147 Results.File and Results.Directory instances.
3148
3149 Arguments:
3150 - root_dir: Optional base directory that shouldn't be search further.
3151 - files: list of Results.File instances.
maruel@chromium.org3683afe2013-07-27 00:09:27 +00003152 - blacklist: lambda to reject unneeded files, for example r'.+\.pyc'.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003153 """
3154 logging.info(
3155 'extract_directories(%s, %d files, ...)' % (root_dir, len(files)))
3156 assert not (root_dir or '').endswith(os.path.sep), root_dir
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003157 # It is important for root_dir to not be a symlinked path, make sure to call
3158 # os.path.realpath() as needed.
3159 assert not root_dir or (
maruel@chromium.org561d4b22013-09-26 21:08:08 +00003160 os.path.realpath(file_path.get_native_path_case(root_dir)) == root_dir)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003161 assert not any(isinstance(f, Results.Directory) for f in files)
3162 # Remove non existent files.
3163 files = [f for f in files if f.existent]
3164 if not files:
3165 return files
3166 # All files must share the same root, which can be None.
3167 assert len(set(f.root for f in files)) == 1, set(f.root for f in files)
3168
3169 # Creates a {directory: {filename: File}} mapping, up to root.
3170 buckets = {}
3171 if root_dir:
3172 buckets[root_dir] = {}
3173 for fileobj in files:
3174 path = fileobj.full_path
3175 directory = os.path.dirname(path)
3176 assert directory
3177 # Do not use os.path.basename() so trailing os.path.sep is kept.
3178 basename = path[len(directory)+1:]
3179 files_in_directory = buckets.setdefault(directory, {})
3180 files_in_directory[basename] = fileobj
3181 # Add all the directories recursively up to root.
3182 while True:
3183 old_d = directory
3184 directory = os.path.dirname(directory)
3185 if directory + os.path.sep == root_dir or directory == old_d:
3186 break
3187 buckets.setdefault(directory, {})
3188
3189 root_prefix = len(root_dir) + 1 if root_dir else 0
3190 for directory in sorted(buckets, reverse=True):
maruel@chromium.orgb1678e62013-03-19 15:55:17 +00003191 if not os.path.isdir(directory):
3192 logging.debug(
3193 '%s was a directory but doesn\'t exist anymore; ignoring', directory)
3194 continue
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003195 actual = set(f for f in os.listdir(directory) if not blacklist(f))
3196 expected = set(buckets[directory])
3197 if not (actual - expected):
3198 parent = os.path.dirname(directory)
3199 buckets[parent][os.path.basename(directory)] = Results.Directory(
3200 root_dir,
3201 directory[root_prefix:],
3202 False,
3203 sum(f.size for f in buckets[directory].itervalues()),
3204 sum(f.nb_files for f in buckets[directory].itervalues()))
3205 # Remove the whole bucket.
3206 del buckets[directory]
3207
3208 # Reverse the mapping with what remains. The original instances are returned,
3209 # so the cached meta data is kept.
3210 files = sum((x.values() for x in buckets.itervalues()), [])
3211 return sorted(files, key=lambda x: x.path)
3212
3213
3214def trace(logfile, cmd, cwd, api, output):
3215 """Traces an executable. Returns (returncode, output) from api.
3216
3217 Arguments:
3218 - logfile: file to write to.
3219 - cmd: command to run.
3220 - cwd: current directory to start the process in.
3221 - api: a tracing api instance.
3222 - output: if True, returns output, otherwise prints it at the console.
3223 """
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00003224 cmd = tools.fix_python_path(cmd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003225 api.clean_trace(logfile)
3226 with api.get_tracer(logfile) as tracer:
3227 return tracer.trace(cmd, cwd, 'default', output)
3228
3229
maruel@chromium.orge5322512013-08-19 20:17:57 +00003230def CMDclean(parser, args):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003231 """Cleans up traces."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003232 options, args = parser.parse_args(args)
3233 api = get_api()
3234 api.clean_trace(options.log)
3235 return 0
3236
3237
maruel@chromium.orge5322512013-08-19 20:17:57 +00003238def CMDtrace(parser, args):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003239 """Traces an executable."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003240 parser.allow_interspersed_args = False
3241 parser.add_option(
3242 '-q', '--quiet', action='store_true',
3243 help='Redirects traced executable output to /dev/null')
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003244 parser.add_option(
3245 '-s', '--sudo', action='store_true',
3246 help='Use sudo when shelling out the tracer tool (ignored on Windows)')
3247 parser.add_option(
3248 '-n', '--no-sudo', action='store_false',
3249 help='Don\'t use sudo')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003250 options, args = parser.parse_args(args)
3251
3252 if not args:
3253 parser.error('Please provide a command to run')
3254
Marc-Antoine Ruel5906fcd2013-11-08 21:02:24 -05003255 if not can_trace():
3256 parser.error('Please rerun this program with admin privileges')
3257
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003258 if not os.path.isabs(args[0]) and os.access(args[0], os.X_OK):
3259 args[0] = os.path.abspath(args[0])
3260
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003261 # options.sudo default value is None, which is to do whatever tracer defaults
3262 # do.
3263 api = get_api(use_sudo=options.sudo)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003264 return trace(options.log, args, os.getcwd(), api, options.quiet)[0]
3265
3266
maruel@chromium.orge5322512013-08-19 20:17:57 +00003267def CMDread(parser, args):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003268 """Reads the logs and prints the result."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003269 parser.add_option(
3270 '-V', '--variable',
3271 nargs=2,
3272 action='append',
3273 dest='variables',
3274 metavar='VAR_NAME directory',
3275 default=[],
3276 help=('Variables to replace relative directories against. Example: '
3277 '"-v \'$HOME\' \'/home/%s\'" will replace all occurence of your '
3278 'home dir with $HOME') % getpass.getuser())
3279 parser.add_option(
3280 '--root-dir',
3281 help='Root directory to base everything off it. Anything outside of this '
3282 'this directory will not be reported')
3283 parser.add_option(
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003284 '--trace-name',
3285 help='Only reads one of the trace. Defaults to reading all traces')
3286 parser.add_option(
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003287 '-j', '--json', action='store_true',
3288 help='Outputs raw result data as json')
3289 parser.add_option(
maruel@chromium.org3683afe2013-07-27 00:09:27 +00003290 '--trace-blacklist', action='append', default=[],
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003291 help='List of regexp to use as blacklist filter')
3292 options, args = parser.parse_args(args)
3293
3294 if options.root_dir:
maruel@chromium.org561d4b22013-09-26 21:08:08 +00003295 options.root_dir = file_path.get_native_path_case(
maruel@chromium.org7dc17b32013-04-02 22:27:05 +00003296 unicode(os.path.abspath(options.root_dir)))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003297
3298 variables = dict(options.variables)
3299 api = get_api()
maruel@chromium.org3683afe2013-07-27 00:09:27 +00003300 blacklist = gen_blacklist(options.trace_blacklist)
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003301 data = api.parse_log(options.log, blacklist, options.trace_name)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003302 # Process each trace.
3303 output_as_json = []
maruel@chromium.org9e56b592013-05-21 16:56:05 +00003304 try:
3305 for item in data:
3306 if 'exception' in item:
3307 # Do not abort the other traces.
3308 print >> sys.stderr, (
3309 'Trace %s: Got an exception: %s' % (
3310 item['trace'], item['exception'][1]))
3311 continue
3312 results = item['results']
3313 if options.root_dir:
3314 results = results.strip_root(options.root_dir)
3315
3316 if options.json:
3317 output_as_json.append(results.flatten())
3318 else:
3319 simplified = extract_directories(
3320 options.root_dir, results.files, blacklist)
3321 simplified = [f.replace_variables(variables) for f in simplified]
3322 if len(data) > 1:
3323 print('Trace: %s' % item['trace'])
3324 print('Total: %d' % len(results.files))
3325 print('Non existent: %d' % len(results.non_existent))
3326 for f in results.non_existent:
3327 print(' %s' % f.path)
3328 print(
3329 'Interesting: %d reduced to %d' % (
3330 len(results.existent), len(simplified)))
3331 for f in simplified:
3332 print(' %s' % f.path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003333
3334 if options.json:
Marc-Antoine Ruelde011802013-11-12 15:19:47 -05003335 tools.write_json(sys.stdout, output_as_json, False)
maruel@chromium.org9e56b592013-05-21 16:56:05 +00003336 except KeyboardInterrupt:
3337 return 1
3338 except IOError as e:
3339 if e.errno == errno.EPIPE:
3340 # Do not print a stack trace when the output is piped to less and the user
3341 # quits before the whole output was written.
3342 return 1
3343 raise
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003344 return 0
3345
3346
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00003347class OptionParserTraceInputs(tools.OptionParserWithLogging):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003348 """Adds automatic --log handling."""
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00003349
3350 # Disable --log-file options since both --log and --log-file options are
3351 # confusing.
3352 # TODO(vadimsh): Rename --log-file or --log to something else.
3353 enable_log_file = False
3354
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003355 def __init__(self, **kwargs):
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00003356 tools.OptionParserWithLogging.__init__(self, **kwargs)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003357 self.add_option(
3358 '-l', '--log', help='Log file to generate or read, required')
3359
3360 def parse_args(self, *args, **kwargs):
3361 """Makes sure the paths make sense.
3362
3363 On Windows, / and \ are often mixed together in a path.
3364 """
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00003365 options, args = tools.OptionParserWithLogging.parse_args(
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003366 self, *args, **kwargs)
3367 if not options.log:
3368 self.error('Must supply a log file with -l')
3369 options.log = os.path.abspath(options.log)
3370 return options, args
3371
3372
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003373def main(argv):
maruel@chromium.orge5322512013-08-19 20:17:57 +00003374 dispatcher = subcommand.CommandDispatcher(__name__)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003375 try:
maruel@chromium.org3d671992013-08-20 00:38:27 +00003376 return dispatcher.execute(
3377 OptionParserTraceInputs(version=__version__), argv)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003378 except TracingFailure, e:
3379 sys.stderr.write('\nError: ')
3380 sys.stderr.write(str(e))
3381 sys.stderr.write('\n')
3382 return 1
3383
3384
3385if __name__ == '__main__':
maruel@chromium.orge5322512013-08-19 20:17:57 +00003386 fix_encoding.fix_encoding()
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00003387 tools.disable_buffering()
maruel@chromium.orge5322512013-08-19 20:17:57 +00003388 colorama.init()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003389 sys.exit(main(sys.argv[1:]))