blob: 1005e5654ea1125d8701b7a7a80eae7de19e9c50 [file] [log] [blame]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001#!/usr/bin/env python
2# coding=utf-8
Marc-Antoine Ruel8add1242013-11-05 17:28:27 -05003# Copyright 2012 The Swarming Authors. All rights reserved.
Marc-Antoine Ruele98b1122013-11-05 20:27:57 -05004# Use of this source code is governed under the Apache License, Version 2.0 that
5# can be found in the LICENSE file.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00006
7"""Traces an executable and its child processes and extract the files accessed
8by them.
9
10The implementation uses OS-specific API. The native Kernel logger and the ETL
11interface is used on Windows. Dtrace is used on OSX. Strace is used otherwise.
12The OS-specific implementation is hidden in an 'API' interface.
13
14The results are embedded in a Results instance. The tracing is done in two
15phases, the first is to do the actual trace and generate an
16implementation-specific log file. Then the log file is parsed to extract the
17information, including the individual child processes and the files accessed
18from the log.
19"""
20
21import codecs
22import csv
maruel@chromium.org9e56b592013-05-21 16:56:05 +000023import errno
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000024import getpass
25import glob
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000026import logging
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000027import os
28import re
maruel@chromium.org9b2112a2013-04-05 18:01:02 +000029import stat
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000030import subprocess
31import sys
32import tempfile
33import threading
34import time
35import weakref
36
maruel@chromium.orge5322512013-08-19 20:17:57 +000037from third_party import colorama
38from third_party.depot_tools import fix_encoding
39from third_party.depot_tools import subcommand
40
maruel@chromium.org561d4b22013-09-26 21:08:08 +000041from utils import file_path
vadimsh@chromium.orga4326472013-08-24 02:05:41 +000042from utils import tools
43
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000044## OS-specific imports
45
46if sys.platform == 'win32':
maruel@chromium.org561d4b22013-09-26 21:08:08 +000047 from ctypes.wintypes import byref, c_int, c_wchar_p
48 from ctypes.wintypes import windll # pylint: disable=E0611
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000049
50
maruel@chromium.org3d671992013-08-20 00:38:27 +000051__version__ = '0.1'
52
53
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000054BASE_DIR = os.path.dirname(os.path.abspath(__file__))
55ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR))
56
57
58class TracingFailure(Exception):
59 """An exception occured during tracing."""
60 def __init__(self, description, pid, line_number, line, *args):
61 super(TracingFailure, self).__init__(
62 description, pid, line_number, line, *args)
63 self.description = description
64 self.pid = pid
65 self.line_number = line_number
66 self.line = line
67 self.extra = args
68
69 def __str__(self):
70 out = self.description
71 if self.pid:
72 out += '\npid: %d' % self.pid
73 if self.line_number:
74 out += '\nline: %d' % self.line_number
75 if self.line:
76 out += '\n%s' % self.line
77 if self.extra:
78 out += '\n' + ', '.join(map(str, filter(None, self.extra)))
79 return out
80
81
82## OS-specific functions
83
84if sys.platform == 'win32':
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000085 def get_current_encoding():
86 """Returns the 'ANSI' code page associated to the process."""
87 return 'cp%d' % int(windll.kernel32.GetACP())
88
89
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000090 def CommandLineToArgvW(command_line):
91 """Splits a commandline into argv using CommandLineToArgvW()."""
92 # http://msdn.microsoft.com/library/windows/desktop/bb776391.aspx
93 size = c_int()
maruel@chromium.org306e0e72012-11-02 18:22:03 +000094 assert isinstance(command_line, unicode)
95 ptr = windll.shell32.CommandLineToArgvW(command_line, byref(size))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000096 try:
97 return [arg for arg in (c_wchar_p * size.value).from_address(ptr)]
98 finally:
99 windll.kernel32.LocalFree(ptr)
100
101
Marc-Antoine Ruel5906fcd2013-11-08 21:02:24 -0500102def can_trace():
103 """Returns True if the user is an administrator on Windows.
104
105 It is required for tracing to work.
106 """
107 if sys.platform != 'win32':
108 return True
109 return bool(windll.shell32.IsUserAnAdmin())
110
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000111
maruel@chromium.org2909b7c2013-05-29 18:52:59 +0000112def create_subprocess_thunk():
113 """Creates a small temporary script to start the child process.
114
115 This thunk doesn't block, its unique name is used to identify it as the
116 parent.
117 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000118 handle, name = tempfile.mkstemp(prefix='trace_inputs_thunk', suffix='.py')
maruel@chromium.org2909b7c2013-05-29 18:52:59 +0000119 try:
120 os.write(
121 handle,
122 (
123 'import subprocess, sys\n'
124 'sys.exit(subprocess.call(sys.argv[2:]))\n'
125 ))
126 finally:
127 os.close(handle)
128 return name
129
130
131def create_exec_thunk():
132 """Creates a small temporary script to start the child executable.
133
134 Reads from the file handle provided as the fisrt argument to block, then
135 execv() the command to be traced.
136 """
137 handle, name = tempfile.mkstemp(prefix='trace_inputs_thunk', suffix='.py')
138 try:
139 os.write(
140 handle,
141 (
142 'import os, sys\n'
143 'fd = int(sys.argv[1])\n'
144 # This will block until the controlling process writes a byte on the
145 # pipe. It will do so once the tracing tool, e.g. strace, is ready to
146 # trace.
147 'os.read(fd, 1)\n'
148 'os.close(fd)\n'
149 'os.execv(sys.argv[2], sys.argv[2:])\n'
150 ))
151 finally:
152 os.close(handle)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000153 return name
154
155
156def strace_process_quoted_arguments(text):
157 """Extracts quoted arguments on a string and return the arguments as a list.
158
159 Implemented as an automaton. Supports incomplete strings in the form
160 '"foo"...'.
161
162 Example:
163 With text = '"foo", "bar"', the function will return ['foo', 'bar']
164
165 TODO(maruel): Implement escaping.
166 """
167 # All the possible states of the DFA.
168 ( NEED_QUOTE, # Begining of a new arguments.
169 INSIDE_STRING, # Inside an argument.
170 ESCAPED, # Found a '\' inside a quote. Treat the next char as-is.
171 NEED_COMMA_OR_DOT, # Right after the closing quote of an argument. Could be
172 # a serie of 3 dots or a comma.
173 NEED_SPACE, # Right after a comma
174 NEED_DOT_2, # Found a dot, need a second one.
175 NEED_DOT_3, # Found second dot, need a third one.
176 NEED_COMMA, # Found third dot, need a comma.
177 ) = range(8)
178
179 state = NEED_QUOTE
180 out = []
181 for index, char in enumerate(text):
182 if char == '"':
183 if state == NEED_QUOTE:
184 state = INSIDE_STRING
185 # A new argument was found.
186 out.append('')
187 elif state == INSIDE_STRING:
188 # The argument is now closed.
189 state = NEED_COMMA_OR_DOT
190 elif state == ESCAPED:
191 out[-1] += char
192 state = INSIDE_STRING
193 else:
194 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000195 'Can\'t process char \'%s\' at column %d for: %r' % (
196 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000197 index,
198 text)
199 elif char == ',':
200 if state in (NEED_COMMA_OR_DOT, NEED_COMMA):
201 state = NEED_SPACE
202 elif state == INSIDE_STRING:
203 out[-1] += char
204 elif state == ESCAPED:
205 out[-1] += char
206 state = INSIDE_STRING
207 else:
208 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000209 'Can\'t process char \'%s\' at column %d for: %r' % (
210 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000211 index,
212 text)
213 elif char == ' ':
214 if state == NEED_SPACE:
215 state = NEED_QUOTE
216 elif state == INSIDE_STRING:
217 out[-1] += char
218 elif state == ESCAPED:
219 out[-1] += char
220 state = INSIDE_STRING
221 else:
222 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000223 'Can\'t process char \'%s\' at column %d for: %r' % (
224 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000225 index,
226 text)
227 elif char == '.':
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000228 if state in (NEED_QUOTE, NEED_COMMA_OR_DOT):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000229 # The string is incomplete, this mean the strace -s flag should be
230 # increased.
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000231 # For NEED_QUOTE, the input string would look like '"foo", ...'.
232 # For NEED_COMMA_OR_DOT, the input string would look like '"foo"...'
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000233 state = NEED_DOT_2
234 elif state == NEED_DOT_2:
235 state = NEED_DOT_3
236 elif state == NEED_DOT_3:
237 state = NEED_COMMA
238 elif state == INSIDE_STRING:
239 out[-1] += char
240 elif state == ESCAPED:
241 out[-1] += char
242 state = INSIDE_STRING
243 else:
244 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000245 'Can\'t process char \'%s\' at column %d for: %r' % (
246 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000247 index,
248 text)
249 elif char == '\\':
250 if state == ESCAPED:
251 out[-1] += char
252 state = INSIDE_STRING
253 elif state == INSIDE_STRING:
254 state = ESCAPED
255 else:
256 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000257 'Can\'t process char \'%s\' at column %d for: %r' % (
258 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000259 index,
260 text)
261 else:
262 if state == INSIDE_STRING:
263 out[-1] += char
264 else:
265 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000266 'Can\'t process char \'%s\' at column %d for: %r' % (
267 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000268 index,
269 text)
270 if state not in (NEED_COMMA, NEED_COMMA_OR_DOT):
271 raise ValueError(
272 'String is incorrectly terminated: %r' % text,
273 text)
274 return out
275
276
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000277def assert_is_renderable(pseudo_string):
278 """Asserts the input is a valid object to be processed by render()."""
279 assert (
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000280 pseudo_string is None or
281 isinstance(pseudo_string, unicode) or
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000282 hasattr(pseudo_string, 'render')), repr(pseudo_string)
283
284
285def render(pseudo_string):
286 """Converts the pseudo-string to an unicode string."""
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000287 if pseudo_string is None or isinstance(pseudo_string, unicode):
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000288 return pseudo_string
289 return pseudo_string.render()
290
291
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000292class Results(object):
293 """Results of a trace session."""
294
295 class _TouchedObject(object):
296 """Something, a file or a directory, that was accessed."""
297 def __init__(self, root, path, tainted, size, nb_files):
298 logging.debug(
299 '%s(%s, %s, %s, %s, %s)' %
300 (self.__class__.__name__, root, path, tainted, size, nb_files))
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000301 assert_is_renderable(root)
302 assert_is_renderable(path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000303 self.root = root
304 self.path = path
305 self.tainted = tainted
306 self.nb_files = nb_files
maruel@chromium.org538141b2013-06-03 20:57:17 +0000307 # Can be used as a cache or a default value, depending on context. In
308 # particular, once self.tainted is True, because the path was replaced
309 # with a variable, it is not possible to look up the file size.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000310 self._size = size
311 # These are cache only.
312 self._real_path = None
313
314 # Check internal consistency.
315 assert path, path
maruel@chromium.org561d4b22013-09-26 21:08:08 +0000316 assert tainted or bool(root) != bool(file_path.isabs(path)), (root, path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000317 assert tainted or (
318 not os.path.exists(self.full_path) or
maruel@chromium.org561d4b22013-09-26 21:08:08 +0000319 (self.full_path == file_path.get_native_path_case(self.full_path))), (
320 tainted,
321 self.full_path,
322 file_path.get_native_path_case(self.full_path))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000323
324 @property
325 def existent(self):
326 return self.size != -1
327
328 @property
329 def full_path(self):
330 if self.root:
331 return os.path.join(self.root, self.path)
332 return self.path
333
334 @property
335 def real_path(self):
336 """Returns the path with symlinks resolved."""
337 if not self._real_path:
338 self._real_path = os.path.realpath(self.full_path)
339 return self._real_path
340
341 @property
342 def size(self):
maruel@chromium.org538141b2013-06-03 20:57:17 +0000343 """File's size. -1 is not existent.
344
345 Once tainted, it is not possible the retrieve the file size anymore since
346 the path is composed of variables.
347 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000348 if self._size is None and not self.tainted:
349 try:
350 self._size = os.stat(self.full_path).st_size
351 except OSError:
352 self._size = -1
353 return self._size
354
355 def flatten(self):
356 """Returns a dict representing this object.
357
358 A 'size' of 0 means the file was only touched and not read.
359 """
360 return {
361 'path': self.path,
362 'size': self.size,
363 }
364
365 def replace_variables(self, variables):
366 """Replaces the root of this File with one of the variables if it matches.
367
368 If a variable replacement occurs, the cloned object becomes tainted.
369 """
370 for variable, root_path in variables.iteritems():
371 if self.path.startswith(root_path):
372 return self._clone(
373 self.root, variable + self.path[len(root_path):], True)
374 # No need to clone, returns ourself.
375 return self
376
377 def strip_root(self, root):
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000378 """Returns a clone of itself with 'root' stripped off.
379
380 Note that the file is kept if it is either accessible from a symlinked
381 path that was used to access the file or through the real path.
382 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000383 # Check internal consistency.
maruel@chromium.org561d4b22013-09-26 21:08:08 +0000384 assert (
385 self.tainted or
386 (file_path.isabs(root) and root.endswith(os.path.sep))), root
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000387 if not self.full_path.startswith(root):
388 # Now try to resolve the symlinks to see if it can be reached this way.
389 # Only try *after* trying without resolving symlink.
390 if not self.real_path.startswith(root):
391 return None
392 path = self.real_path
393 else:
394 path = self.full_path
395 return self._clone(root, path[len(root):], self.tainted)
396
397 def _clone(self, new_root, new_path, tainted):
398 raise NotImplementedError(self.__class__.__name__)
399
400 class File(_TouchedObject):
401 """A file that was accessed. May not be present anymore.
402
403 If tainted is true, it means it is not a real path anymore as a variable
404 replacement occured.
405
maruel@chromium.org538141b2013-06-03 20:57:17 +0000406 |mode| can be one of None, TOUCHED, READ or WRITE.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000407 """
maruel@chromium.org538141b2013-06-03 20:57:17 +0000408 # Was probed for existence, and it is existent, but was never _opened_.
409 TOUCHED = 't'
410 # Opened for read only and guaranteed to not have been written to.
411 READ = 'r'
412 # Opened for write.
413 WRITE = 'w'
414
415 # They are listed in order of priority. E.g. if a file is traced as TOUCHED
416 # then as WRITE, only keep WRITE. None means no idea, which is a problem on
417 # Windows.
418 ACCEPTABLE_MODES = (None, TOUCHED, READ, WRITE)
419
420 def __init__(self, root, path, tainted, size, mode):
421 assert mode in self.ACCEPTABLE_MODES
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000422 super(Results.File, self).__init__(root, path, tainted, size, 1)
maruel@chromium.org538141b2013-06-03 20:57:17 +0000423 self.mode = mode
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000424
425 def _clone(self, new_root, new_path, tainted):
426 """Clones itself keeping meta-data."""
427 # Keep the self.size and self._real_path caches for performance reason. It
428 # is also important when the file becomes tainted (with a variable instead
429 # of the real path) since self.path is not an on-disk path anymore so
430 # out._size cannot be updated.
maruel@chromium.org538141b2013-06-03 20:57:17 +0000431 out = self.__class__(new_root, new_path, tainted, self.size, self.mode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000432 out._real_path = self._real_path
433 return out
434
maruel@chromium.org538141b2013-06-03 20:57:17 +0000435 def flatten(self):
436 out = super(Results.File, self).flatten()
437 out['mode'] = self.mode
438 return out
439
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000440 class Directory(_TouchedObject):
maruel@chromium.org538141b2013-06-03 20:57:17 +0000441 """A directory of files. Must exist.
442
443 For a Directory instance, self.size is not a cache, it's an actual value
444 that is never modified and represents the total size of the files contained
445 in this directory. It is possible that the directory is empty so that
446 size==0; this happens if there's only an invalid symlink in it.
447 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000448 def __init__(self, root, path, tainted, size, nb_files):
449 """path='.' is a valid value and must be handled appropriately."""
450 assert not path.endswith(os.path.sep), path
451 super(Results.Directory, self).__init__(
452 root, path + os.path.sep, tainted, size, nb_files)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000453
454 def flatten(self):
455 out = super(Results.Directory, self).flatten()
456 out['nb_files'] = self.nb_files
457 return out
458
459 def _clone(self, new_root, new_path, tainted):
460 """Clones itself keeping meta-data."""
461 out = self.__class__(
462 new_root,
463 new_path.rstrip(os.path.sep),
464 tainted,
465 self.size,
466 self.nb_files)
467 out._real_path = self._real_path
468 return out
469
470 class Process(object):
471 """A process that was traced.
472
473 Contains references to the files accessed by this process and its children.
474 """
475 def __init__(self, pid, files, executable, command, initial_cwd, children):
476 logging.debug('Process(%s, %d, ...)' % (pid, len(files)))
477 self.pid = pid
478 self.files = sorted(files, key=lambda x: x.path)
479 self.children = children
480 self.executable = executable
481 self.command = command
482 self.initial_cwd = initial_cwd
483
484 # Check internal consistency.
485 assert len(set(f.path for f in self.files)) == len(self.files), sorted(
486 f.path for f in self.files)
487 assert isinstance(self.children, list)
488 assert isinstance(self.files, list)
489
490 @property
491 def all(self):
492 for child in self.children:
493 for i in child.all:
494 yield i
495 yield self
496
497 def flatten(self):
498 return {
499 'children': [c.flatten() for c in self.children],
500 'command': self.command,
501 'executable': self.executable,
502 'files': [f.flatten() for f in self.files],
503 'initial_cwd': self.initial_cwd,
504 'pid': self.pid,
505 }
506
507 def strip_root(self, root):
maruel@chromium.org561d4b22013-09-26 21:08:08 +0000508 assert file_path.isabs(root) and root.endswith(os.path.sep), root
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000509 # Loads the files after since they are constructed as objects.
510 out = self.__class__(
511 self.pid,
512 filter(None, (f.strip_root(root) for f in self.files)),
513 self.executable,
514 self.command,
515 self.initial_cwd,
516 [c.strip_root(root) for c in self.children])
517 logging.debug(
518 'strip_root(%s) %d -> %d' % (root, len(self.files), len(out.files)))
519 return out
520
521 def __init__(self, process):
522 self.process = process
523 # Cache.
524 self._files = None
525
526 def flatten(self):
527 return {
528 'root': self.process.flatten(),
529 }
530
531 @property
532 def files(self):
533 if self._files is None:
534 self._files = sorted(
535 sum((p.files for p in self.process.all), []),
536 key=lambda x: x.path)
537 return self._files
538
539 @property
540 def existent(self):
541 return [f for f in self.files if f.existent]
542
543 @property
544 def non_existent(self):
545 return [f for f in self.files if not f.existent]
546
547 def strip_root(self, root):
548 """Returns a clone with all the files outside the directory |root| removed
549 and converts all the path to be relative paths.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000550
551 It keeps files accessible through the |root| directory or that have been
552 accessed through any symlink which points to the same directory.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000553 """
554 # Resolve any symlink
555 root = os.path.realpath(root)
maruel@chromium.org561d4b22013-09-26 21:08:08 +0000556 root = (
557 file_path.get_native_path_case(root).rstrip(os.path.sep) + os.path.sep)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000558 logging.debug('strip_root(%s)' % root)
559 return Results(self.process.strip_root(root))
560
561
562class ApiBase(object):
563 """OS-agnostic API to trace a process and its children."""
564 class Context(object):
565 """Processes one log line at a time and keeps the list of traced processes.
566
567 The parsing is complicated by the fact that logs are traced out of order for
568 strace but in-order for dtrace and logman. In addition, on Windows it is
569 very frequent that processids are reused so a flat list cannot be used. But
570 at the same time, it is impossible to faithfully construct a graph when the
571 logs are processed out of order. So both a tree and a flat mapping are used,
572 the tree is the real process tree, while the flat mapping stores the last
573 valid process for the corresponding processid. For the strace case, the
574 tree's head is guessed at the last moment.
575 """
576 class Process(object):
577 """Keeps context for one traced child process.
578
579 Logs all the files this process touched. Ignores directories.
580 """
581 def __init__(self, blacklist, pid, initial_cwd):
582 # Check internal consistency.
583 assert isinstance(pid, int), repr(pid)
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000584 assert_is_renderable(initial_cwd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000585 self.pid = pid
586 # children are Process instances.
587 self.children = []
588 self.initial_cwd = initial_cwd
589 self.cwd = None
maruel@chromium.org538141b2013-06-03 20:57:17 +0000590 self.files = {}
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000591 self.executable = None
592 self.command = None
593 self._blacklist = blacklist
594
595 def to_results_process(self):
596 """Resolves file case sensitivity and or late-bound strings."""
597 # When resolving files, it's normal to get dupe because a file could be
598 # opened multiple times with different case. Resolve the deduplication
599 # here.
600 def fix_path(x):
601 """Returns the native file path case.
602
603 Converts late-bound strings.
604 """
605 if not x:
606 # Do not convert None instance to 'None'.
607 return x
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000608 x = render(x)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000609 if os.path.isabs(x):
610 # If the path is not absolute, which tends to happen occasionally on
611 # Windows, it is not possible to get the native path case so ignore
612 # that trace. It mostly happens for 'executable' value.
maruel@chromium.org561d4b22013-09-26 21:08:08 +0000613 x = file_path.get_native_path_case(x)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000614 return x
615
maruel@chromium.org538141b2013-06-03 20:57:17 +0000616 def fix_and_blacklist_path(x, m):
617 """Receives a tuple (filepath, mode) and processes filepath."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000618 x = fix_path(x)
619 if not x:
620 return
621 # The blacklist needs to be reapplied, since path casing could
622 # influence blacklisting.
623 if self._blacklist(x):
624 return
maruel@chromium.org538141b2013-06-03 20:57:17 +0000625 # Filters out directories. Some may have passed through.
626 if os.path.isdir(x):
627 return
628 return x, m
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000629
maruel@chromium.org538141b2013-06-03 20:57:17 +0000630 # Renders all the files as strings, as some could be RelativePath
631 # instances. It is important to do it first since there could still be
632 # multiple entries with the same path but different modes.
633 rendered = (
634 fix_and_blacklist_path(f, m) for f, m in self.files.iteritems())
635 files = sorted(
636 (f for f in rendered if f),
637 key=lambda x: (x[0], Results.File.ACCEPTABLE_MODES.index(x[1])))
638 # Then converting into a dict will automatically clean up lesser
639 # important values.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000640 files = [
maruel@chromium.org538141b2013-06-03 20:57:17 +0000641 Results.File(None, f, False, None, m)
642 for f, m in dict(files).iteritems()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000643 ]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000644 return Results.Process(
645 self.pid,
646 files,
647 fix_path(self.executable),
648 self.command,
649 fix_path(self.initial_cwd),
650 [c.to_results_process() for c in self.children])
651
maruel@chromium.org538141b2013-06-03 20:57:17 +0000652 def add_file(self, filepath, mode):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000653 """Adds a file if it passes the blacklist."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000654 if self._blacklist(render(filepath)):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000655 return
maruel@chromium.org538141b2013-06-03 20:57:17 +0000656 logging.debug('add_file(%d, %s, %s)', self.pid, filepath, mode)
657 # Note that filepath and not render(filepath) is added. It is because
658 # filepath could be something else than a string, like a RelativePath
659 # instance for dtrace logs.
660 modes = Results.File.ACCEPTABLE_MODES
661 old_mode = self.files.setdefault(filepath, mode)
662 if old_mode != mode and modes.index(old_mode) < modes.index(mode):
663 # Take the highest value.
664 self.files[filepath] = mode
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000665
666 def __init__(self, blacklist):
667 self.blacklist = blacklist
668 # Initial process.
669 self.root_process = None
670 # dict to accelerate process lookup, to not have to lookup the whole graph
671 # each time.
672 self._process_lookup = {}
673
674 class Tracer(object):
675 """During it's lifetime, the tracing subsystem is enabled."""
676 def __init__(self, logname):
677 self._logname = logname
maruel@chromium.org808e3f22013-05-16 16:42:54 +0000678 self._lock = threading.RLock()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000679 self._traces = []
680 self._initialized = True
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +0000681 self._scripts_to_cleanup = []
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000682
683 def trace(self, cmd, cwd, tracename, output):
684 """Runs the OS-specific trace program on an executable.
685
686 Arguments:
687 - cmd: The command (a list) to run.
688 - cwd: Current directory to start the child process in.
689 - tracename: Name of the trace in the logname file.
690 - output: If False, redirects output to PIPEs.
691
692 Returns a tuple (resultcode, output) and updates the internal trace
693 entries.
694 """
695 # The implementation adds an item to self._traces.
696 raise NotImplementedError(self.__class__.__name__)
697
698 def close(self, _timeout=None):
699 """Saves the meta-data in the logname file.
700
701 For kernel-based tracing, stops the tracing subsystem.
702
703 Must not be used manually when using 'with' construct.
704 """
705 with self._lock:
maruel@chromium.org808e3f22013-05-16 16:42:54 +0000706 if not self._initialized:
707 raise TracingFailure(
708 'Called %s.close() on an unitialized object' %
709 self.__class__.__name__,
710 None, None, None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000711 try:
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +0000712 while self._scripts_to_cleanup:
713 try:
714 os.remove(self._scripts_to_cleanup.pop())
715 except OSError as e:
716 logging.error('Failed to delete a temporary script: %s', e)
Marc-Antoine Ruelde011802013-11-12 15:19:47 -0500717 tools.write_json(self._logname, self._gen_logdata(), False)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000718 finally:
719 self._initialized = False
720
721 def post_process_log(self):
722 """Post-processes the log so it becomes faster to load afterward.
723
724 Must not be used manually when using 'with' construct.
725 """
726 assert not self._initialized, 'Must stop tracing first.'
727
maruel@chromium.org808e3f22013-05-16 16:42:54 +0000728 def _gen_logdata(self):
729 """Returns the data to be saved in the trace file."""
730 return {
731 'traces': self._traces,
732 }
733
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000734 def __enter__(self):
735 """Enables 'with' statement."""
736 return self
737
738 def __exit__(self, exc_type, exc_value, traceback):
739 """Enables 'with' statement."""
740 self.close()
741 # If an exception was thrown, do not process logs.
742 if not exc_type:
743 self.post_process_log()
744
745 def get_tracer(self, logname):
746 """Returns an ApiBase.Tracer instance.
747
748 Initializes the tracing subsystem, which is a requirement for kernel-based
749 tracers. Only one tracer instance should be live at a time!
750
751 logname is the filepath to the json file that will contain the meta-data
752 about the logs.
753 """
754 return self.Tracer(logname)
755
756 @staticmethod
757 def clean_trace(logname):
758 """Deletes an old log."""
759 raise NotImplementedError()
760
761 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +0000762 def parse_log(cls, logname, blacklist, trace_name):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000763 """Processes trace logs and returns the files opened and the files that do
764 not exist.
765
766 It does not track directories.
767
maruel@chromium.orgec74ff82012-10-29 18:14:47 +0000768 Arguments:
769 - logname: must be an absolute path.
770 - blacklist: must be a lambda.
771 - trace_name: optional trace to read, defaults to reading all traces.
maruel@chromium.org8b056ba2012-10-16 14:04:49 +0000772
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000773 Most of the time, files that do not exist are temporary test files that
774 should be put in /tmp instead. See http://crbug.com/116251.
775
776 Returns a list of dict with keys:
777 - results: A Results instance.
778 - trace: The corresponding tracename parameter provided to
779 get_tracer().trace().
780 - output: Output gathered during execution, if get_tracer().trace(...,
781 output=False) was used.
782 """
783 raise NotImplementedError(cls.__class__.__name__)
784
785
786class Strace(ApiBase):
787 """strace implies linux."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000788 @staticmethod
789 def load_filename(filename):
790 """Parses a filename in a log."""
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000791 # TODO(maruel): Be compatible with strace -x.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000792 assert isinstance(filename, str)
793 out = ''
794 i = 0
795 while i < len(filename):
796 c = filename[i]
797 if c == '\\':
798 out += chr(int(filename[i+1:i+4], 8))
799 i += 4
800 else:
801 out += c
802 i += 1
803 # TODO(maruel): That's not necessarily true that the current code page is
804 # utf-8.
805 return out.decode('utf-8')
806
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000807 class Context(ApiBase.Context):
808 """Processes a strace log line and keeps the list of existent and non
809 existent files accessed.
810
811 Ignores directories.
812
813 Uses late-binding to processes the cwd of each process. The problem is that
814 strace generates one log file per process it traced but doesn't give any
maruel@chromium.org2909b7c2013-05-29 18:52:59 +0000815 information about which process was started when and by who. So process the
816 logs out of order and use late binding with RelativePath to be able to
817 deduce the initial directory of each process once all the logs are parsed.
818
819 TODO(maruel): Use the script even in the non-sudo case, so log parsing can
820 be done in two phase: first find the root process, then process the child
821 processes in order. With that, it should be possible to not use RelativePath
822 anymore. This would significantly simplify the code!
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000823 """
824 class Process(ApiBase.Context.Process):
825 """Represents the state of a process.
826
827 Contains all the information retrieved from the pid-specific log.
828 """
829 # Function names are using ([a-z_0-9]+)
830 # This is the most common format. function(args) = result
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000831 RE_HEADER = re.compile(r'^([a-z_0-9]+)\((.*?)\)\s+= (.+)$')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000832 # An interrupted function call, only grab the minimal header.
833 RE_UNFINISHED = re.compile(r'^([^\(]+)(.*) \<unfinished \.\.\.\>$')
834 # A resumed function call.
835 RE_RESUMED = re.compile(r'^<\.\.\. ([^ ]+) resumed> (.+)$')
836 # A process received a signal.
837 RE_SIGNAL = re.compile(r'^--- SIG[A-Z]+ .+ ---')
838 # A process didn't handle a signal. Ignore any junk appearing before,
839 # because the process was forcibly killed so it won't open any new file.
840 RE_KILLED = re.compile(
841 r'^.*\+\+\+ killed by ([A-Z]+)( \(core dumped\))? \+\+\+$')
842 # The process has exited.
843 RE_PROCESS_EXITED = re.compile(r'^\+\+\+ exited with (\d+) \+\+\+')
844 # A call was canceled. Ignore any prefix.
845 RE_UNAVAILABLE = re.compile(r'^.*\)\s*= \? <unavailable>$')
846 # Happens when strace fails to even get the function name.
847 UNNAMED_FUNCTION = '????'
848
849 # Corner-case in python, a class member function decorator must not be
850 # @staticmethod.
851 def parse_args(regexp, expect_zero): # pylint: disable=E0213
852 """Automatically convert the str 'args' into a list of processed
853 arguments.
854
855 Arguments:
856 - regexp is used to parse args.
857 - expect_zero: one of True, False or None.
858 - True: will check for result.startswith('0') first and will ignore
859 the trace line completely otherwise. This is important because for
860 many functions, the regexp will not process if the call failed.
861 - False: will check for not result.startswith(('?', '-1')) for the
862 same reason than with True.
863 - None: ignore result.
864 """
865 def meta_hook(function):
866 assert function.__name__.startswith('handle_')
867 def hook(self, args, result):
868 if expect_zero is True and not result.startswith('0'):
869 return
870 if expect_zero is False and result.startswith(('?', '-1')):
871 return
872 match = re.match(regexp, args)
873 if not match:
874 raise TracingFailure(
875 'Failed to parse %s(%s) = %s' %
876 (function.__name__[len('handle_'):], args, result),
877 None, None, None)
878 return function(self, match.groups(), result)
879 return hook
880 return meta_hook
881
882 class RelativePath(object):
883 """A late-bound relative path."""
884 def __init__(self, parent, value):
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000885 assert_is_renderable(parent)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000886 self.parent = parent
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000887 assert (
888 value is None or
889 (isinstance(value, unicode) and not os.path.isabs(value)))
890 self.value = value
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000891 if self.value:
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000892 # TODO(maruel): On POSIX, '\\' is a valid character so remove this
893 # assert.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000894 assert '\\' not in self.value, value
895 assert '\\' not in self.value, (repr(value), repr(self.value))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000896
897 def render(self):
898 """Returns the current directory this instance is representing.
899
900 This function is used to return the late-bound value.
901 """
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000902 assert self.parent is not None
903 parent = render(self.parent)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000904 if self.value:
905 return os.path.normpath(os.path.join(parent, self.value))
906 return parent
907
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000908 def __init__(self, root, pid):
909 """Keeps enough information to be able to guess the original process
910 root.
911
912 strace doesn't store which process was the initial process. So more
913 information needs to be kept so the graph can be reconstructed from the
914 flat map.
915 """
916 logging.info('%s(%d)' % (self.__class__.__name__, pid))
917 super(Strace.Context.Process, self).__init__(root.blacklist, pid, None)
918 assert isinstance(root, ApiBase.Context)
919 self._root = weakref.ref(root)
920 # The dict key is the function name of the pending call, like 'open'
921 # or 'execve'.
922 self._pending_calls = {}
923 self._line_number = 0
924 # Current directory when the process started.
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000925 if isinstance(self._root(), unicode):
926 self.initial_cwd = self._root()
927 else:
928 self.initial_cwd = self.RelativePath(self._root(), None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000929 self.parentid = None
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +0000930 self._done = False
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000931
932 def get_cwd(self):
933 """Returns the best known value of cwd."""
934 return self.cwd or self.initial_cwd
935
936 def render(self):
937 """Returns the string value of the RelativePath() object.
938
939 Used by RelativePath. Returns the initial directory and not the
940 current one since the current directory 'cwd' validity is time-limited.
941
942 The validity is only guaranteed once all the logs are processed.
943 """
944 return self.initial_cwd.render()
945
946 def on_line(self, line):
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000947 assert isinstance(line, str)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000948 self._line_number += 1
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000949 try:
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000950 if self._done:
951 raise TracingFailure(
952 'Found a trace for a terminated process or corrupted log',
953 None, None, None)
954
955 if self.RE_SIGNAL.match(line):
956 # Ignore signals.
957 return
958
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000959 match = self.RE_KILLED.match(line)
960 if match:
961 # Converts a '+++ killed by Foo +++' trace into an exit_group().
962 self.handle_exit_group(match.group(1), None)
963 return
964
965 match = self.RE_PROCESS_EXITED.match(line)
966 if match:
967 # Converts a '+++ exited with 1 +++' trace into an exit_group()
968 self.handle_exit_group(match.group(1), None)
969 return
970
971 match = self.RE_UNFINISHED.match(line)
972 if match:
973 if match.group(1) in self._pending_calls:
974 raise TracingFailure(
975 'Found two unfinished calls for the same function',
976 None, None, None,
977 self._pending_calls)
978 self._pending_calls[match.group(1)] = (
979 match.group(1) + match.group(2))
980 return
981
982 match = self.RE_UNAVAILABLE.match(line)
983 if match:
984 # This usually means a process was killed and a pending call was
985 # canceled.
986 # TODO(maruel): Look up the last exit_group() trace just above and
987 # make sure any self._pending_calls[anything] is properly flushed.
988 return
989
990 match = self.RE_RESUMED.match(line)
991 if match:
992 if match.group(1) not in self._pending_calls:
993 raise TracingFailure(
994 'Found a resumed call that was not logged as unfinished',
995 None, None, None,
996 self._pending_calls)
997 pending = self._pending_calls.pop(match.group(1))
998 # Reconstruct the line.
999 line = pending + match.group(2)
1000
1001 match = self.RE_HEADER.match(line)
1002 if not match:
maruel@chromium.orgbcfc9e12013-03-19 18:47:17 +00001003 # The line is corrupted. It happens occasionally when a process is
1004 # killed forcibly with activity going on. Assume the process died.
1005 # No other line can be processed afterward.
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001006 logging.debug('%d is done: %s', self.pid, line)
maruel@chromium.orgbcfc9e12013-03-19 18:47:17 +00001007 self._done = True
1008 return
1009
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001010 if match.group(1) == self.UNNAMED_FUNCTION:
1011 return
1012
1013 # It's a valid line, handle it.
1014 handler = getattr(self, 'handle_%s' % match.group(1), None)
1015 if not handler:
1016 self._handle_unknown(match.group(1), match.group(2), match.group(3))
1017 return handler(match.group(2), match.group(3))
1018 except TracingFailure, e:
1019 # Hack in the values since the handler could be a static function.
1020 e.pid = self.pid
1021 e.line = line
1022 e.line_number = self._line_number
1023 # Re-raise the modified exception.
1024 raise
1025 except (KeyError, NotImplementedError, ValueError), e:
1026 raise TracingFailure(
1027 'Trace generated a %s exception: %s' % (
1028 e.__class__.__name__, str(e)),
1029 self.pid,
1030 self._line_number,
1031 line,
1032 e)
1033
1034 @parse_args(r'^\"(.+?)\", [FKORWX_|]+$', True)
1035 def handle_access(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001036 self._handle_file(args[0], Results.File.TOUCHED)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001037
1038 @parse_args(r'^\"(.+?)\"$', True)
1039 def handle_chdir(self, args, _result):
1040 """Updates cwd."""
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001041 self.cwd = self._mangle(args[0])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001042 logging.debug('handle_chdir(%d, %s)' % (self.pid, self.cwd))
1043
maruel@chromium.org538141b2013-06-03 20:57:17 +00001044 @parse_args(r'^\"(.+?)\", (\d+), (\d+)$', False)
1045 def handle_chown(self, args, _result):
1046 # TODO(maruel): Look at result?
1047 self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org55550722013-05-28 14:43:14 +00001048
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001049 def handle_clone(self, _args, result):
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001050 self._handling_forking('clone', result)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001051
1052 def handle_close(self, _args, _result):
1053 pass
1054
maruel@chromium.org538141b2013-06-03 20:57:17 +00001055 @parse_args(r'^\"(.+?)\", (\d+)$', False)
1056 def handle_chmod(self, args, _result):
1057 self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001058
maruel@chromium.org538141b2013-06-03 20:57:17 +00001059 @parse_args(r'^\"(.+?)\", (\d+)$', False)
1060 def handle_creat(self, args, _result):
1061 self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001062
1063 @parse_args(r'^\"(.+?)\", \[(.+)\], \[\/\* \d+ vars? \*\/\]$', True)
1064 def handle_execve(self, args, _result):
1065 # Even if in practice execve() doesn't returns when it succeeds, strace
1066 # still prints '0' as the result.
1067 filepath = args[0]
maruel@chromium.org538141b2013-06-03 20:57:17 +00001068 self._handle_file(filepath, Results.File.READ)
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001069 self.executable = self._mangle(filepath)
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001070 try:
1071 self.command = strace_process_quoted_arguments(args[1])
1072 except ValueError as e:
1073 raise TracingFailure(
1074 'Failed to process command line argument:\n%s' % e.args[0],
1075 None, None, None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001076
1077 def handle_exit_group(self, _args, _result):
1078 """Removes cwd."""
1079 self.cwd = None
1080
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001081 @parse_args(r'^(\d+|AT_FDCWD), \"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', True)
1082 def handle_faccessat(self, args, _results):
1083 if args[0] == 'AT_FDCWD':
maruel@chromium.org538141b2013-06-03 20:57:17 +00001084 self._handle_file(args[1], Results.File.TOUCHED)
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001085 else:
1086 raise Exception('Relative faccess not implemented.')
1087
maruel@chromium.org0781f322013-05-28 19:45:49 +00001088 def handle_fallocate(self, _args, result):
1089 pass
1090
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001091 def handle_fork(self, args, result):
1092 self._handle_unknown('fork', args, result)
1093
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001094 def handle_futex(self, _args, _result):
1095 pass
1096
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001097 @parse_args(r'^\"(.+?)\", (\d+)$', False)
1098 def handle_getcwd(self, args, _result):
1099 if os.path.isabs(args[0]):
1100 logging.debug('handle_chdir(%d, %s)' % (self.pid, self.cwd))
1101 if not isinstance(self.cwd, unicode):
1102 # Take the occasion to reset the path.
1103 self.cwd = self._mangle(args[0])
1104 else:
1105 # It should always match.
1106 assert self.cwd == Strace.load_filename(args[0]), (
1107 self.cwd, args[0])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001108
1109 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1110 def handle_link(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001111 self._handle_file(args[0], Results.File.READ)
1112 self._handle_file(args[1], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001113
1114 @parse_args(r'\"(.+?)\", \{.+?, \.\.\.\}', True)
1115 def handle_lstat(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001116 self._handle_file(args[0], Results.File.TOUCHED)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001117
1118 def handle_mkdir(self, _args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001119 # We track content, not directories.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001120 pass
1121
1122 @parse_args(r'^\"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', False)
1123 def handle_open(self, args, _result):
1124 if 'O_DIRECTORY' in args[1]:
1125 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00001126 self._handle_file(
1127 args[0],
1128 Results.File.READ if 'O_RDONLY' in args[1] else Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001129
1130 @parse_args(r'^(\d+|AT_FDCWD), \"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', False)
1131 def handle_openat(self, args, _result):
1132 if 'O_DIRECTORY' in args[2]:
1133 return
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001134 if args[0] == 'AT_FDCWD':
maruel@chromium.org538141b2013-06-03 20:57:17 +00001135 self._handle_file(
1136 args[1],
1137 Results.File.READ if 'O_RDONLY' in args[2]
1138 else Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001139 else:
1140 # TODO(maruel): Implement relative open if necessary instead of the
1141 # AT_FDCWD flag, let's hope not since this means tracking all active
1142 # directory handles.
maruel@chromium.org538141b2013-06-03 20:57:17 +00001143 raise NotImplementedError('Relative open via openat not implemented.')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001144
1145 @parse_args(r'^\"(.+?)\", \".+?\"(\.\.\.)?, \d+$', False)
1146 def handle_readlink(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001147 self._handle_file(args[0], Results.File.READ)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001148
1149 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1150 def handle_rename(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001151 self._handle_file(args[0], Results.File.READ)
1152 self._handle_file(args[1], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001153
1154 def handle_rmdir(self, _args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001155 # We track content, not directories.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001156 pass
1157
1158 def handle_setxattr(self, _args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001159 # TODO(maruel): self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001160 pass
1161
1162 @parse_args(r'\"(.+?)\", \{.+?, \.\.\.\}', True)
1163 def handle_stat(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001164 self._handle_file(args[0], Results.File.TOUCHED)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001165
maruel@chromium.org538141b2013-06-03 20:57:17 +00001166 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1167 def handle_symlink(self, args, _result):
1168 self._handle_file(args[0], Results.File.TOUCHED)
1169 self._handle_file(args[1], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001170
csharp@chromium.orgfa672922012-11-02 17:43:31 +00001171 @parse_args(r'^\"(.+?)\", \d+', True)
1172 def handle_truncate(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001173 self._handle_file(args[0], Results.File.WRITE)
csharp@chromium.orgfa672922012-11-02 17:43:31 +00001174
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001175 def handle_unlink(self, _args, _result):
1176 # In theory, the file had to be created anyway.
1177 pass
1178
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001179 def handle_unlinkat(self, _args, _result):
1180 # In theory, the file had to be created anyway.
1181 pass
1182
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001183 def handle_statfs(self, _args, _result):
1184 pass
1185
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001186 def handle_utimensat(self, _args, _result):
1187 pass
1188
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001189 def handle_vfork(self, _args, result):
1190 self._handling_forking('vfork', result)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001191
1192 @staticmethod
1193 def _handle_unknown(function, args, result):
1194 raise TracingFailure(
1195 'Unexpected/unimplemented trace %s(%s)= %s' %
1196 (function, args, result),
1197 None, None, None)
1198
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001199 def _handling_forking(self, name, result):
1200 """Transfers cwd."""
1201 if result.startswith(('?', '-1')):
1202 # The call failed.
1203 return
1204 # Update the other process right away.
1205 childpid = int(result)
1206 child = self._root().get_or_set_proc(childpid)
1207 if child.parentid is not None or childpid in self.children:
1208 raise TracingFailure(
1209 'Found internal inconsitency in process lifetime detection '
1210 'during a %s() call' % name,
1211 None, None, None)
1212
1213 # Copy the cwd object.
1214 child.initial_cwd = self.get_cwd()
1215 child.parentid = self.pid
1216 # It is necessary because the logs are processed out of order.
1217 self.children.append(child)
1218
maruel@chromium.org538141b2013-06-03 20:57:17 +00001219 def _handle_file(self, filepath, mode):
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001220 filepath = self._mangle(filepath)
maruel@chromium.org538141b2013-06-03 20:57:17 +00001221 self.add_file(filepath, mode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001222
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001223 def _mangle(self, filepath):
1224 """Decodes a filepath found in the log and convert it to a late-bound
1225 path if necessary.
1226
1227 |filepath| is an strace 'encoded' string and the returned value is
1228 either an unicode string if the path was absolute or a late bound path
1229 otherwise.
1230 """
1231 filepath = Strace.load_filename(filepath)
1232 if os.path.isabs(filepath):
1233 return filepath
1234 else:
1235 if isinstance(self.get_cwd(), unicode):
1236 return os.path.normpath(os.path.join(self.get_cwd(), filepath))
1237 return self.RelativePath(self.get_cwd(), filepath)
1238
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001239 def __init__(self, blacklist, root_pid, initial_cwd):
1240 """|root_pid| may be None when the root process is not known.
1241
1242 In that case, a search is done after reading all the logs to figure out
1243 the root process.
1244 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001245 super(Strace.Context, self).__init__(blacklist)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001246 assert_is_renderable(initial_cwd)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001247 self.root_pid = root_pid
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001248 self.initial_cwd = initial_cwd
1249
1250 def render(self):
1251 """Returns the string value of the initial cwd of the root process.
1252
1253 Used by RelativePath.
1254 """
1255 return self.initial_cwd
1256
1257 def on_line(self, pid, line):
1258 """Transfers control into the Process.on_line() function."""
1259 self.get_or_set_proc(pid).on_line(line.strip())
1260
1261 def to_results(self):
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001262 """If necessary, finds back the root process and verify consistency."""
1263 if not self.root_pid:
1264 # The non-sudo case. The traced process was started by strace itself,
1265 # so the pid of the traced process is not known.
1266 root = [p for p in self._process_lookup.itervalues() if not p.parentid]
1267 if len(root) == 1:
1268 self.root_process = root[0]
1269 # Save it for later.
1270 self.root_pid = self.root_process.pid
1271 else:
1272 # The sudo case. The traced process was started manually so its pid is
1273 # known.
1274 self.root_process = self._process_lookup.get(self.root_pid)
1275 if not self.root_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001276 raise TracingFailure(
1277 'Found internal inconsitency in process lifetime detection '
1278 'while finding the root process',
1279 None,
1280 None,
1281 None,
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001282 self.root_pid,
1283 sorted(self._process_lookup))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001284 process = self.root_process.to_results_process()
1285 if sorted(self._process_lookup) != sorted(p.pid for p in process.all):
1286 raise TracingFailure(
1287 'Found internal inconsitency in process lifetime detection '
1288 'while looking for len(tree) == len(list)',
1289 None,
1290 None,
1291 None,
1292 sorted(self._process_lookup),
1293 sorted(p.pid for p in process.all))
1294 return Results(process)
1295
1296 def get_or_set_proc(self, pid):
1297 """Returns the Context.Process instance for this pid or creates a new one.
1298 """
1299 if not pid or not isinstance(pid, int):
1300 raise TracingFailure(
1301 'Unpexpected value for pid: %r' % pid,
1302 pid,
1303 None,
1304 None,
1305 pid)
1306 if pid not in self._process_lookup:
1307 self._process_lookup[pid] = self.Process(self, pid)
1308 return self._process_lookup[pid]
1309
1310 @classmethod
1311 def traces(cls):
1312 """Returns the list of all handled traces to pass this as an argument to
1313 strace.
1314 """
1315 prefix = 'handle_'
1316 return [i[len(prefix):] for i in dir(cls.Process) if i.startswith(prefix)]
1317
1318 class Tracer(ApiBase.Tracer):
1319 MAX_LEN = 256
1320
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001321 def __init__(self, logname, use_sudo):
1322 super(Strace.Tracer, self).__init__(logname)
1323 self.use_sudo = use_sudo
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001324 if use_sudo:
1325 # TODO(maruel): Use the jump script systematically to make it easy to
1326 # figure out the root process, so RelativePath is not necessary anymore.
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00001327 self._child_script = create_exec_thunk()
1328 self._scripts_to_cleanup.append(self._child_script)
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001329
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001330 def trace(self, cmd, cwd, tracename, output):
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001331 """Runs strace on an executable.
1332
1333 When use_sudo=True, it is a 3-phases process: start the thunk, start
1334 sudo strace with the pid of the thunk and then have the thunk os.execve()
1335 the process to trace.
1336 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001337 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
1338 assert os.path.isabs(cmd[0]), cmd[0]
1339 assert os.path.isabs(cwd), cwd
1340 assert os.path.normpath(cwd) == cwd, cwd
1341 with self._lock:
1342 if not self._initialized:
1343 raise TracingFailure(
1344 'Called Tracer.trace() on an unitialized object',
1345 None, None, None, tracename)
1346 assert tracename not in (i['trace'] for i in self._traces)
1347 stdout = stderr = None
1348 if output:
1349 stdout = subprocess.PIPE
1350 stderr = subprocess.STDOUT
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001351
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001352 # Ensure all file related APIs are hooked.
1353 traces = ','.join(Strace.Context.traces() + ['file'])
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001354 flags = [
maruel@chromium.orga5cfdc32013-05-28 14:46:15 +00001355 # Each child process has its own trace file. It is necessary because
1356 # strace may generate corrupted log file if multiple processes are
1357 # heavily doing syscalls simultaneously.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001358 '-ff',
maruel@chromium.orga5cfdc32013-05-28 14:46:15 +00001359 # Reduce whitespace usage.
1360 '-a1',
1361 # hex encode non-ascii strings.
1362 # TODO(maruel): '-x',
1363 # TODO(maruel): '-ttt',
1364 # Signals are unnecessary noise here. Note the parser can cope with them
1365 # but reduce the unnecessary output.
1366 '-esignal=none',
1367 # Print as much data as wanted.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001368 '-s', '%d' % self.MAX_LEN,
1369 '-e', 'trace=%s' % traces,
1370 '-o', self._logname + '.' + tracename,
1371 ]
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001372
1373 if self.use_sudo:
1374 pipe_r, pipe_w = os.pipe()
1375 # Start the child process paused.
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00001376 target_cmd = [sys.executable, self._child_script, str(pipe_r)] + cmd
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001377 logging.debug(' '.join(target_cmd))
1378 child_proc = subprocess.Popen(
1379 target_cmd,
1380 stdin=subprocess.PIPE,
1381 stdout=stdout,
1382 stderr=stderr,
1383 cwd=cwd)
1384
1385 # TODO(maruel): both processes must use the same UID for it to work
1386 # without sudo. Look why -p is failing at the moment without sudo.
1387 trace_cmd = [
1388 'sudo',
1389 'strace',
1390 '-p', str(child_proc.pid),
1391 ] + flags
1392 logging.debug(' '.join(trace_cmd))
1393 strace_proc = subprocess.Popen(
1394 trace_cmd,
1395 cwd=cwd,
1396 stdin=subprocess.PIPE,
1397 stdout=subprocess.PIPE,
1398 stderr=subprocess.PIPE)
1399
1400 line = strace_proc.stderr.readline()
1401 if not re.match(r'^Process \d+ attached \- interrupt to quit$', line):
1402 # TODO(maruel): Raise an exception.
1403 assert False, line
1404
1405 # Now fire the child process.
1406 os.write(pipe_w, 'x')
1407
1408 out = child_proc.communicate()[0]
1409 strace_out = strace_proc.communicate()[0]
1410
1411 # TODO(maruel): if strace_proc.returncode: Add an exception.
1412 saved_out = strace_out if strace_proc.returncode else out
1413 root_pid = child_proc.pid
1414 else:
1415 # Non-sudo case.
1416 trace_cmd = [
1417 'strace',
1418 ] + flags + cmd
1419 logging.debug(' '.join(trace_cmd))
1420 child_proc = subprocess.Popen(
1421 trace_cmd,
1422 cwd=cwd,
1423 stdin=subprocess.PIPE,
1424 stdout=stdout,
1425 stderr=stderr)
1426 out = child_proc.communicate()[0]
1427 # TODO(maruel): Walk the logs and figure out the root process would
1428 # simplify parsing the logs a *lot*.
1429 saved_out = out
1430 # The trace reader will have to figure out.
1431 root_pid = None
1432
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001433 with self._lock:
1434 assert tracename not in (i['trace'] for i in self._traces)
1435 self._traces.append(
1436 {
1437 'cmd': cmd,
1438 'cwd': cwd,
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001439 'output': saved_out,
1440 'pid': root_pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001441 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001442 })
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001443 return child_proc.returncode, out
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001444
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001445 def __init__(self, use_sudo=None):
1446 super(Strace, self).__init__()
1447 self.use_sudo = use_sudo
1448
1449 def get_tracer(self, logname):
1450 return self.Tracer(logname, self.use_sudo)
1451
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001452 @staticmethod
1453 def clean_trace(logname):
1454 if os.path.isfile(logname):
1455 os.remove(logname)
1456 # Also delete any pid specific file from previous traces.
1457 for i in glob.iglob(logname + '.*'):
1458 if i.rsplit('.', 1)[1].isdigit():
1459 os.remove(i)
1460
1461 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001462 def parse_log(cls, logname, blacklist, trace_name):
1463 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00001464 assert os.path.isabs(logname)
Marc-Antoine Ruelde011802013-11-12 15:19:47 -05001465 data = tools.read_json(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001466 out = []
1467 for item in data['traces']:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001468 if trace_name and item['trace'] != trace_name:
1469 continue
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001470 result = {
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001471 'output': item['output'],
maruel@chromium.org964ddde2012-10-29 18:13:46 +00001472 'trace': item['trace'],
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001473 }
1474 try:
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001475 context = cls.Context(blacklist, item['pid'], item['cwd'])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001476 for pidfile in glob.iglob('%s.%s.*' % (logname, item['trace'])):
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001477 logging.debug('Reading %s', pidfile)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001478 pid = pidfile.rsplit('.', 1)[1]
1479 if pid.isdigit():
1480 pid = int(pid)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001481 found_line = False
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001482 for line in open(pidfile, 'rb'):
1483 context.on_line(pid, line)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001484 found_line = True
1485 if not found_line:
1486 # Ensures that a completely empty trace still creates the
1487 # corresponding Process instance by logging a dummy line.
1488 context.on_line(pid, '')
1489 else:
1490 logging.warning('Found unexpected file %s', pidfile)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001491 result['results'] = context.to_results()
maruel@chromium.org424f5692012-10-15 15:42:38 +00001492 except TracingFailure:
1493 result['exception'] = sys.exc_info()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001494 out.append(result)
1495 return out
1496
1497
1498class Dtrace(ApiBase):
1499 """Uses DTrace framework through dtrace. Requires root access.
1500
1501 Implies Mac OSX.
1502
1503 dtruss can't be used because it has compatibility issues with python.
1504
1505 Also, the pid->cwd handling needs to be done manually since OSX has no way to
1506 get the absolute path of the 'cwd' dtrace variable from the probe.
1507
1508 Also, OSX doesn't populate curpsinfo->pr_psargs properly, see
1509 https://discussions.apple.com/thread/1980539. So resort to handling execve()
1510 manually.
1511
1512 errno is not printed in the log since this implementation currently only cares
1513 about files that were successfully opened.
1514 """
1515 class Context(ApiBase.Context):
1516 # Format: index pid function(args)
1517 RE_HEADER = re.compile(r'^\d+ (\d+) ([a-zA-Z_\-]+)\((.*?)\)$')
1518
1519 # Arguments parsing.
1520 RE_DTRACE_BEGIN = re.compile(r'^\"(.+?)\"$')
1521 RE_CHDIR = re.compile(r'^\"(.+?)\"$')
1522 RE_EXECVE = re.compile(r'^\"(.+?)\", \[(\d+), (.+)\]$')
1523 RE_OPEN = re.compile(r'^\"(.+?)\", (0x[0-9a-z]+), (0x[0-9a-z]+)$')
1524 RE_PROC_START = re.compile(r'^(\d+), \"(.+?)\", (\d+)$')
1525 RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$')
1526
maruel@chromium.orgac361162013-06-04 15:54:06 +00001527 # O_DIRECTORY is not defined on Windows and dtrace doesn't exist on Windows.
1528 O_DIRECTORY = os.O_DIRECTORY if hasattr(os, 'O_DIRECTORY') else None
maruel@chromium.org538141b2013-06-03 20:57:17 +00001529 O_RDWR = os.O_RDWR
1530 O_WRONLY = os.O_WRONLY
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001531
1532 class Process(ApiBase.Context.Process):
1533 def __init__(self, *args):
1534 super(Dtrace.Context.Process, self).__init__(*args)
1535 self.cwd = self.initial_cwd
1536
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001537 def __init__(self, blacklist, thunk_pid, initial_cwd):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001538 logging.info(
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001539 '%s(%d, %s)' % (self.__class__.__name__, thunk_pid, initial_cwd))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001540 super(Dtrace.Context, self).__init__(blacklist)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001541 assert isinstance(initial_cwd, unicode), initial_cwd
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001542 # Process ID of the temporary script created by create_subprocess_thunk().
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001543 self._thunk_pid = thunk_pid
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001544 self._initial_cwd = initial_cwd
1545 self._line_number = 0
1546
1547 def on_line(self, line):
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001548 assert isinstance(line, unicode), line
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001549 self._line_number += 1
1550 match = self.RE_HEADER.match(line)
1551 if not match:
1552 raise TracingFailure(
1553 'Found malformed line: %s' % line,
1554 None,
1555 self._line_number,
1556 line)
1557 fn = getattr(
1558 self,
1559 'handle_%s' % match.group(2).replace('-', '_'),
1560 self._handle_ignored)
1561 # It is guaranteed to succeed because of the regexp. Or at least I thought
1562 # it would.
1563 pid = int(match.group(1))
1564 try:
1565 return fn(pid, match.group(3))
1566 except TracingFailure, e:
1567 # Hack in the values since the handler could be a static function.
1568 e.pid = pid
1569 e.line = line
1570 e.line_number = self._line_number
1571 # Re-raise the modified exception.
1572 raise
1573 except (KeyError, NotImplementedError, ValueError), e:
1574 raise TracingFailure(
1575 'Trace generated a %s exception: %s' % (
1576 e.__class__.__name__, str(e)),
1577 pid,
1578 self._line_number,
1579 line,
1580 e)
1581
1582 def to_results(self):
1583 process = self.root_process.to_results_process()
1584 # Internal concistency check.
1585 if sorted(self._process_lookup) != sorted(p.pid for p in process.all):
1586 raise TracingFailure(
1587 'Found internal inconsitency in process lifetime detection '
1588 'while looking for len(tree) == len(list)',
1589 None,
1590 None,
1591 None,
1592 sorted(self._process_lookup),
1593 sorted(p.pid for p in process.all))
1594 return Results(process)
1595
1596 def handle_dtrace_BEGIN(self, _pid, args):
1597 if not self.RE_DTRACE_BEGIN.match(args):
1598 raise TracingFailure(
1599 'Found internal inconsitency in dtrace_BEGIN log line',
1600 None, None, None)
1601
1602 def handle_proc_start(self, pid, args):
1603 """Transfers cwd.
1604
1605 The dtrace script already takes care of only tracing the processes that
1606 are child of the traced processes so there is no need to verify the
1607 process hierarchy.
1608 """
1609 if pid in self._process_lookup:
1610 raise TracingFailure(
1611 'Found internal inconsitency in proc_start: %d started two times' %
1612 pid,
1613 None, None, None)
1614 match = self.RE_PROC_START.match(args)
1615 if not match:
1616 raise TracingFailure(
1617 'Failed to parse arguments: %s' % args,
1618 None, None, None)
1619 ppid = int(match.group(1))
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001620 if ppid == self._thunk_pid and not self.root_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001621 proc = self.root_process = self.Process(
1622 self.blacklist, pid, self._initial_cwd)
1623 elif ppid in self._process_lookup:
1624 proc = self.Process(self.blacklist, pid, self._process_lookup[ppid].cwd)
1625 self._process_lookup[ppid].children.append(proc)
1626 else:
1627 # Another process tree, ignore.
1628 return
1629 self._process_lookup[pid] = proc
1630 logging.debug(
1631 'New child: %s -> %d cwd:%s' %
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001632 (ppid, pid, render(proc.initial_cwd)))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001633
1634 def handle_proc_exit(self, pid, _args):
1635 """Removes cwd."""
1636 if pid in self._process_lookup:
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001637 # self._thunk_pid is not traced itself and other traces run neither.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001638 self._process_lookup[pid].cwd = None
1639
1640 def handle_execve(self, pid, args):
1641 """Sets the process' executable.
1642
1643 TODO(maruel): Read command line arguments. See
1644 https://discussions.apple.com/thread/1980539 for an example.
1645 https://gist.github.com/1242279
1646
1647 Will have to put the answer at http://stackoverflow.com/questions/7556249.
1648 :)
1649 """
1650 if not pid in self._process_lookup:
1651 # Another process tree, ignore.
1652 return
1653 match = self.RE_EXECVE.match(args)
1654 if not match:
1655 raise TracingFailure(
1656 'Failed to parse arguments: %r' % args,
1657 None, None, None)
1658 proc = self._process_lookup[pid]
1659 proc.executable = match.group(1)
maruel@chromium.org538141b2013-06-03 20:57:17 +00001660 self._handle_file(pid, proc.executable, Results.File.READ)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001661 proc.command = self.process_escaped_arguments(match.group(3))
1662 if int(match.group(2)) != len(proc.command):
1663 raise TracingFailure(
1664 'Failed to parse execve() arguments: %s' % args,
1665 None, None, None)
1666
1667 def handle_chdir(self, pid, args):
1668 """Updates cwd."""
1669 if pid not in self._process_lookup:
1670 # Another process tree, ignore.
1671 return
1672 cwd = self.RE_CHDIR.match(args).group(1)
1673 if not cwd.startswith('/'):
1674 cwd2 = os.path.join(self._process_lookup[pid].cwd, cwd)
1675 logging.debug('handle_chdir(%d, %s) -> %s' % (pid, cwd, cwd2))
1676 else:
1677 logging.debug('handle_chdir(%d, %s)' % (pid, cwd))
1678 cwd2 = cwd
1679 self._process_lookup[pid].cwd = cwd2
1680
1681 def handle_open_nocancel(self, pid, args):
1682 """Redirects to handle_open()."""
1683 return self.handle_open(pid, args)
1684
1685 def handle_open(self, pid, args):
1686 if pid not in self._process_lookup:
1687 # Another process tree, ignore.
1688 return
1689 match = self.RE_OPEN.match(args)
1690 if not match:
1691 raise TracingFailure(
1692 'Failed to parse arguments: %s' % args,
1693 None, None, None)
1694 flag = int(match.group(2), 16)
1695 if self.O_DIRECTORY & flag == self.O_DIRECTORY:
1696 # Ignore directories.
1697 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00001698 self._handle_file(
1699 pid,
1700 match.group(1),
1701 Results.File.READ if not ((self.O_RDWR | self.O_WRONLY) & flag)
1702 else Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001703
1704 def handle_rename(self, pid, args):
1705 if pid not in self._process_lookup:
1706 # Another process tree, ignore.
1707 return
1708 match = self.RE_RENAME.match(args)
1709 if not match:
1710 raise TracingFailure(
1711 'Failed to parse arguments: %s' % args,
1712 None, None, None)
maruel@chromium.org538141b2013-06-03 20:57:17 +00001713 self._handle_file(pid, match.group(1), Results.File.READ)
1714 self._handle_file(pid, match.group(2), Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001715
maruel@chromium.org538141b2013-06-03 20:57:17 +00001716 def _handle_file(self, pid, filepath, mode):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001717 if not filepath.startswith('/'):
1718 filepath = os.path.join(self._process_lookup[pid].cwd, filepath)
1719 # We can get '..' in the path.
1720 filepath = os.path.normpath(filepath)
1721 # Sadly, still need to filter out directories here;
1722 # saw open_nocancel(".", 0, 0) = 0 lines.
1723 if os.path.isdir(filepath):
1724 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00001725 self._process_lookup[pid].add_file(filepath, mode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001726
1727 def handle_ftruncate(self, pid, args):
1728 """Just used as a signal to kill dtrace, ignoring."""
1729 pass
1730
1731 @staticmethod
1732 def _handle_ignored(pid, args):
1733 """Is called for all the event traces that are not handled."""
1734 raise NotImplementedError('Please implement me')
1735
1736 @staticmethod
1737 def process_escaped_arguments(text):
1738 """Extracts escaped arguments on a string and return the arguments as a
1739 list.
1740
1741 Implemented as an automaton.
1742
1743 Example:
1744 With text = '\\001python2.7\\001-c\\001print(\\"hi\\")\\0', the
1745 function will return ['python2.7', '-c', 'print("hi")]
1746 """
1747 if not text.endswith('\\0'):
1748 raise ValueError('String is not null terminated: %r' % text, text)
1749 text = text[:-2]
1750
1751 def unescape(x):
1752 """Replaces '\\' with '\' and '\?' (where ? is anything) with ?."""
1753 out = []
1754 escaped = False
1755 for i in x:
1756 if i == '\\' and not escaped:
1757 escaped = True
1758 continue
1759 escaped = False
1760 out.append(i)
1761 return ''.join(out)
1762
1763 return [unescape(i) for i in text.split('\\001')]
1764
1765 class Tracer(ApiBase.Tracer):
1766 # pylint: disable=C0301
1767 #
1768 # To understand the following code, you'll want to take a look at:
1769 # http://developers.sun.com/solaris/articles/dtrace_quickref/dtrace_quickref.html
1770 # https://wikis.oracle.com/display/DTrace/Variables
1771 # http://docs.oracle.com/cd/E19205-01/820-4221/
1772 #
1773 # 0. Dump all the valid probes into a text file. It is important, you
1774 # want to redirect into a file and you don't want to constantly 'sudo'.
1775 # $ sudo dtrace -l > probes.txt
1776 #
1777 # 1. Count the number of probes:
1778 # $ wc -l probes.txt
1779 # 81823 # On OSX 10.7, including 1 header line.
1780 #
1781 # 2. List providers, intentionally skipping all the 'syspolicy10925' and the
1782 # likes and skipping the header with NR>1:
1783 # $ awk 'NR>1 { print $2 }' probes.txt | sort | uniq | grep -v '[[:digit:]]'
1784 # dtrace
1785 # fbt
1786 # io
1787 # ip
1788 # lockstat
1789 # mach_trap
1790 # proc
1791 # profile
1792 # sched
1793 # syscall
1794 # tcp
1795 # vminfo
1796 #
1797 # 3. List of valid probes:
1798 # $ grep syscall probes.txt | less
1799 # or use dtrace directly:
1800 # $ sudo dtrace -l -P syscall | less
1801 #
1802 # trackedpid is an associative array where its value can be 0, 1 or 2.
1803 # 0 is for untracked processes and is the default value for items not
1804 # in the associative array.
1805 # 1 is for tracked processes.
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001806 # 2 is for the script created by create_subprocess_thunk() only. It is not
1807 # tracked itself but all its decendants are.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001808 #
1809 # The script will kill itself only once waiting_to_die == 1 and
1810 # current_processes == 0, so that both getlogin() was called and that
1811 # all traced processes exited.
1812 #
1813 # TODO(maruel): Use cacheable predicates. See
1814 # https://wikis.oracle.com/display/DTrace/Performance+Considerations
1815 D_CODE = """
1816 dtrace:::BEGIN {
1817 waiting_to_die = 0;
1818 current_processes = 0;
1819 logindex = 0;
1820 printf("%d %d %s_%s(\\"%s\\")\\n",
1821 logindex, PID, probeprov, probename, SCRIPT);
1822 logindex++;
1823 }
1824
1825 proc:::start /trackedpid[ppid]/ {
1826 trackedpid[pid] = 1;
1827 current_processes += 1;
1828 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
1829 logindex, pid, probeprov, probename,
1830 ppid,
1831 execname,
1832 current_processes);
1833 logindex++;
1834 }
1835 /* Should use SCRIPT but there is no access to this variable at that
1836 * point. */
1837 proc:::start /ppid == PID && execname == "Python"/ {
1838 trackedpid[pid] = 2;
1839 current_processes += 1;
1840 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
1841 logindex, pid, probeprov, probename,
1842 ppid,
1843 execname,
1844 current_processes);
1845 logindex++;
1846 }
1847 proc:::exit /trackedpid[pid] &&
1848 current_processes == 1 &&
1849 waiting_to_die == 1/ {
1850 trackedpid[pid] = 0;
1851 current_processes -= 1;
1852 printf("%d %d %s_%s(%d)\\n",
1853 logindex, pid, probeprov, probename,
1854 current_processes);
1855 logindex++;
1856 exit(0);
1857 }
1858 proc:::exit /trackedpid[pid]/ {
1859 trackedpid[pid] = 0;
1860 current_processes -= 1;
1861 printf("%d %d %s_%s(%d)\\n",
1862 logindex, pid, probeprov, probename,
1863 current_processes);
1864 logindex++;
1865 }
1866
1867 /* Use an arcane function to detect when we need to die */
1868 syscall::ftruncate:entry /pid == PID && arg0 == FILE_ID/ {
1869 waiting_to_die = 1;
1870 printf("%d %d %s()\\n", logindex, pid, probefunc);
1871 logindex++;
1872 }
1873 syscall::ftruncate:entry /
1874 pid == PID && arg0 == FILE_ID && current_processes == 0/ {
1875 exit(0);
1876 }
1877
1878 syscall::open*:entry /trackedpid[pid] == 1/ {
1879 self->open_arg0 = arg0;
1880 self->open_arg1 = arg1;
1881 self->open_arg2 = arg2;
1882 }
1883 syscall::open*:return /trackedpid[pid] == 1 && errno == 0/ {
1884 this->open_arg0 = copyinstr(self->open_arg0);
1885 printf("%d %d %s(\\"%s\\", 0x%x, 0x%x)\\n",
1886 logindex, pid, probefunc,
1887 this->open_arg0,
1888 self->open_arg1,
1889 self->open_arg2);
1890 logindex++;
1891 this->open_arg0 = 0;
1892 }
1893 syscall::open*:return /trackedpid[pid] == 1/ {
1894 self->open_arg0 = 0;
1895 self->open_arg1 = 0;
1896 self->open_arg2 = 0;
1897 }
1898
1899 syscall::rename:entry /trackedpid[pid] == 1/ {
1900 self->rename_arg0 = arg0;
1901 self->rename_arg1 = arg1;
1902 }
1903 syscall::rename:return /trackedpid[pid] == 1 && errno == 0/ {
1904 this->rename_arg0 = copyinstr(self->rename_arg0);
1905 this->rename_arg1 = copyinstr(self->rename_arg1);
1906 printf("%d %d %s(\\"%s\\", \\"%s\\")\\n",
1907 logindex, pid, probefunc,
1908 this->rename_arg0,
1909 this->rename_arg1);
1910 logindex++;
1911 this->rename_arg0 = 0;
1912 this->rename_arg1 = 0;
1913 }
1914 syscall::rename:return /trackedpid[pid] == 1/ {
1915 self->rename_arg0 = 0;
1916 self->rename_arg1 = 0;
1917 }
1918
1919 /* Track chdir, it's painful because it is only receiving relative path.
1920 */
1921 syscall::chdir:entry /trackedpid[pid] == 1/ {
1922 self->chdir_arg0 = arg0;
1923 }
1924 syscall::chdir:return /trackedpid[pid] == 1 && errno == 0/ {
1925 this->chdir_arg0 = copyinstr(self->chdir_arg0);
1926 printf("%d %d %s(\\"%s\\")\\n",
1927 logindex, pid, probefunc,
1928 this->chdir_arg0);
1929 logindex++;
1930 this->chdir_arg0 = 0;
1931 }
1932 syscall::chdir:return /trackedpid[pid] == 1/ {
1933 self->chdir_arg0 = 0;
1934 }
1935 """
1936
1937 # execve-specific code, tends to throw a lot of exceptions.
1938 D_CODE_EXECVE = """
1939 /* Finally what we care about! */
1940 syscall::exec*:entry /trackedpid[pid]/ {
1941 self->exec_arg0 = copyinstr(arg0);
1942 /* Incrementally probe for a NULL in the argv parameter of execve() to
1943 * figure out argc. */
1944 /* TODO(maruel): Skip the remaining copyin() when a NULL pointer was
1945 * found. */
1946 self->exec_argc = 0;
1947 /* Probe for argc==1 */
1948 this->exec_argv = (user_addr_t*)copyin(
1949 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
1950 self->exec_argc = this->exec_argv[self->exec_argc] ?
1951 (self->exec_argc + 1) : self->exec_argc;
1952
1953 /* Probe for argc==2 */
1954 this->exec_argv = (user_addr_t*)copyin(
1955 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
1956 self->exec_argc = this->exec_argv[self->exec_argc] ?
1957 (self->exec_argc + 1) : self->exec_argc;
1958
1959 /* Probe for argc==3 */
1960 this->exec_argv = (user_addr_t*)copyin(
1961 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
1962 self->exec_argc = this->exec_argv[self->exec_argc] ?
1963 (self->exec_argc + 1) : self->exec_argc;
1964
1965 /* Probe for argc==4 */
1966 this->exec_argv = (user_addr_t*)copyin(
1967 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
1968 self->exec_argc = this->exec_argv[self->exec_argc] ?
1969 (self->exec_argc + 1) : self->exec_argc;
1970
1971 /* Copy the inputs strings since there is no guarantee they'll be
1972 * present after the call completed. */
1973 self->exec_argv0 = (self->exec_argc > 0) ?
1974 copyinstr(this->exec_argv[0]) : "";
1975 self->exec_argv1 = (self->exec_argc > 1) ?
1976 copyinstr(this->exec_argv[1]) : "";
1977 self->exec_argv2 = (self->exec_argc > 2) ?
1978 copyinstr(this->exec_argv[2]) : "";
1979 self->exec_argv3 = (self->exec_argc > 3) ?
1980 copyinstr(this->exec_argv[3]) : "";
1981 this->exec_argv = 0;
1982 }
1983 syscall::exec*:return /trackedpid[pid] && errno == 0/ {
1984 /* We need to join strings here, as using multiple printf() would
1985 * cause tearing when multiple threads/processes are traced.
1986 * Since it is impossible to escape a string and join it to another one,
1987 * like sprintf("%s%S", previous, more), use hackery.
1988 * Each of the elements are split with a \\1. \\0 cannot be used because
1989 * it is simply ignored. This will conflict with any program putting a
1990 * \\1 in their execve() string but this should be "rare enough" */
1991 this->args = "";
1992 /* Process exec_argv[0] */
1993 this->args = strjoin(
1994 this->args, (self->exec_argc > 0) ? self->exec_argv0 : "");
1995
1996 /* Process exec_argv[1] */
1997 this->args = strjoin(
1998 this->args, (self->exec_argc > 1) ? "\\1" : "");
1999 this->args = strjoin(
2000 this->args, (self->exec_argc > 1) ? self->exec_argv1 : "");
2001
2002 /* Process exec_argv[2] */
2003 this->args = strjoin(
2004 this->args, (self->exec_argc > 2) ? "\\1" : "");
2005 this->args = strjoin(
2006 this->args, (self->exec_argc > 2) ? self->exec_argv2 : "");
2007
2008 /* Process exec_argv[3] */
2009 this->args = strjoin(
2010 this->args, (self->exec_argc > 3) ? "\\1" : "");
2011 this->args = strjoin(
2012 this->args, (self->exec_argc > 3) ? self->exec_argv3 : "");
2013
2014 /* Prints self->exec_argc to permits verifying the internal
2015 * consistency since this code is quite fishy. */
2016 printf("%d %d %s(\\"%s\\", [%d, %S])\\n",
2017 logindex, pid, probefunc,
2018 self->exec_arg0,
2019 self->exec_argc,
2020 this->args);
2021 logindex++;
2022 this->args = 0;
2023 }
2024 syscall::exec*:return /trackedpid[pid]/ {
2025 self->exec_arg0 = 0;
2026 self->exec_argc = 0;
2027 self->exec_argv0 = 0;
2028 self->exec_argv1 = 0;
2029 self->exec_argv2 = 0;
2030 self->exec_argv3 = 0;
2031 }
2032 """
2033
2034 # Code currently not used.
2035 D_EXTRANEOUS = """
2036 /* This is a good learning experience, since it traces a lot of things
2037 * related to the process and child processes.
2038 * Warning: it generates a gigantic log. For example, tracing
2039 * "data/trace_inputs/child1.py --child" generates a 2mb log and takes
2040 * several minutes to execute.
2041 */
2042 /*
2043 mach_trap::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2044 printf("%d %d %s_%s() = %d\\n",
2045 logindex, pid, probeprov, probefunc, errno);
2046 logindex++;
2047 }
2048 proc::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2049 printf("%d %d %s_%s() = %d\\n",
2050 logindex, pid, probeprov, probefunc, errno);
2051 logindex++;
2052 }
2053 sched::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2054 printf("%d %d %s_%s() = %d\\n",
2055 logindex, pid, probeprov, probefunc, errno);
2056 logindex++;
2057 }
2058 syscall::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2059 printf("%d %d %s_%s() = %d\\n",
2060 logindex, pid, probeprov, probefunc, errno);
2061 logindex++;
2062 }
2063 vminfo::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2064 printf("%d %d %s_%s() = %d\\n",
2065 logindex, pid, probeprov, probefunc, errno);
2066 logindex++;
2067 }
2068 */
2069 /* TODO(maruel): *stat* functions and friends
2070 syscall::access:return,
2071 syscall::chdir:return,
2072 syscall::chflags:return,
2073 syscall::chown:return,
2074 syscall::chroot:return,
2075 syscall::getattrlist:return,
2076 syscall::getxattr:return,
2077 syscall::lchown:return,
2078 syscall::lstat64:return,
2079 syscall::lstat:return,
2080 syscall::mkdir:return,
2081 syscall::pathconf:return,
2082 syscall::readlink:return,
2083 syscall::removexattr:return,
2084 syscall::setxattr:return,
2085 syscall::stat64:return,
2086 syscall::stat:return,
2087 syscall::truncate:return,
2088 syscall::unlink:return,
2089 syscall::utimes:return,
2090 */
2091 """
2092
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002093 def __init__(self, logname, use_sudo):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002094 """Starts the log collection with dtrace.
2095
2096 Requires root access or chmod 4555 on dtrace. dtrace is asynchronous so
2097 this needs to wait for dtrace to be "warmed up".
2098 """
2099 super(Dtrace.Tracer, self).__init__(logname)
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002100 # This script is used as a signal to figure out the root process.
2101 self._signal_script = create_subprocess_thunk()
2102 self._scripts_to_cleanup.append(self._signal_script)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002103 # This unique dummy temp file is used to signal the dtrace script that it
2104 # should stop as soon as all the child processes are done. A bit hackish
2105 # but works fine enough.
2106 self._dummy_file_id, self._dummy_file_name = tempfile.mkstemp(
2107 prefix='trace_signal_file')
2108
maruel@chromium.org9b2112a2013-04-05 18:01:02 +00002109 dtrace_path = '/usr/sbin/dtrace'
2110 if not os.path.isfile(dtrace_path):
2111 dtrace_path = 'dtrace'
2112 elif use_sudo is None and (os.stat(dtrace_path).st_mode & stat.S_ISUID):
2113 # No need to sudo. For those following at home, don't do that.
2114 use_sudo = False
2115
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002116 # Note: do not use the -p flag. It's useless if the initial process quits
2117 # too fast, resulting in missing traces from the grand-children. The D
2118 # code manages the dtrace lifetime itself.
2119 trace_cmd = [
maruel@chromium.org9b2112a2013-04-05 18:01:02 +00002120 dtrace_path,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002121 # Use a larger buffer if getting 'out of scratch space' errors.
2122 # Ref: https://wikis.oracle.com/display/DTrace/Options+and+Tunables
2123 '-b', '10m',
2124 '-x', 'dynvarsize=10m',
2125 #'-x', 'dtrace_global_maxsize=1m',
2126 '-x', 'evaltime=exec',
2127 '-o', '/dev/stderr',
2128 '-q',
2129 '-n', self._get_dtrace_code(),
2130 ]
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002131 if use_sudo is not False:
2132 trace_cmd.insert(0, 'sudo')
2133
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002134 with open(self._logname + '.log', 'wb') as logfile:
2135 self._dtrace = subprocess.Popen(
2136 trace_cmd, stdout=logfile, stderr=subprocess.STDOUT)
2137 logging.debug('Started dtrace pid: %d' % self._dtrace.pid)
2138
2139 # Reads until one line is printed, which signifies dtrace is up and ready.
2140 with open(self._logname + '.log', 'rb') as logfile:
2141 while 'dtrace_BEGIN' not in logfile.readline():
2142 if self._dtrace.poll() is not None:
2143 # Do a busy wait. :/
2144 break
2145 logging.debug('dtrace started')
2146
2147 def _get_dtrace_code(self):
2148 """Setups the D code to implement child process tracking.
2149
2150 Injects the cookie in the script so it knows when to stop.
2151
2152 The script will detect any instance of the script created with
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002153 create_subprocess_thunk() and will start tracing it.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002154 """
maruel@chromium.orgbe75f282012-11-02 19:34:37 +00002155 out = (
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002156 'inline int PID = %d;\n'
2157 'inline string SCRIPT = "%s";\n'
2158 'inline int FILE_ID = %d;\n'
2159 '\n'
2160 '%s') % (
2161 os.getpid(),
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002162 self._signal_script,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002163 self._dummy_file_id,
maruel@chromium.orgbe75f282012-11-02 19:34:37 +00002164 self.D_CODE)
2165 if os.environ.get('TRACE_INPUTS_DTRACE_ENABLE_EXECVE') == '1':
2166 # Do not enable by default since it tends to spew dtrace: error lines
2167 # because the execve() parameters are not in valid memory at the time of
2168 # logging.
2169 # TODO(maruel): Find a way to make this reliable since it's useful but
2170 # only works in limited/trivial uses cases for now.
2171 out += self.D_CODE_EXECVE
2172 return out
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002173
2174 def trace(self, cmd, cwd, tracename, output):
2175 """Runs dtrace on an executable.
2176
2177 This dtruss is broken when it starts the process itself or when tracing
2178 child processes, this code starts a wrapper process
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002179 generated with create_subprocess_thunk() which starts the executable to
2180 trace.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002181 """
2182 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
2183 assert os.path.isabs(cmd[0]), cmd[0]
2184 assert os.path.isabs(cwd), cwd
2185 assert os.path.normpath(cwd) == cwd, cwd
2186 with self._lock:
2187 if not self._initialized:
2188 raise TracingFailure(
2189 'Called Tracer.trace() on an unitialized object',
2190 None, None, None, tracename)
2191 assert tracename not in (i['trace'] for i in self._traces)
2192
2193 # Starts the script wrapper to start the child process. This signals the
2194 # dtrace script that this process is to be traced.
2195 stdout = stderr = None
2196 if output:
2197 stdout = subprocess.PIPE
2198 stderr = subprocess.STDOUT
2199 child_cmd = [
2200 sys.executable,
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002201 self._signal_script,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002202 tracename,
2203 ]
2204 # Call a dummy function so that dtrace knows I'm about to launch a process
2205 # that needs to be traced.
2206 # Yummy.
2207 child = subprocess.Popen(
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00002208 child_cmd + tools.fix_python_path(cmd),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002209 stdin=subprocess.PIPE,
2210 stdout=stdout,
2211 stderr=stderr,
2212 cwd=cwd)
2213 logging.debug('Started child pid: %d' % child.pid)
2214
2215 out = child.communicate()[0]
2216 # This doesn't mean tracing is done, one of the grand-child process may
2217 # still be alive. It will be tracked with the dtrace script.
2218
2219 with self._lock:
2220 assert tracename not in (i['trace'] for i in self._traces)
2221 self._traces.append(
2222 {
2223 'cmd': cmd,
2224 'cwd': cwd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002225 'pid': child.pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002226 'output': out,
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002227 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002228 })
2229 return child.returncode, out
2230
2231 def close(self, timeout=None):
2232 """Terminates dtrace."""
2233 logging.debug('close(%s)' % timeout)
2234 try:
2235 try:
2236 super(Dtrace.Tracer, self).close(timeout)
2237 # Signal dtrace that it should stop now.
maruel@chromium.orgf43e68b2012-10-15 20:23:10 +00002238 # ftruncate doesn't exist on Windows.
2239 os.ftruncate(self._dummy_file_id, 0) # pylint: disable=E1101
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002240 if timeout:
2241 start = time.time()
2242 # Use polling. :/
2243 while (self._dtrace.poll() is None and
2244 (time.time() - start) < timeout):
2245 time.sleep(0.1)
2246 self._dtrace.kill()
2247 self._dtrace.wait()
2248 finally:
2249 # Make sure to kill it in any case.
2250 if self._dtrace.poll() is None:
2251 try:
2252 self._dtrace.kill()
2253 self._dtrace.wait()
2254 except OSError:
2255 pass
2256
2257 if self._dtrace.returncode != 0:
2258 # Warn about any dtrace failure but basically ignore it.
2259 print 'dtrace failure: %s' % self._dtrace.returncode
2260 finally:
2261 os.close(self._dummy_file_id)
2262 os.remove(self._dummy_file_name)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002263
2264 def post_process_log(self):
2265 """Sorts the log back in order when each call occured.
2266
2267 dtrace doesn't save the buffer in strict order since it keeps one buffer
2268 per CPU.
2269 """
2270 super(Dtrace.Tracer, self).post_process_log()
2271 logname = self._logname + '.log'
2272 with open(logname, 'rb') as logfile:
2273 lines = [l for l in logfile if l.strip()]
2274 errors = [l for l in lines if l.startswith('dtrace:')]
2275 if errors:
2276 raise TracingFailure(
2277 'Found errors in the trace: %s' % '\n'.join(errors),
2278 None, None, None, logname)
2279 try:
2280 lines = sorted(lines, key=lambda l: int(l.split(' ', 1)[0]))
2281 except ValueError:
2282 raise TracingFailure(
2283 'Found errors in the trace: %s' % '\n'.join(
2284 l for l in lines if l.split(' ', 1)[0].isdigit()),
2285 None, None, None, logname)
2286 with open(logname, 'wb') as logfile:
2287 logfile.write(''.join(lines))
2288
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002289 def __init__(self, use_sudo=None):
2290 super(Dtrace, self).__init__()
2291 self.use_sudo = use_sudo
2292
2293 def get_tracer(self, logname):
2294 return self.Tracer(logname, self.use_sudo)
2295
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002296 @staticmethod
2297 def clean_trace(logname):
2298 for ext in ('', '.log'):
2299 if os.path.isfile(logname + ext):
2300 os.remove(logname + ext)
2301
2302 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00002303 def parse_log(cls, logname, blacklist, trace_name):
2304 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00002305 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002306
2307 def blacklist_more(filepath):
2308 # All the HFS metadata is in the form /.vol/...
2309 return blacklist(filepath) or re.match(r'^\/\.vol\/.+$', filepath)
2310
Marc-Antoine Ruelde011802013-11-12 15:19:47 -05002311 data = tools.read_json(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002312 out = []
2313 for item in data['traces']:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00002314 if trace_name and item['trace'] != trace_name:
2315 continue
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002316 result = {
2317 'output': item['output'],
2318 'trace': item['trace'],
2319 }
2320 try:
2321 context = cls.Context(blacklist_more, item['pid'], item['cwd'])
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002322 # It's fine to assume the file as UTF-8: OSX enforces the file names to
2323 # be valid UTF-8 and we control the log output.
2324 for line in codecs.open(logname + '.log', 'rb', encoding='utf-8'):
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002325 context.on_line(line)
2326 result['results'] = context.to_results()
2327 except TracingFailure:
2328 result['exception'] = sys.exc_info()
2329 out.append(result)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002330 return out
2331
2332
2333class LogmanTrace(ApiBase):
2334 """Uses the native Windows ETW based tracing functionality to trace a child
2335 process.
2336
2337 Caveat: this implementations doesn't track cwd or initial_cwd. It is because
2338 the Windows Kernel doesn't have a concept of 'current working directory' at
2339 all. A Win32 process has a map of current directories, one per drive letter
2340 and it is managed by the user mode kernel32.dll. In kernel, a file is always
2341 opened relative to another file_object or as an absolute path. All the current
2342 working directory logic is done in user mode.
2343 """
2344 class Context(ApiBase.Context):
2345 """Processes a ETW log line and keeps the list of existent and non
2346 existent files accessed.
2347
2348 Ignores directories.
2349 """
2350 # These indexes are for the stripped version in json.
2351 EVENT_NAME = 0
2352 TYPE = 1
2353 PID = 2
2354 TID = 3
2355 PROCESSOR_ID = 4
2356 TIMESTAMP = 5
2357 USER_DATA = 6
2358
2359 class Process(ApiBase.Context.Process):
2360 def __init__(self, *args):
2361 super(LogmanTrace.Context.Process, self).__init__(*args)
2362 # Handle file objects that succeeded.
2363 self.file_objects = {}
2364
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002365 def __init__(self, blacklist, thunk_pid, trace_name, thunk_cmd):
2366 logging.info(
2367 '%s(%d, %s, %s)', self.__class__.__name__, thunk_pid, trace_name,
2368 thunk_cmd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002369 super(LogmanTrace.Context, self).__init__(blacklist)
maruel@chromium.org561d4b22013-09-26 21:08:08 +00002370 self._drive_map = file_path.DosDriveMap()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002371 # Threads mapping to the corresponding process id.
2372 self._threads_active = {}
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002373 # Process ID of the tracer, e.g. the temporary script created by
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002374 # create_subprocess_thunk(). This is tricky because the process id may
2375 # have been reused.
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002376 self._thunk_pid = thunk_pid
2377 self._thunk_cmd = thunk_cmd
2378 self._trace_name = trace_name
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002379 self._line_number = 0
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002380 self._thunk_process = None
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002381
2382 def on_line(self, line):
2383 """Processes a json Event line."""
2384 self._line_number += 1
2385 try:
2386 # By Opcode
2387 handler = getattr(
2388 self,
2389 'handle_%s_%s' % (line[self.EVENT_NAME], line[self.TYPE]),
2390 None)
2391 if not handler:
2392 raise TracingFailure(
2393 'Unexpected event %s_%s' % (
2394 line[self.EVENT_NAME], line[self.TYPE]),
2395 None, None, None)
2396 handler(line)
2397 except TracingFailure, e:
2398 # Hack in the values since the handler could be a static function.
2399 e.pid = line[self.PID]
2400 e.line = line
2401 e.line_number = self._line_number
2402 # Re-raise the modified exception.
2403 raise
2404 except (KeyError, NotImplementedError, ValueError), e:
2405 raise TracingFailure(
2406 'Trace generated a %s exception: %s' % (
2407 e.__class__.__name__, str(e)),
2408 line[self.PID],
2409 self._line_number,
2410 line,
2411 e)
2412
2413 def to_results(self):
2414 if not self.root_process:
2415 raise TracingFailure(
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002416 'Failed to detect the initial process %d' % self._thunk_pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002417 None, None, None)
2418 process = self.root_process.to_results_process()
2419 return Results(process)
2420
2421 def _thread_to_process(self, tid):
2422 """Finds the process from the thread id."""
2423 tid = int(tid, 16)
2424 pid = self._threads_active.get(tid)
2425 if not pid or not self._process_lookup.get(pid):
2426 return
2427 return self._process_lookup[pid]
2428
2429 @classmethod
2430 def handle_EventTrace_Header(cls, line):
2431 """Verifies no event was dropped, e.g. no buffer overrun occured."""
2432 BUFFER_SIZE = cls.USER_DATA
2433 #VERSION = cls.USER_DATA + 1
2434 #PROVIDER_VERSION = cls.USER_DATA + 2
2435 #NUMBER_OF_PROCESSORS = cls.USER_DATA + 3
2436 #END_TIME = cls.USER_DATA + 4
2437 #TIMER_RESOLUTION = cls.USER_DATA + 5
2438 #MAX_FILE_SIZE = cls.USER_DATA + 6
2439 #LOG_FILE_MODE = cls.USER_DATA + 7
2440 #BUFFERS_WRITTEN = cls.USER_DATA + 8
2441 #START_BUFFERS = cls.USER_DATA + 9
2442 #POINTER_SIZE = cls.USER_DATA + 10
2443 EVENTS_LOST = cls.USER_DATA + 11
2444 #CPU_SPEED = cls.USER_DATA + 12
2445 #LOGGER_NAME = cls.USER_DATA + 13
2446 #LOG_FILE_NAME = cls.USER_DATA + 14
2447 #BOOT_TIME = cls.USER_DATA + 15
2448 #PERF_FREQ = cls.USER_DATA + 16
2449 #START_TIME = cls.USER_DATA + 17
2450 #RESERVED_FLAGS = cls.USER_DATA + 18
2451 #BUFFERS_LOST = cls.USER_DATA + 19
2452 #SESSION_NAME_STRING = cls.USER_DATA + 20
2453 #LOG_FILE_NAME_STRING = cls.USER_DATA + 21
2454 if line[EVENTS_LOST] != '0':
2455 raise TracingFailure(
2456 ( '%s events were lost during trace, please increase the buffer '
2457 'size from %s') % (line[EVENTS_LOST], line[BUFFER_SIZE]),
2458 None, None, None)
2459
2460 def handle_FileIo_Cleanup(self, line):
2461 """General wisdom: if a file is closed, it's because it was opened.
2462
2463 Note that FileIo_Close is not used since if a file was opened properly but
2464 not closed before the process exits, only Cleanup will be logged.
2465 """
2466 #IRP = self.USER_DATA
2467 TTID = self.USER_DATA + 1 # Thread ID, that's what we want.
2468 FILE_OBJECT = self.USER_DATA + 2
2469 #FILE_KEY = self.USER_DATA + 3
2470 proc = self._thread_to_process(line[TTID])
2471 if not proc:
2472 # Not a process we care about.
2473 return
2474 file_object = line[FILE_OBJECT]
2475 if file_object in proc.file_objects:
maruel@chromium.org538141b2013-06-03 20:57:17 +00002476 filepath, access_type = proc.file_objects.pop(file_object)
2477 proc.add_file(filepath, access_type)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002478
2479 def handle_FileIo_Create(self, line):
2480 """Handles a file open.
2481
2482 All FileIo events are described at
2483 http://msdn.microsoft.com/library/windows/desktop/aa363884.aspx
2484 for some value of 'description'.
2485
2486 " (..) process and thread id values of the IO events (..) are not valid "
2487 http://msdn.microsoft.com/magazine/ee358703.aspx
2488
2489 The FileIo.Create event doesn't return if the CreateFile() call
2490 succeeded, so keep track of the file_object and check that it is
2491 eventually closed with FileIo_Cleanup.
2492 """
2493 #IRP = self.USER_DATA
2494 TTID = self.USER_DATA + 1 # Thread ID, that's what we want.
2495 FILE_OBJECT = self.USER_DATA + 2
2496 #CREATE_OPTIONS = self.USER_DATA + 3
2497 #FILE_ATTRIBUTES = self.USER_DATA + 4
maruel@chromium.org538141b2013-06-03 20:57:17 +00002498 #SHARE_ACCESS = self.USER_DATA + 5
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002499 OPEN_PATH = self.USER_DATA + 6
2500
2501 proc = self._thread_to_process(line[TTID])
2502 if not proc:
2503 # Not a process we care about.
2504 return
2505
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002506 raw_path = line[OPEN_PATH]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002507 # Ignore directories and bare drive right away.
2508 if raw_path.endswith(os.path.sep):
2509 return
2510 filepath = self._drive_map.to_win32(raw_path)
2511 # Ignore bare drive right away. Some may still fall through with format
2512 # like '\\?\X:'
2513 if len(filepath) == 2:
2514 return
2515 file_object = line[FILE_OBJECT]
2516 if os.path.isdir(filepath):
2517 # There is no O_DIRECTORY equivalent on Windows. The closed is
2518 # FILE_FLAG_BACKUP_SEMANTICS but it's not exactly right either. So
2519 # simply discard directories are they are found.
2520 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00002521 # Override any stale file object.
2522 # TODO(maruel): Figure out a way to detect if the file was opened for
2523 # reading or writting. Sadly CREATE_OPTIONS doesn't seem to be of any help
2524 # here. For now mark as None to make it clear we have no idea what it is
2525 # about.
2526 proc.file_objects[file_object] = (filepath, None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002527
2528 def handle_FileIo_Rename(self, line):
2529 # TODO(maruel): Handle?
2530 pass
2531
2532 def handle_Process_End(self, line):
2533 pid = line[self.PID]
2534 if self._process_lookup.get(pid):
2535 logging.info('Terminated: %d' % pid)
2536 self._process_lookup[pid] = None
2537 else:
2538 logging.debug('Terminated: %d' % pid)
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002539 if self._thunk_process and self._thunk_process.pid == pid:
2540 self._thunk_process = None
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002541
2542 def handle_Process_Start(self, line):
2543 """Handles a new child process started by PID."""
2544 #UNIQUE_PROCESS_KEY = self.USER_DATA
2545 PROCESS_ID = self.USER_DATA + 1
2546 #PARENT_PID = self.USER_DATA + 2
2547 #SESSION_ID = self.USER_DATA + 3
2548 #EXIT_STATUS = self.USER_DATA + 4
2549 #DIRECTORY_TABLE_BASE = self.USER_DATA + 5
2550 #USER_SID = self.USER_DATA + 6
2551 IMAGE_FILE_NAME = self.USER_DATA + 7
2552 COMMAND_LINE = self.USER_DATA + 8
2553
2554 ppid = line[self.PID]
2555 pid = int(line[PROCESS_ID], 16)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002556 command_line = CommandLineToArgvW(line[COMMAND_LINE])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002557 logging.debug(
2558 'New process %d->%d (%s) %s' %
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002559 (ppid, pid, line[IMAGE_FILE_NAME], command_line))
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002560
2561 if pid == self._thunk_pid:
2562 # Need to ignore processes we don't know about because the log is
2563 # system-wide. self._thunk_pid shall start only one process.
2564 # This is tricky though because Windows *loves* to reuse process id and
2565 # it happens often that the process ID of the thunk script created by
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002566 # create_subprocess_thunk() is reused. So just detecting the pid here is
2567 # not sufficient, we must confirm the command line.
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002568 if command_line[:len(self._thunk_cmd)] != self._thunk_cmd:
2569 logging.info(
2570 'Ignoring duplicate pid %d for %s: %s while searching for %s',
2571 pid, self._trace_name, command_line, self._thunk_cmd)
2572 return
2573
2574 # TODO(maruel): The check is quite weak. Add the thunk path.
2575 if self._thunk_process:
2576 raise TracingFailure(
2577 ( 'Parent process is _thunk_pid(%d) but thunk_process(%d) is '
2578 'already set') % (self._thunk_pid, self._thunk_process.pid),
2579 None, None, None)
2580 proc = self.Process(self.blacklist, pid, None)
2581 self._thunk_process = proc
2582 return
2583 elif ppid == self._thunk_pid and self._thunk_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002584 proc = self.Process(self.blacklist, pid, None)
2585 self.root_process = proc
2586 ppid = None
2587 elif self._process_lookup.get(ppid):
2588 proc = self.Process(self.blacklist, pid, None)
2589 self._process_lookup[ppid].children.append(proc)
2590 else:
2591 # Ignore
2592 return
2593 self._process_lookup[pid] = proc
2594
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002595 proc.command = command_line
2596 proc.executable = line[IMAGE_FILE_NAME]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002597 # proc.command[0] may be the absolute path of 'executable' but it may be
2598 # anything else too. If it happens that command[0] ends with executable,
2599 # use it, otherwise defaults to the base name.
2600 cmd0 = proc.command[0].lower()
2601 if not cmd0.endswith('.exe'):
2602 # TODO(maruel): That's not strictly true either.
2603 cmd0 += '.exe'
2604 if cmd0.endswith(proc.executable) and os.path.isfile(cmd0):
2605 # Fix the path.
2606 cmd0 = cmd0.replace('/', os.path.sep)
2607 cmd0 = os.path.normpath(cmd0)
maruel@chromium.org561d4b22013-09-26 21:08:08 +00002608 proc.executable = file_path.get_native_path_case(cmd0)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002609 logging.info(
2610 'New child: %s -> %d %s' % (ppid, pid, proc.executable))
2611
2612 def handle_Thread_End(self, line):
2613 """Has the same parameters as Thread_Start."""
2614 tid = int(line[self.TID], 16)
2615 self._threads_active.pop(tid, None)
2616
2617 def handle_Thread_Start(self, line):
2618 """Handles a new thread created.
2619
2620 Do not use self.PID here since a process' initial thread is created by
2621 the parent process.
2622 """
2623 PROCESS_ID = self.USER_DATA
2624 TTHREAD_ID = self.USER_DATA + 1
2625 #STACK_BASE = self.USER_DATA + 2
2626 #STACK_LIMIT = self.USER_DATA + 3
2627 #USER_STACK_BASE = self.USER_DATA + 4
2628 #USER_STACK_LIMIT = self.USER_DATA + 5
2629 #AFFINITY = self.USER_DATA + 6
2630 #WIN32_START_ADDR = self.USER_DATA + 7
2631 #TEB_BASE = self.USER_DATA + 8
2632 #SUB_PROCESS_TAG = self.USER_DATA + 9
2633 #BASE_PRIORITY = self.USER_DATA + 10
2634 #PAGE_PRIORITY = self.USER_DATA + 11
2635 #IO_PRIORITY = self.USER_DATA + 12
2636 #THREAD_FLAGS = self.USER_DATA + 13
2637 # Do not use self.PID here since a process' initial thread is created by
2638 # the parent process.
2639 pid = int(line[PROCESS_ID], 16)
2640 tid = int(line[TTHREAD_ID], 16)
2641 logging.debug('New thread pid:%d, tid:%d' % (pid, tid))
2642 self._threads_active[tid] = pid
2643
2644 @classmethod
2645 def supported_events(cls):
2646 """Returns all the procesed events."""
2647 out = []
2648 for member in dir(cls):
2649 match = re.match(r'^handle_([A-Za-z]+)_([A-Za-z]+)$', member)
2650 if match:
2651 out.append(match.groups())
2652 return out
2653
2654 class Tracer(ApiBase.Tracer):
2655 # The basic headers.
2656 EXPECTED_HEADER = [
2657 u'Event Name',
2658 u'Type',
2659 u'Event ID',
2660 u'Version',
2661 u'Channel',
2662 u'Level', # 5
2663 u'Opcode',
2664 u'Task',
2665 u'Keyword',
2666 u'PID',
2667 u'TID', # 10
2668 u'Processor Number',
2669 u'Instance ID',
2670 u'Parent Instance ID',
2671 u'Activity ID',
2672 u'Related Activity ID', # 15
2673 u'Clock-Time',
2674 u'Kernel(ms)', # Both have a resolution of ~15ms which makes them
2675 u'User(ms)', # pretty much useless.
2676 u'User Data', # Extra arguments that are event-specific.
2677 ]
2678 # Only the useful headers common to all entries are listed there. Any column
2679 # at 19 or higher is dependent on the specific event.
2680 EVENT_NAME = 0
2681 TYPE = 1
2682 PID = 9
2683 TID = 10
2684 PROCESSOR_ID = 11
2685 TIMESTAMP = 16
2686 NULL_GUID = '{00000000-0000-0000-0000-000000000000}'
2687 USER_DATA = 19
2688
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002689 class CsvReader(object):
2690 """CSV reader that reads files generated by tracerpt.exe.
2691
2692 csv.reader() fails to read them properly, it mangles file names quoted
2693 with "" with a comma in it.
2694 """
2695 # 0. Had a ',' or one of the following ' ' after a comma, next should
2696 # be ' ', '"' or string or ',' for an empty field.
2697 ( HAD_DELIMITER,
2698 # 1. Processing an unquoted field up to ','.
2699 IN_STR,
2700 # 2. Processing a new field starting with '"'.
2701 STARTING_STR_QUOTED,
2702 # 3. Second quote in a row at the start of a field. It could be either
2703 # '""foo""' or '""'. Who the hell thought it was a great idea to use
2704 # the same character for delimiting and escaping?
2705 STARTING_SECOND_QUOTE,
2706 # 4. A quote inside a quoted string where the previous character was
2707 # not a quote, so the string is not empty. Can be either: end of a
2708 # quoted string (a delimiter) or a quote escape. The next char must be
2709 # either '"' or ','.
2710 HAD_QUOTE_IN_QUOTED,
2711 # 5. Second quote inside a quoted string.
2712 HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED,
2713 # 6. Processing a field that started with '"'.
2714 IN_STR_QUOTED) = range(7)
2715
2716 def __init__(self, f):
2717 self.f = f
2718
2719 def __iter__(self):
2720 return self
2721
2722 def next(self):
2723 """Splits the line in fields."""
2724 line = self.f.readline()
2725 if not line:
2726 raise StopIteration()
2727 line = line.strip()
2728 fields = []
2729 state = self.HAD_DELIMITER
2730 for i, c in enumerate(line):
2731 if state == self.HAD_DELIMITER:
2732 if c == ',':
2733 # Empty field.
2734 fields.append('')
2735 elif c == ' ':
2736 # Ignore initial whitespaces
2737 pass
2738 elif c == '"':
2739 state = self.STARTING_STR_QUOTED
2740 fields.append('')
2741 else:
2742 # Start of a new field.
2743 state = self.IN_STR
2744 fields.append(c)
2745
2746 elif state == self.IN_STR:
2747 # Do not accept quote inside unquoted field.
2748 assert c != '"', (i, c, line, fields)
2749 if c == ',':
2750 fields[-1] = fields[-1].strip()
2751 state = self.HAD_DELIMITER
2752 else:
2753 fields[-1] = fields[-1] + c
2754
2755 elif state == self.STARTING_STR_QUOTED:
2756 if c == '"':
2757 # Do not store the character yet.
2758 state = self.STARTING_SECOND_QUOTE
2759 else:
2760 state = self.IN_STR_QUOTED
2761 fields[-1] = fields[-1] + c
2762
2763 elif state == self.STARTING_SECOND_QUOTE:
2764 if c == ',':
2765 # It was an empty field. '""' == ''.
2766 state = self.HAD_DELIMITER
2767 else:
2768 fields[-1] = fields[-1] + '"' + c
2769 state = self.IN_STR_QUOTED
2770
2771 elif state == self.HAD_QUOTE_IN_QUOTED:
2772 if c == ',':
2773 # End of the string.
2774 state = self.HAD_DELIMITER
2775 elif c == '"':
2776 state = self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED
2777 else:
2778 # The previous double-quote was just an unescaped quote.
2779 fields[-1] = fields[-1] + '"' + c
2780 state = self.IN_STR_QUOTED
2781
2782 elif state == self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED:
2783 if c == ',':
2784 # End of the string.
2785 state = self.HAD_DELIMITER
2786 fields[-1] = fields[-1] + '"'
2787 else:
maruel@chromium.org261d1352013-02-25 19:23:20 +00002788 # That's just how the logger rolls. Revert back to appending the
2789 # char and "guess" it was a quote in a double-quoted string.
2790 state = self.IN_STR_QUOTED
2791 fields[-1] = fields[-1] + '"' + c
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002792
2793 elif state == self.IN_STR_QUOTED:
2794 if c == '"':
2795 # Could be a delimiter or an escape.
2796 state = self.HAD_QUOTE_IN_QUOTED
2797 else:
2798 fields[-1] = fields[-1] + c
2799
2800 if state == self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED:
2801 fields[-1] = fields[-1] + '"'
2802 else:
2803 assert state in (
2804 # Terminated with a normal field.
2805 self.IN_STR,
2806 # Terminated with an empty field.
2807 self.STARTING_SECOND_QUOTE,
2808 # Terminated with a normal quoted field.
2809 self.HAD_QUOTE_IN_QUOTED), (
2810 line, state, fields)
2811 return fields
2812
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002813 def __init__(self, logname):
2814 """Starts the log collection.
2815
2816 Requires administrative access. logman.exe is synchronous so no need for a
2817 "warmup" call. 'Windows Kernel Trace' is *localized* so use its GUID
2818 instead. The GUID constant name is SystemTraceControlGuid. Lovely.
2819
2820 One can get the list of potentially interesting providers with:
2821 "logman query providers | findstr /i file"
2822 """
2823 super(LogmanTrace.Tracer, self).__init__(logname)
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002824 self._signal_script = create_subprocess_thunk()
2825 self._scripts_to_cleanup.append(self._signal_script)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002826 cmd_start = [
2827 'logman.exe',
2828 'start',
2829 'NT Kernel Logger',
2830 '-p', '{9e814aad-3204-11d2-9a82-006008a86939}',
2831 # splitio,fileiocompletion,syscall,file,cswitch,img
2832 '(process,fileio,thread)',
2833 '-o', self._logname + '.etl',
2834 '-ets', # Send directly to kernel
2835 # Values extracted out of thin air.
2836 # Event Trace Session buffer size in kb.
2837 '-bs', '10240',
2838 # Number of Event Trace Session buffers.
2839 '-nb', '16', '256',
2840 ]
2841 logging.debug('Running: %s' % cmd_start)
2842 try:
2843 subprocess.check_call(
2844 cmd_start,
2845 stdin=subprocess.PIPE,
2846 stdout=subprocess.PIPE,
2847 stderr=subprocess.STDOUT)
2848 except subprocess.CalledProcessError, e:
2849 if e.returncode == -2147024891:
2850 print >> sys.stderr, 'Please restart with an elevated admin prompt'
2851 elif e.returncode == -2144337737:
2852 print >> sys.stderr, (
2853 'A kernel trace was already running, stop it and try again')
2854 raise
2855
2856 def trace(self, cmd, cwd, tracename, output):
2857 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
2858 assert os.path.isabs(cmd[0]), cmd[0]
2859 assert os.path.isabs(cwd), cwd
2860 assert os.path.normpath(cwd) == cwd, cwd
2861 with self._lock:
2862 if not self._initialized:
2863 raise TracingFailure(
2864 'Called Tracer.trace() on an unitialized object',
2865 None, None, None, tracename)
2866 assert tracename not in (i['trace'] for i in self._traces)
2867
2868 # Use "logman -?" for help.
2869
2870 stdout = stderr = None
2871 if output:
2872 stdout = subprocess.PIPE
2873 stderr = subprocess.STDOUT
2874
2875 # Run the child process.
2876 logging.debug('Running: %s' % cmd)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002877 # Use the temporary script generated with create_subprocess_thunk() so we
2878 # have a clear pid owner. Since trace_inputs.py can be used as a library
2879 # and could trace multiple processes simultaneously, it makes it more
2880 # complex if the executable to be traced is executed directly here. It
2881 # also solves issues related to logman.exe that needs to be executed to
2882 # control the kernel trace.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002883 child_cmd = [
2884 sys.executable,
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002885 self._signal_script,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002886 tracename,
2887 ]
2888 child = subprocess.Popen(
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00002889 child_cmd + tools.fix_python_path(cmd),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002890 cwd=cwd,
2891 stdin=subprocess.PIPE,
2892 stdout=stdout,
2893 stderr=stderr)
2894 logging.debug('Started child pid: %d' % child.pid)
2895 out = child.communicate()[0]
2896 # This doesn't mean all the grand-children are done. Sadly, we don't have
2897 # a good way to determine that.
2898
2899 with self._lock:
2900 assert tracename not in (i['trace'] for i in self._traces)
2901 self._traces.append({
maruel@chromium.orgf43e68b2012-10-15 20:23:10 +00002902 'cmd': cmd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002903 'cwd': cwd,
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002904 'output': out,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002905 'pid': child.pid,
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002906 # Used to figure out the real process when process ids are reused.
2907 'thunk_cmd': child_cmd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002908 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002909 })
2910
2911 return child.returncode, out
2912
2913 def close(self, _timeout=None):
2914 """Stops the kernel log collection and converts the traces to text
2915 representation.
2916 """
2917 with self._lock:
maruel@chromium.org808e3f22013-05-16 16:42:54 +00002918 try:
2919 super(LogmanTrace.Tracer, self).close()
2920 finally:
2921 cmd_stop = [
2922 'logman.exe',
2923 'stop',
2924 'NT Kernel Logger',
2925 '-ets', # Sends the command directly to the kernel.
2926 ]
2927 logging.debug('Running: %s' % cmd_stop)
2928 subprocess.check_call(
2929 cmd_stop,
2930 stdin=subprocess.PIPE,
2931 stdout=subprocess.PIPE,
2932 stderr=subprocess.STDOUT)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002933
2934 def post_process_log(self):
2935 """Converts the .etl file into .csv then into .json."""
2936 super(LogmanTrace.Tracer, self).post_process_log()
2937 logformat = 'csv'
2938 self._convert_log(logformat)
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00002939 self._trim_log(logformat)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002940
maruel@chromium.org808e3f22013-05-16 16:42:54 +00002941 def _gen_logdata(self):
2942 return {
2943 'format': 'csv',
2944 'traces': self._traces,
2945 }
2946
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00002947 def _trim_log(self, logformat):
2948 """Reduces the amount of data in original log by generating a 'reduced'
2949 log.
2950 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002951 if logformat == 'csv_utf16':
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002952 file_handle = codecs.open(
2953 self._logname + '.' + logformat, 'r', encoding='utf-16')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002954
2955 elif logformat == 'csv':
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002956 assert sys.getfilesystemencoding() == 'mbcs'
2957 file_handle = codecs.open(
2958 self._logname + '.' + logformat, 'r',
2959 encoding=get_current_encoding())
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002960
2961 supported_events = LogmanTrace.Context.supported_events()
2962
2963 def trim(generator):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00002964 """Loads items from the generator and returns the interesting data.
2965
2966 It filters out any uninteresting line and reduce the amount of data in
2967 the trace.
2968 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002969 for index, line in enumerate(generator):
2970 if not index:
2971 if line != self.EXPECTED_HEADER:
2972 raise TracingFailure(
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002973 'Found malformed header: %s' % line,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002974 None, None, None)
2975 continue
2976 # As you can see, the CSV is full of useful non-redundant information:
2977 if (line[2] != '0' or # Event ID
2978 line[3] not in ('2', '3') or # Version
2979 line[4] != '0' or # Channel
2980 line[5] != '0' or # Level
2981 line[7] != '0' or # Task
2982 line[8] != '0x0000000000000000' or # Keyword
2983 line[12] != '' or # Instance ID
2984 line[13] != '' or # Parent Instance ID
2985 line[14] != self.NULL_GUID or # Activity ID
2986 line[15] != ''): # Related Activity ID
2987 raise TracingFailure(
2988 'Found unexpected values in line: %s' % ' '.join(line),
2989 None, None, None)
2990
2991 if (line[self.EVENT_NAME], line[self.TYPE]) not in supported_events:
2992 continue
2993
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002994 yield [
2995 line[self.EVENT_NAME],
2996 line[self.TYPE],
2997 line[self.PID],
2998 line[self.TID],
2999 line[self.PROCESSOR_ID],
3000 line[self.TIMESTAMP],
3001 ] + line[self.USER_DATA:]
3002
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003003 # must not convert the trim() call into a list, since it will use too much
3004 # memory for large trace. use a csv file as a workaround since the json
3005 # parser requires a complete in-memory file.
3006 with open('%s.preprocessed' % self._logname, 'wb') as f:
3007 # $ and * can't be used in file name on windows, reducing the likelihood
3008 # of having to escape a string.
3009 out = csv.writer(
3010 f, delimiter='$', quotechar='*', quoting=csv.QUOTE_MINIMAL)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003011 for line in trim(self.CsvReader(file_handle)):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003012 out.writerow([s.encode('utf-8') for s in line])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003013
3014 def _convert_log(self, logformat):
3015 """Converts the ETL trace to text representation.
3016
3017 Normally, 'csv' is sufficient. If complex scripts are used (like eastern
3018 languages), use 'csv_utf16'. If localization gets in the way, use 'xml'.
3019
3020 Arguments:
3021 - logformat: Text format to be generated, csv, csv_utf16 or xml.
3022
3023 Use "tracerpt -?" for help.
3024 """
3025 LOCALE_INVARIANT = 0x7F
3026 windll.kernel32.SetThreadLocale(LOCALE_INVARIANT)
3027 cmd_convert = [
3028 'tracerpt.exe',
3029 '-l', self._logname + '.etl',
3030 '-o', self._logname + '.' + logformat,
3031 '-gmt', # Use UTC
3032 '-y', # No prompt
3033 # Use -of XML to get the header of each items after column 19, e.g. all
3034 # the actual headers of 'User Data'.
3035 ]
3036
3037 if logformat == 'csv':
3038 # tracerpt localizes the 'Type' column, for major brainfuck
3039 # entertainment. I can't imagine any sane reason to do that.
3040 cmd_convert.extend(['-of', 'CSV'])
3041 elif logformat == 'csv_utf16':
3042 # This causes it to use UTF-16, which doubles the log size but ensures
3043 # the log is readable for non-ASCII characters.
3044 cmd_convert.extend(['-of', 'CSV', '-en', 'Unicode'])
3045 elif logformat == 'xml':
3046 cmd_convert.extend(['-of', 'XML'])
3047 else:
3048 raise ValueError('Unexpected log format \'%s\'' % logformat)
3049 logging.debug('Running: %s' % cmd_convert)
3050 # This can takes tens of minutes for large logs.
3051 # Redirects all output to stderr.
3052 subprocess.check_call(
3053 cmd_convert,
3054 stdin=subprocess.PIPE,
3055 stdout=sys.stderr,
3056 stderr=sys.stderr)
3057
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003058 def __init__(self, use_sudo=False): # pylint: disable=W0613
3059 super(LogmanTrace, self).__init__()
3060 # Ignore use_sudo. It's irrelevant on Windows but kept to simplify the API.
3061
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003062 @staticmethod
3063 def clean_trace(logname):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003064 for ext in ('', '.csv', '.etl', '.json', '.xml', '.preprocessed'):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003065 if os.path.isfile(logname + ext):
3066 os.remove(logname + ext)
3067
3068 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003069 def parse_log(cls, logname, blacklist, trace_name):
3070 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00003071 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003072
3073 def blacklist_more(filepath):
3074 # All the NTFS metadata is in the form x:\$EXTEND or stuff like that.
3075 return blacklist(filepath) or re.match(r'[A-Z]\:\\\$EXTEND', filepath)
3076
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003077 # Create a list of (Context, result_dict) tuples. This is necessary because
3078 # the csv file may be larger than the amount of available memory.
3079 contexes = [
3080 (
3081 cls.Context(
3082 blacklist_more, item['pid'], item['trace'], item['thunk_cmd']),
3083 {
3084 'output': item['output'],
3085 'trace': item['trace'],
3086 },
3087 )
Marc-Antoine Ruelde011802013-11-12 15:19:47 -05003088 for item in tools.read_json(logname)['traces']
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003089 if not trace_name or item['trace'] == trace_name
3090 ]
3091
3092 # The log may be too large to fit in memory and it is not efficient to read
3093 # it multiple times, so multiplex the contexes instead, which is slightly
3094 # more awkward.
3095 with open('%s.preprocessed' % logname, 'rb') as f:
3096 lines = csv.reader(
3097 f, delimiter='$', quotechar='*', quoting=csv.QUOTE_MINIMAL)
3098 for encoded in lines:
3099 line = [s.decode('utf-8') for s in encoded]
3100 # Convert the PID in-place from hex.
3101 line[cls.Context.PID] = int(line[cls.Context.PID], 16)
3102 for context in contexes:
3103 if 'exception' in context[1]:
3104 continue
3105 try:
3106 context[0].on_line(line)
3107 except TracingFailure:
3108 context[1]['exception'] = sys.exc_info()
3109
3110 for context in contexes:
3111 if 'exception' in context[1]:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003112 continue
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003113 context[1]['results'] = context[0].to_results()
3114
3115 return [context[1] for context in contexes]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003116
3117
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003118def get_api(**kwargs):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003119 """Returns the correct implementation for the current OS."""
3120 if sys.platform == 'cygwin':
3121 raise NotImplementedError(
3122 'Not implemented for cygwin, start the script from Win32 python')
3123 flavors = {
3124 'win32': LogmanTrace,
3125 'darwin': Dtrace,
3126 'sunos5': Dtrace,
3127 'freebsd7': Dtrace,
3128 'freebsd8': Dtrace,
3129 }
3130 # Defaults to strace.
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003131 return flavors.get(sys.platform, Strace)(**kwargs)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003132
3133
3134def extract_directories(root_dir, files, blacklist):
3135 """Detects if all the files in a directory are in |files| and if so, replace
3136 the individual files by a Results.Directory instance.
3137
3138 Takes a list of Results.File instances and returns a shorter list of
3139 Results.File and Results.Directory instances.
3140
3141 Arguments:
3142 - root_dir: Optional base directory that shouldn't be search further.
3143 - files: list of Results.File instances.
maruel@chromium.org3683afe2013-07-27 00:09:27 +00003144 - blacklist: lambda to reject unneeded files, for example r'.+\.pyc'.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003145 """
3146 logging.info(
3147 'extract_directories(%s, %d files, ...)' % (root_dir, len(files)))
3148 assert not (root_dir or '').endswith(os.path.sep), root_dir
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003149 # It is important for root_dir to not be a symlinked path, make sure to call
3150 # os.path.realpath() as needed.
3151 assert not root_dir or (
maruel@chromium.org561d4b22013-09-26 21:08:08 +00003152 os.path.realpath(file_path.get_native_path_case(root_dir)) == root_dir)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003153 assert not any(isinstance(f, Results.Directory) for f in files)
3154 # Remove non existent files.
3155 files = [f for f in files if f.existent]
3156 if not files:
3157 return files
3158 # All files must share the same root, which can be None.
3159 assert len(set(f.root for f in files)) == 1, set(f.root for f in files)
3160
3161 # Creates a {directory: {filename: File}} mapping, up to root.
3162 buckets = {}
3163 if root_dir:
3164 buckets[root_dir] = {}
3165 for fileobj in files:
3166 path = fileobj.full_path
3167 directory = os.path.dirname(path)
3168 assert directory
3169 # Do not use os.path.basename() so trailing os.path.sep is kept.
3170 basename = path[len(directory)+1:]
3171 files_in_directory = buckets.setdefault(directory, {})
3172 files_in_directory[basename] = fileobj
3173 # Add all the directories recursively up to root.
3174 while True:
3175 old_d = directory
3176 directory = os.path.dirname(directory)
3177 if directory + os.path.sep == root_dir or directory == old_d:
3178 break
3179 buckets.setdefault(directory, {})
3180
3181 root_prefix = len(root_dir) + 1 if root_dir else 0
3182 for directory in sorted(buckets, reverse=True):
maruel@chromium.orgb1678e62013-03-19 15:55:17 +00003183 if not os.path.isdir(directory):
3184 logging.debug(
3185 '%s was a directory but doesn\'t exist anymore; ignoring', directory)
3186 continue
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003187 actual = set(f for f in os.listdir(directory) if not blacklist(f))
3188 expected = set(buckets[directory])
3189 if not (actual - expected):
3190 parent = os.path.dirname(directory)
3191 buckets[parent][os.path.basename(directory)] = Results.Directory(
3192 root_dir,
3193 directory[root_prefix:],
3194 False,
3195 sum(f.size for f in buckets[directory].itervalues()),
3196 sum(f.nb_files for f in buckets[directory].itervalues()))
3197 # Remove the whole bucket.
3198 del buckets[directory]
3199
3200 # Reverse the mapping with what remains. The original instances are returned,
3201 # so the cached meta data is kept.
3202 files = sum((x.values() for x in buckets.itervalues()), [])
3203 return sorted(files, key=lambda x: x.path)
3204
3205
3206def trace(logfile, cmd, cwd, api, output):
3207 """Traces an executable. Returns (returncode, output) from api.
3208
3209 Arguments:
3210 - logfile: file to write to.
3211 - cmd: command to run.
3212 - cwd: current directory to start the process in.
3213 - api: a tracing api instance.
3214 - output: if True, returns output, otherwise prints it at the console.
3215 """
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00003216 cmd = tools.fix_python_path(cmd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003217 api.clean_trace(logfile)
3218 with api.get_tracer(logfile) as tracer:
3219 return tracer.trace(cmd, cwd, 'default', output)
3220
3221
maruel@chromium.orge5322512013-08-19 20:17:57 +00003222def CMDclean(parser, args):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003223 """Cleans up traces."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003224 options, args = parser.parse_args(args)
3225 api = get_api()
3226 api.clean_trace(options.log)
3227 return 0
3228
3229
maruel@chromium.orge5322512013-08-19 20:17:57 +00003230def CMDtrace(parser, args):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003231 """Traces an executable."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003232 parser.allow_interspersed_args = False
3233 parser.add_option(
3234 '-q', '--quiet', action='store_true',
3235 help='Redirects traced executable output to /dev/null')
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003236 parser.add_option(
3237 '-s', '--sudo', action='store_true',
3238 help='Use sudo when shelling out the tracer tool (ignored on Windows)')
3239 parser.add_option(
3240 '-n', '--no-sudo', action='store_false',
3241 help='Don\'t use sudo')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003242 options, args = parser.parse_args(args)
3243
3244 if not args:
3245 parser.error('Please provide a command to run')
3246
Marc-Antoine Ruel5906fcd2013-11-08 21:02:24 -05003247 if not can_trace():
3248 parser.error('Please rerun this program with admin privileges')
3249
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003250 if not os.path.isabs(args[0]) and os.access(args[0], os.X_OK):
3251 args[0] = os.path.abspath(args[0])
3252
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003253 # options.sudo default value is None, which is to do whatever tracer defaults
3254 # do.
3255 api = get_api(use_sudo=options.sudo)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003256 return trace(options.log, args, os.getcwd(), api, options.quiet)[0]
3257
3258
maruel@chromium.orge5322512013-08-19 20:17:57 +00003259def CMDread(parser, args):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003260 """Reads the logs and prints the result."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003261 parser.add_option(
3262 '-V', '--variable',
3263 nargs=2,
3264 action='append',
3265 dest='variables',
3266 metavar='VAR_NAME directory',
3267 default=[],
3268 help=('Variables to replace relative directories against. Example: '
3269 '"-v \'$HOME\' \'/home/%s\'" will replace all occurence of your '
3270 'home dir with $HOME') % getpass.getuser())
3271 parser.add_option(
3272 '--root-dir',
3273 help='Root directory to base everything off it. Anything outside of this '
3274 'this directory will not be reported')
3275 parser.add_option(
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003276 '--trace-name',
3277 help='Only reads one of the trace. Defaults to reading all traces')
3278 parser.add_option(
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003279 '-j', '--json', action='store_true',
3280 help='Outputs raw result data as json')
3281 parser.add_option(
maruel@chromium.org3683afe2013-07-27 00:09:27 +00003282 '--trace-blacklist', action='append', default=[],
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003283 help='List of regexp to use as blacklist filter')
3284 options, args = parser.parse_args(args)
3285
3286 if options.root_dir:
maruel@chromium.org561d4b22013-09-26 21:08:08 +00003287 options.root_dir = file_path.get_native_path_case(
maruel@chromium.org7dc17b32013-04-02 22:27:05 +00003288 unicode(os.path.abspath(options.root_dir)))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003289
3290 variables = dict(options.variables)
3291 api = get_api()
Marc-Antoine Ruelac54cb42013-11-18 14:05:35 -05003292 blacklist = tools.gen_blacklist(options.trace_blacklist)
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003293 data = api.parse_log(options.log, blacklist, options.trace_name)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003294 # Process each trace.
3295 output_as_json = []
maruel@chromium.org9e56b592013-05-21 16:56:05 +00003296 try:
3297 for item in data:
3298 if 'exception' in item:
3299 # Do not abort the other traces.
3300 print >> sys.stderr, (
3301 'Trace %s: Got an exception: %s' % (
3302 item['trace'], item['exception'][1]))
3303 continue
3304 results = item['results']
3305 if options.root_dir:
3306 results = results.strip_root(options.root_dir)
3307
3308 if options.json:
3309 output_as_json.append(results.flatten())
3310 else:
3311 simplified = extract_directories(
3312 options.root_dir, results.files, blacklist)
3313 simplified = [f.replace_variables(variables) for f in simplified]
3314 if len(data) > 1:
3315 print('Trace: %s' % item['trace'])
3316 print('Total: %d' % len(results.files))
3317 print('Non existent: %d' % len(results.non_existent))
3318 for f in results.non_existent:
3319 print(' %s' % f.path)
3320 print(
3321 'Interesting: %d reduced to %d' % (
3322 len(results.existent), len(simplified)))
3323 for f in simplified:
3324 print(' %s' % f.path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003325
3326 if options.json:
Marc-Antoine Ruelde011802013-11-12 15:19:47 -05003327 tools.write_json(sys.stdout, output_as_json, False)
maruel@chromium.org9e56b592013-05-21 16:56:05 +00003328 except KeyboardInterrupt:
3329 return 1
3330 except IOError as e:
3331 if e.errno == errno.EPIPE:
3332 # Do not print a stack trace when the output is piped to less and the user
3333 # quits before the whole output was written.
3334 return 1
3335 raise
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003336 return 0
3337
3338
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00003339class OptionParserTraceInputs(tools.OptionParserWithLogging):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003340 """Adds automatic --log handling."""
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00003341
3342 # Disable --log-file options since both --log and --log-file options are
3343 # confusing.
3344 # TODO(vadimsh): Rename --log-file or --log to something else.
3345 enable_log_file = False
3346
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003347 def __init__(self, **kwargs):
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00003348 tools.OptionParserWithLogging.__init__(self, **kwargs)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003349 self.add_option(
3350 '-l', '--log', help='Log file to generate or read, required')
3351
3352 def parse_args(self, *args, **kwargs):
3353 """Makes sure the paths make sense.
3354
3355 On Windows, / and \ are often mixed together in a path.
3356 """
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00003357 options, args = tools.OptionParserWithLogging.parse_args(
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003358 self, *args, **kwargs)
3359 if not options.log:
3360 self.error('Must supply a log file with -l')
3361 options.log = os.path.abspath(options.log)
3362 return options, args
3363
3364
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003365def main(argv):
maruel@chromium.orge5322512013-08-19 20:17:57 +00003366 dispatcher = subcommand.CommandDispatcher(__name__)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003367 try:
maruel@chromium.org3d671992013-08-20 00:38:27 +00003368 return dispatcher.execute(
3369 OptionParserTraceInputs(version=__version__), argv)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003370 except TracingFailure, e:
3371 sys.stderr.write('\nError: ')
3372 sys.stderr.write(str(e))
3373 sys.stderr.write('\n')
3374 return 1
3375
3376
3377if __name__ == '__main__':
maruel@chromium.orge5322512013-08-19 20:17:57 +00003378 fix_encoding.fix_encoding()
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00003379 tools.disable_buffering()
maruel@chromium.orge5322512013-08-19 20:17:57 +00003380 colorama.init()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003381 sys.exit(main(sys.argv[1:]))