blob: 77106ed9d2bb63eeab514866453e4a3e925de9e0 [file] [log] [blame]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001#!/usr/bin/env python
2# coding=utf-8
Marc-Antoine Ruel8add1242013-11-05 17:28:27 -05003# Copyright 2012 The Swarming Authors. All rights reserved.
Marc-Antoine Ruele98b1122013-11-05 20:27:57 -05004# Use of this source code is governed under the Apache License, Version 2.0 that
5# can be found in the LICENSE file.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00006
7"""Traces an executable and its child processes and extract the files accessed
8by them.
9
10The implementation uses OS-specific API. The native Kernel logger and the ETL
11interface is used on Windows. Dtrace is used on OSX. Strace is used otherwise.
12The OS-specific implementation is hidden in an 'API' interface.
13
14The results are embedded in a Results instance. The tracing is done in two
15phases, the first is to do the actual trace and generate an
16implementation-specific log file. Then the log file is parsed to extract the
17information, including the individual child processes and the files accessed
18from the log.
19"""
20
21import codecs
22import csv
maruel@chromium.org9e56b592013-05-21 16:56:05 +000023import errno
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000024import getpass
25import glob
26import json
27import logging
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000028import os
29import re
maruel@chromium.org9b2112a2013-04-05 18:01:02 +000030import stat
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000031import subprocess
32import sys
33import tempfile
34import threading
35import time
36import weakref
37
maruel@chromium.orge5322512013-08-19 20:17:57 +000038from third_party import colorama
39from third_party.depot_tools import fix_encoding
40from third_party.depot_tools import subcommand
41
maruel@chromium.org561d4b22013-09-26 21:08:08 +000042from utils import file_path
vadimsh@chromium.orga4326472013-08-24 02:05:41 +000043from utils import tools
44
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000045## OS-specific imports
46
47if sys.platform == 'win32':
maruel@chromium.org561d4b22013-09-26 21:08:08 +000048 from ctypes.wintypes import byref, c_int, c_wchar_p
49 from ctypes.wintypes import windll # pylint: disable=E0611
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000050
51
maruel@chromium.org3d671992013-08-20 00:38:27 +000052__version__ = '0.1'
53
54
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000055BASE_DIR = os.path.dirname(os.path.abspath(__file__))
56ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR))
57
58
59class TracingFailure(Exception):
60 """An exception occured during tracing."""
61 def __init__(self, description, pid, line_number, line, *args):
62 super(TracingFailure, self).__init__(
63 description, pid, line_number, line, *args)
64 self.description = description
65 self.pid = pid
66 self.line_number = line_number
67 self.line = line
68 self.extra = args
69
70 def __str__(self):
71 out = self.description
72 if self.pid:
73 out += '\npid: %d' % self.pid
74 if self.line_number:
75 out += '\nline: %d' % self.line_number
76 if self.line:
77 out += '\n%s' % self.line
78 if self.extra:
79 out += '\n' + ', '.join(map(str, filter(None, self.extra)))
80 return out
81
82
83## OS-specific functions
84
85if sys.platform == 'win32':
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000086 def get_current_encoding():
87 """Returns the 'ANSI' code page associated to the process."""
88 return 'cp%d' % int(windll.kernel32.GetACP())
89
90
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000091 def CommandLineToArgvW(command_line):
92 """Splits a commandline into argv using CommandLineToArgvW()."""
93 # http://msdn.microsoft.com/library/windows/desktop/bb776391.aspx
94 size = c_int()
maruel@chromium.org306e0e72012-11-02 18:22:03 +000095 assert isinstance(command_line, unicode)
96 ptr = windll.shell32.CommandLineToArgvW(command_line, byref(size))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000097 try:
98 return [arg for arg in (c_wchar_p * size.value).from_address(ptr)]
99 finally:
100 windll.kernel32.LocalFree(ptr)
101
102
Marc-Antoine Ruel5906fcd2013-11-08 21:02:24 -0500103def can_trace():
104 """Returns True if the user is an administrator on Windows.
105
106 It is required for tracing to work.
107 """
108 if sys.platform != 'win32':
109 return True
110 return bool(windll.shell32.IsUserAnAdmin())
111
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000112
maruel@chromium.org3683afe2013-07-27 00:09:27 +0000113def gen_blacklist(regexes):
114 """Returns a lambda to be used as a blacklist."""
115 compiled = [re.compile(i) for i in regexes]
116 def match(f):
117 return any(j.match(f) for j in compiled)
118 return match
119
120
maruel@chromium.org2909b7c2013-05-29 18:52:59 +0000121def create_subprocess_thunk():
122 """Creates a small temporary script to start the child process.
123
124 This thunk doesn't block, its unique name is used to identify it as the
125 parent.
126 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000127 handle, name = tempfile.mkstemp(prefix='trace_inputs_thunk', suffix='.py')
maruel@chromium.org2909b7c2013-05-29 18:52:59 +0000128 try:
129 os.write(
130 handle,
131 (
132 'import subprocess, sys\n'
133 'sys.exit(subprocess.call(sys.argv[2:]))\n'
134 ))
135 finally:
136 os.close(handle)
137 return name
138
139
140def create_exec_thunk():
141 """Creates a small temporary script to start the child executable.
142
143 Reads from the file handle provided as the fisrt argument to block, then
144 execv() the command to be traced.
145 """
146 handle, name = tempfile.mkstemp(prefix='trace_inputs_thunk', suffix='.py')
147 try:
148 os.write(
149 handle,
150 (
151 'import os, sys\n'
152 'fd = int(sys.argv[1])\n'
153 # This will block until the controlling process writes a byte on the
154 # pipe. It will do so once the tracing tool, e.g. strace, is ready to
155 # trace.
156 'os.read(fd, 1)\n'
157 'os.close(fd)\n'
158 'os.execv(sys.argv[2], sys.argv[2:])\n'
159 ))
160 finally:
161 os.close(handle)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000162 return name
163
164
165def strace_process_quoted_arguments(text):
166 """Extracts quoted arguments on a string and return the arguments as a list.
167
168 Implemented as an automaton. Supports incomplete strings in the form
169 '"foo"...'.
170
171 Example:
172 With text = '"foo", "bar"', the function will return ['foo', 'bar']
173
174 TODO(maruel): Implement escaping.
175 """
176 # All the possible states of the DFA.
177 ( NEED_QUOTE, # Begining of a new arguments.
178 INSIDE_STRING, # Inside an argument.
179 ESCAPED, # Found a '\' inside a quote. Treat the next char as-is.
180 NEED_COMMA_OR_DOT, # Right after the closing quote of an argument. Could be
181 # a serie of 3 dots or a comma.
182 NEED_SPACE, # Right after a comma
183 NEED_DOT_2, # Found a dot, need a second one.
184 NEED_DOT_3, # Found second dot, need a third one.
185 NEED_COMMA, # Found third dot, need a comma.
186 ) = range(8)
187
188 state = NEED_QUOTE
189 out = []
190 for index, char in enumerate(text):
191 if char == '"':
192 if state == NEED_QUOTE:
193 state = INSIDE_STRING
194 # A new argument was found.
195 out.append('')
196 elif state == INSIDE_STRING:
197 # The argument is now closed.
198 state = NEED_COMMA_OR_DOT
199 elif state == ESCAPED:
200 out[-1] += char
201 state = INSIDE_STRING
202 else:
203 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000204 'Can\'t process char \'%s\' at column %d for: %r' % (
205 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000206 index,
207 text)
208 elif char == ',':
209 if state in (NEED_COMMA_OR_DOT, NEED_COMMA):
210 state = NEED_SPACE
211 elif state == INSIDE_STRING:
212 out[-1] += char
213 elif state == ESCAPED:
214 out[-1] += char
215 state = INSIDE_STRING
216 else:
217 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000218 'Can\'t process char \'%s\' at column %d for: %r' % (
219 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000220 index,
221 text)
222 elif char == ' ':
223 if state == NEED_SPACE:
224 state = NEED_QUOTE
225 elif state == INSIDE_STRING:
226 out[-1] += char
227 elif state == ESCAPED:
228 out[-1] += char
229 state = INSIDE_STRING
230 else:
231 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000232 'Can\'t process char \'%s\' at column %d for: %r' % (
233 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000234 index,
235 text)
236 elif char == '.':
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000237 if state in (NEED_QUOTE, NEED_COMMA_OR_DOT):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000238 # The string is incomplete, this mean the strace -s flag should be
239 # increased.
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000240 # For NEED_QUOTE, the input string would look like '"foo", ...'.
241 # For NEED_COMMA_OR_DOT, the input string would look like '"foo"...'
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000242 state = NEED_DOT_2
243 elif state == NEED_DOT_2:
244 state = NEED_DOT_3
245 elif state == NEED_DOT_3:
246 state = NEED_COMMA
247 elif state == INSIDE_STRING:
248 out[-1] += char
249 elif state == ESCAPED:
250 out[-1] += char
251 state = INSIDE_STRING
252 else:
253 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000254 'Can\'t process char \'%s\' at column %d for: %r' % (
255 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000256 index,
257 text)
258 elif char == '\\':
259 if state == ESCAPED:
260 out[-1] += char
261 state = INSIDE_STRING
262 elif state == INSIDE_STRING:
263 state = ESCAPED
264 else:
265 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000266 'Can\'t process char \'%s\' at column %d for: %r' % (
267 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000268 index,
269 text)
270 else:
271 if state == INSIDE_STRING:
272 out[-1] += char
273 else:
274 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000275 'Can\'t process char \'%s\' at column %d for: %r' % (
276 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000277 index,
278 text)
279 if state not in (NEED_COMMA, NEED_COMMA_OR_DOT):
280 raise ValueError(
281 'String is incorrectly terminated: %r' % text,
282 text)
283 return out
284
285
286def read_json(filepath):
287 with open(filepath, 'r') as f:
288 return json.load(f)
289
290
291def write_json(filepath_or_handle, data, dense):
292 """Writes data into filepath or file handle encoded as json.
293
294 If dense is True, the json is packed. Otherwise, it is human readable.
295 """
296 if hasattr(filepath_or_handle, 'write'):
297 if dense:
298 filepath_or_handle.write(
299 json.dumps(data, sort_keys=True, separators=(',',':')))
300 else:
301 filepath_or_handle.write(json.dumps(data, sort_keys=True, indent=2))
302 else:
303 with open(filepath_or_handle, 'wb') as f:
304 if dense:
305 json.dump(data, f, sort_keys=True, separators=(',',':'))
306 else:
307 json.dump(data, f, sort_keys=True, indent=2)
308
309
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000310def assert_is_renderable(pseudo_string):
311 """Asserts the input is a valid object to be processed by render()."""
312 assert (
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000313 pseudo_string is None or
314 isinstance(pseudo_string, unicode) or
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000315 hasattr(pseudo_string, 'render')), repr(pseudo_string)
316
317
318def render(pseudo_string):
319 """Converts the pseudo-string to an unicode string."""
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000320 if pseudo_string is None or isinstance(pseudo_string, unicode):
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000321 return pseudo_string
322 return pseudo_string.render()
323
324
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000325class Results(object):
326 """Results of a trace session."""
327
328 class _TouchedObject(object):
329 """Something, a file or a directory, that was accessed."""
330 def __init__(self, root, path, tainted, size, nb_files):
331 logging.debug(
332 '%s(%s, %s, %s, %s, %s)' %
333 (self.__class__.__name__, root, path, tainted, size, nb_files))
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000334 assert_is_renderable(root)
335 assert_is_renderable(path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000336 self.root = root
337 self.path = path
338 self.tainted = tainted
339 self.nb_files = nb_files
maruel@chromium.org538141b2013-06-03 20:57:17 +0000340 # Can be used as a cache or a default value, depending on context. In
341 # particular, once self.tainted is True, because the path was replaced
342 # with a variable, it is not possible to look up the file size.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000343 self._size = size
344 # These are cache only.
345 self._real_path = None
346
347 # Check internal consistency.
348 assert path, path
maruel@chromium.org561d4b22013-09-26 21:08:08 +0000349 assert tainted or bool(root) != bool(file_path.isabs(path)), (root, path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000350 assert tainted or (
351 not os.path.exists(self.full_path) or
maruel@chromium.org561d4b22013-09-26 21:08:08 +0000352 (self.full_path == file_path.get_native_path_case(self.full_path))), (
353 tainted,
354 self.full_path,
355 file_path.get_native_path_case(self.full_path))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000356
357 @property
358 def existent(self):
359 return self.size != -1
360
361 @property
362 def full_path(self):
363 if self.root:
364 return os.path.join(self.root, self.path)
365 return self.path
366
367 @property
368 def real_path(self):
369 """Returns the path with symlinks resolved."""
370 if not self._real_path:
371 self._real_path = os.path.realpath(self.full_path)
372 return self._real_path
373
374 @property
375 def size(self):
maruel@chromium.org538141b2013-06-03 20:57:17 +0000376 """File's size. -1 is not existent.
377
378 Once tainted, it is not possible the retrieve the file size anymore since
379 the path is composed of variables.
380 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000381 if self._size is None and not self.tainted:
382 try:
383 self._size = os.stat(self.full_path).st_size
384 except OSError:
385 self._size = -1
386 return self._size
387
388 def flatten(self):
389 """Returns a dict representing this object.
390
391 A 'size' of 0 means the file was only touched and not read.
392 """
393 return {
394 'path': self.path,
395 'size': self.size,
396 }
397
398 def replace_variables(self, variables):
399 """Replaces the root of this File with one of the variables if it matches.
400
401 If a variable replacement occurs, the cloned object becomes tainted.
402 """
403 for variable, root_path in variables.iteritems():
404 if self.path.startswith(root_path):
405 return self._clone(
406 self.root, variable + self.path[len(root_path):], True)
407 # No need to clone, returns ourself.
408 return self
409
410 def strip_root(self, root):
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000411 """Returns a clone of itself with 'root' stripped off.
412
413 Note that the file is kept if it is either accessible from a symlinked
414 path that was used to access the file or through the real path.
415 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000416 # Check internal consistency.
maruel@chromium.org561d4b22013-09-26 21:08:08 +0000417 assert (
418 self.tainted or
419 (file_path.isabs(root) and root.endswith(os.path.sep))), root
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000420 if not self.full_path.startswith(root):
421 # Now try to resolve the symlinks to see if it can be reached this way.
422 # Only try *after* trying without resolving symlink.
423 if not self.real_path.startswith(root):
424 return None
425 path = self.real_path
426 else:
427 path = self.full_path
428 return self._clone(root, path[len(root):], self.tainted)
429
430 def _clone(self, new_root, new_path, tainted):
431 raise NotImplementedError(self.__class__.__name__)
432
433 class File(_TouchedObject):
434 """A file that was accessed. May not be present anymore.
435
436 If tainted is true, it means it is not a real path anymore as a variable
437 replacement occured.
438
maruel@chromium.org538141b2013-06-03 20:57:17 +0000439 |mode| can be one of None, TOUCHED, READ or WRITE.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000440 """
maruel@chromium.org538141b2013-06-03 20:57:17 +0000441 # Was probed for existence, and it is existent, but was never _opened_.
442 TOUCHED = 't'
443 # Opened for read only and guaranteed to not have been written to.
444 READ = 'r'
445 # Opened for write.
446 WRITE = 'w'
447
448 # They are listed in order of priority. E.g. if a file is traced as TOUCHED
449 # then as WRITE, only keep WRITE. None means no idea, which is a problem on
450 # Windows.
451 ACCEPTABLE_MODES = (None, TOUCHED, READ, WRITE)
452
453 def __init__(self, root, path, tainted, size, mode):
454 assert mode in self.ACCEPTABLE_MODES
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000455 super(Results.File, self).__init__(root, path, tainted, size, 1)
maruel@chromium.org538141b2013-06-03 20:57:17 +0000456 self.mode = mode
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000457
458 def _clone(self, new_root, new_path, tainted):
459 """Clones itself keeping meta-data."""
460 # Keep the self.size and self._real_path caches for performance reason. It
461 # is also important when the file becomes tainted (with a variable instead
462 # of the real path) since self.path is not an on-disk path anymore so
463 # out._size cannot be updated.
maruel@chromium.org538141b2013-06-03 20:57:17 +0000464 out = self.__class__(new_root, new_path, tainted, self.size, self.mode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000465 out._real_path = self._real_path
466 return out
467
maruel@chromium.org538141b2013-06-03 20:57:17 +0000468 def flatten(self):
469 out = super(Results.File, self).flatten()
470 out['mode'] = self.mode
471 return out
472
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000473 class Directory(_TouchedObject):
maruel@chromium.org538141b2013-06-03 20:57:17 +0000474 """A directory of files. Must exist.
475
476 For a Directory instance, self.size is not a cache, it's an actual value
477 that is never modified and represents the total size of the files contained
478 in this directory. It is possible that the directory is empty so that
479 size==0; this happens if there's only an invalid symlink in it.
480 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000481 def __init__(self, root, path, tainted, size, nb_files):
482 """path='.' is a valid value and must be handled appropriately."""
483 assert not path.endswith(os.path.sep), path
484 super(Results.Directory, self).__init__(
485 root, path + os.path.sep, tainted, size, nb_files)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000486
487 def flatten(self):
488 out = super(Results.Directory, self).flatten()
489 out['nb_files'] = self.nb_files
490 return out
491
492 def _clone(self, new_root, new_path, tainted):
493 """Clones itself keeping meta-data."""
494 out = self.__class__(
495 new_root,
496 new_path.rstrip(os.path.sep),
497 tainted,
498 self.size,
499 self.nb_files)
500 out._real_path = self._real_path
501 return out
502
503 class Process(object):
504 """A process that was traced.
505
506 Contains references to the files accessed by this process and its children.
507 """
508 def __init__(self, pid, files, executable, command, initial_cwd, children):
509 logging.debug('Process(%s, %d, ...)' % (pid, len(files)))
510 self.pid = pid
511 self.files = sorted(files, key=lambda x: x.path)
512 self.children = children
513 self.executable = executable
514 self.command = command
515 self.initial_cwd = initial_cwd
516
517 # Check internal consistency.
518 assert len(set(f.path for f in self.files)) == len(self.files), sorted(
519 f.path for f in self.files)
520 assert isinstance(self.children, list)
521 assert isinstance(self.files, list)
522
523 @property
524 def all(self):
525 for child in self.children:
526 for i in child.all:
527 yield i
528 yield self
529
530 def flatten(self):
531 return {
532 'children': [c.flatten() for c in self.children],
533 'command': self.command,
534 'executable': self.executable,
535 'files': [f.flatten() for f in self.files],
536 'initial_cwd': self.initial_cwd,
537 'pid': self.pid,
538 }
539
540 def strip_root(self, root):
maruel@chromium.org561d4b22013-09-26 21:08:08 +0000541 assert file_path.isabs(root) and root.endswith(os.path.sep), root
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000542 # Loads the files after since they are constructed as objects.
543 out = self.__class__(
544 self.pid,
545 filter(None, (f.strip_root(root) for f in self.files)),
546 self.executable,
547 self.command,
548 self.initial_cwd,
549 [c.strip_root(root) for c in self.children])
550 logging.debug(
551 'strip_root(%s) %d -> %d' % (root, len(self.files), len(out.files)))
552 return out
553
554 def __init__(self, process):
555 self.process = process
556 # Cache.
557 self._files = None
558
559 def flatten(self):
560 return {
561 'root': self.process.flatten(),
562 }
563
564 @property
565 def files(self):
566 if self._files is None:
567 self._files = sorted(
568 sum((p.files for p in self.process.all), []),
569 key=lambda x: x.path)
570 return self._files
571
572 @property
573 def existent(self):
574 return [f for f in self.files if f.existent]
575
576 @property
577 def non_existent(self):
578 return [f for f in self.files if not f.existent]
579
580 def strip_root(self, root):
581 """Returns a clone with all the files outside the directory |root| removed
582 and converts all the path to be relative paths.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000583
584 It keeps files accessible through the |root| directory or that have been
585 accessed through any symlink which points to the same directory.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000586 """
587 # Resolve any symlink
588 root = os.path.realpath(root)
maruel@chromium.org561d4b22013-09-26 21:08:08 +0000589 root = (
590 file_path.get_native_path_case(root).rstrip(os.path.sep) + os.path.sep)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000591 logging.debug('strip_root(%s)' % root)
592 return Results(self.process.strip_root(root))
593
594
595class ApiBase(object):
596 """OS-agnostic API to trace a process and its children."""
597 class Context(object):
598 """Processes one log line at a time and keeps the list of traced processes.
599
600 The parsing is complicated by the fact that logs are traced out of order for
601 strace but in-order for dtrace and logman. In addition, on Windows it is
602 very frequent that processids are reused so a flat list cannot be used. But
603 at the same time, it is impossible to faithfully construct a graph when the
604 logs are processed out of order. So both a tree and a flat mapping are used,
605 the tree is the real process tree, while the flat mapping stores the last
606 valid process for the corresponding processid. For the strace case, the
607 tree's head is guessed at the last moment.
608 """
609 class Process(object):
610 """Keeps context for one traced child process.
611
612 Logs all the files this process touched. Ignores directories.
613 """
614 def __init__(self, blacklist, pid, initial_cwd):
615 # Check internal consistency.
616 assert isinstance(pid, int), repr(pid)
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000617 assert_is_renderable(initial_cwd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000618 self.pid = pid
619 # children are Process instances.
620 self.children = []
621 self.initial_cwd = initial_cwd
622 self.cwd = None
maruel@chromium.org538141b2013-06-03 20:57:17 +0000623 self.files = {}
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000624 self.executable = None
625 self.command = None
626 self._blacklist = blacklist
627
628 def to_results_process(self):
629 """Resolves file case sensitivity and or late-bound strings."""
630 # When resolving files, it's normal to get dupe because a file could be
631 # opened multiple times with different case. Resolve the deduplication
632 # here.
633 def fix_path(x):
634 """Returns the native file path case.
635
636 Converts late-bound strings.
637 """
638 if not x:
639 # Do not convert None instance to 'None'.
640 return x
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000641 x = render(x)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000642 if os.path.isabs(x):
643 # If the path is not absolute, which tends to happen occasionally on
644 # Windows, it is not possible to get the native path case so ignore
645 # that trace. It mostly happens for 'executable' value.
maruel@chromium.org561d4b22013-09-26 21:08:08 +0000646 x = file_path.get_native_path_case(x)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000647 return x
648
maruel@chromium.org538141b2013-06-03 20:57:17 +0000649 def fix_and_blacklist_path(x, m):
650 """Receives a tuple (filepath, mode) and processes filepath."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000651 x = fix_path(x)
652 if not x:
653 return
654 # The blacklist needs to be reapplied, since path casing could
655 # influence blacklisting.
656 if self._blacklist(x):
657 return
maruel@chromium.org538141b2013-06-03 20:57:17 +0000658 # Filters out directories. Some may have passed through.
659 if os.path.isdir(x):
660 return
661 return x, m
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000662
maruel@chromium.org538141b2013-06-03 20:57:17 +0000663 # Renders all the files as strings, as some could be RelativePath
664 # instances. It is important to do it first since there could still be
665 # multiple entries with the same path but different modes.
666 rendered = (
667 fix_and_blacklist_path(f, m) for f, m in self.files.iteritems())
668 files = sorted(
669 (f for f in rendered if f),
670 key=lambda x: (x[0], Results.File.ACCEPTABLE_MODES.index(x[1])))
671 # Then converting into a dict will automatically clean up lesser
672 # important values.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000673 files = [
maruel@chromium.org538141b2013-06-03 20:57:17 +0000674 Results.File(None, f, False, None, m)
675 for f, m in dict(files).iteritems()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000676 ]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000677 return Results.Process(
678 self.pid,
679 files,
680 fix_path(self.executable),
681 self.command,
682 fix_path(self.initial_cwd),
683 [c.to_results_process() for c in self.children])
684
maruel@chromium.org538141b2013-06-03 20:57:17 +0000685 def add_file(self, filepath, mode):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000686 """Adds a file if it passes the blacklist."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000687 if self._blacklist(render(filepath)):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000688 return
maruel@chromium.org538141b2013-06-03 20:57:17 +0000689 logging.debug('add_file(%d, %s, %s)', self.pid, filepath, mode)
690 # Note that filepath and not render(filepath) is added. It is because
691 # filepath could be something else than a string, like a RelativePath
692 # instance for dtrace logs.
693 modes = Results.File.ACCEPTABLE_MODES
694 old_mode = self.files.setdefault(filepath, mode)
695 if old_mode != mode and modes.index(old_mode) < modes.index(mode):
696 # Take the highest value.
697 self.files[filepath] = mode
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000698
699 def __init__(self, blacklist):
700 self.blacklist = blacklist
701 # Initial process.
702 self.root_process = None
703 # dict to accelerate process lookup, to not have to lookup the whole graph
704 # each time.
705 self._process_lookup = {}
706
707 class Tracer(object):
708 """During it's lifetime, the tracing subsystem is enabled."""
709 def __init__(self, logname):
710 self._logname = logname
maruel@chromium.org808e3f22013-05-16 16:42:54 +0000711 self._lock = threading.RLock()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000712 self._traces = []
713 self._initialized = True
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +0000714 self._scripts_to_cleanup = []
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000715
716 def trace(self, cmd, cwd, tracename, output):
717 """Runs the OS-specific trace program on an executable.
718
719 Arguments:
720 - cmd: The command (a list) to run.
721 - cwd: Current directory to start the child process in.
722 - tracename: Name of the trace in the logname file.
723 - output: If False, redirects output to PIPEs.
724
725 Returns a tuple (resultcode, output) and updates the internal trace
726 entries.
727 """
728 # The implementation adds an item to self._traces.
729 raise NotImplementedError(self.__class__.__name__)
730
731 def close(self, _timeout=None):
732 """Saves the meta-data in the logname file.
733
734 For kernel-based tracing, stops the tracing subsystem.
735
736 Must not be used manually when using 'with' construct.
737 """
738 with self._lock:
maruel@chromium.org808e3f22013-05-16 16:42:54 +0000739 if not self._initialized:
740 raise TracingFailure(
741 'Called %s.close() on an unitialized object' %
742 self.__class__.__name__,
743 None, None, None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000744 try:
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +0000745 while self._scripts_to_cleanup:
746 try:
747 os.remove(self._scripts_to_cleanup.pop())
748 except OSError as e:
749 logging.error('Failed to delete a temporary script: %s', e)
maruel@chromium.org808e3f22013-05-16 16:42:54 +0000750 write_json(self._logname, self._gen_logdata(), False)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000751 finally:
752 self._initialized = False
753
754 def post_process_log(self):
755 """Post-processes the log so it becomes faster to load afterward.
756
757 Must not be used manually when using 'with' construct.
758 """
759 assert not self._initialized, 'Must stop tracing first.'
760
maruel@chromium.org808e3f22013-05-16 16:42:54 +0000761 def _gen_logdata(self):
762 """Returns the data to be saved in the trace file."""
763 return {
764 'traces': self._traces,
765 }
766
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000767 def __enter__(self):
768 """Enables 'with' statement."""
769 return self
770
771 def __exit__(self, exc_type, exc_value, traceback):
772 """Enables 'with' statement."""
773 self.close()
774 # If an exception was thrown, do not process logs.
775 if not exc_type:
776 self.post_process_log()
777
778 def get_tracer(self, logname):
779 """Returns an ApiBase.Tracer instance.
780
781 Initializes the tracing subsystem, which is a requirement for kernel-based
782 tracers. Only one tracer instance should be live at a time!
783
784 logname is the filepath to the json file that will contain the meta-data
785 about the logs.
786 """
787 return self.Tracer(logname)
788
789 @staticmethod
790 def clean_trace(logname):
791 """Deletes an old log."""
792 raise NotImplementedError()
793
794 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +0000795 def parse_log(cls, logname, blacklist, trace_name):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000796 """Processes trace logs and returns the files opened and the files that do
797 not exist.
798
799 It does not track directories.
800
maruel@chromium.orgec74ff82012-10-29 18:14:47 +0000801 Arguments:
802 - logname: must be an absolute path.
803 - blacklist: must be a lambda.
804 - trace_name: optional trace to read, defaults to reading all traces.
maruel@chromium.org8b056ba2012-10-16 14:04:49 +0000805
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000806 Most of the time, files that do not exist are temporary test files that
807 should be put in /tmp instead. See http://crbug.com/116251.
808
809 Returns a list of dict with keys:
810 - results: A Results instance.
811 - trace: The corresponding tracename parameter provided to
812 get_tracer().trace().
813 - output: Output gathered during execution, if get_tracer().trace(...,
814 output=False) was used.
815 """
816 raise NotImplementedError(cls.__class__.__name__)
817
818
819class Strace(ApiBase):
820 """strace implies linux."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000821 @staticmethod
822 def load_filename(filename):
823 """Parses a filename in a log."""
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000824 # TODO(maruel): Be compatible with strace -x.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000825 assert isinstance(filename, str)
826 out = ''
827 i = 0
828 while i < len(filename):
829 c = filename[i]
830 if c == '\\':
831 out += chr(int(filename[i+1:i+4], 8))
832 i += 4
833 else:
834 out += c
835 i += 1
836 # TODO(maruel): That's not necessarily true that the current code page is
837 # utf-8.
838 return out.decode('utf-8')
839
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000840 class Context(ApiBase.Context):
841 """Processes a strace log line and keeps the list of existent and non
842 existent files accessed.
843
844 Ignores directories.
845
846 Uses late-binding to processes the cwd of each process. The problem is that
847 strace generates one log file per process it traced but doesn't give any
maruel@chromium.org2909b7c2013-05-29 18:52:59 +0000848 information about which process was started when and by who. So process the
849 logs out of order and use late binding with RelativePath to be able to
850 deduce the initial directory of each process once all the logs are parsed.
851
852 TODO(maruel): Use the script even in the non-sudo case, so log parsing can
853 be done in two phase: first find the root process, then process the child
854 processes in order. With that, it should be possible to not use RelativePath
855 anymore. This would significantly simplify the code!
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000856 """
857 class Process(ApiBase.Context.Process):
858 """Represents the state of a process.
859
860 Contains all the information retrieved from the pid-specific log.
861 """
862 # Function names are using ([a-z_0-9]+)
863 # This is the most common format. function(args) = result
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000864 RE_HEADER = re.compile(r'^([a-z_0-9]+)\((.*?)\)\s+= (.+)$')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000865 # An interrupted function call, only grab the minimal header.
866 RE_UNFINISHED = re.compile(r'^([^\(]+)(.*) \<unfinished \.\.\.\>$')
867 # A resumed function call.
868 RE_RESUMED = re.compile(r'^<\.\.\. ([^ ]+) resumed> (.+)$')
869 # A process received a signal.
870 RE_SIGNAL = re.compile(r'^--- SIG[A-Z]+ .+ ---')
871 # A process didn't handle a signal. Ignore any junk appearing before,
872 # because the process was forcibly killed so it won't open any new file.
873 RE_KILLED = re.compile(
874 r'^.*\+\+\+ killed by ([A-Z]+)( \(core dumped\))? \+\+\+$')
875 # The process has exited.
876 RE_PROCESS_EXITED = re.compile(r'^\+\+\+ exited with (\d+) \+\+\+')
877 # A call was canceled. Ignore any prefix.
878 RE_UNAVAILABLE = re.compile(r'^.*\)\s*= \? <unavailable>$')
879 # Happens when strace fails to even get the function name.
880 UNNAMED_FUNCTION = '????'
881
882 # Corner-case in python, a class member function decorator must not be
883 # @staticmethod.
884 def parse_args(regexp, expect_zero): # pylint: disable=E0213
885 """Automatically convert the str 'args' into a list of processed
886 arguments.
887
888 Arguments:
889 - regexp is used to parse args.
890 - expect_zero: one of True, False or None.
891 - True: will check for result.startswith('0') first and will ignore
892 the trace line completely otherwise. This is important because for
893 many functions, the regexp will not process if the call failed.
894 - False: will check for not result.startswith(('?', '-1')) for the
895 same reason than with True.
896 - None: ignore result.
897 """
898 def meta_hook(function):
899 assert function.__name__.startswith('handle_')
900 def hook(self, args, result):
901 if expect_zero is True and not result.startswith('0'):
902 return
903 if expect_zero is False and result.startswith(('?', '-1')):
904 return
905 match = re.match(regexp, args)
906 if not match:
907 raise TracingFailure(
908 'Failed to parse %s(%s) = %s' %
909 (function.__name__[len('handle_'):], args, result),
910 None, None, None)
911 return function(self, match.groups(), result)
912 return hook
913 return meta_hook
914
915 class RelativePath(object):
916 """A late-bound relative path."""
917 def __init__(self, parent, value):
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000918 assert_is_renderable(parent)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000919 self.parent = parent
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000920 assert (
921 value is None or
922 (isinstance(value, unicode) and not os.path.isabs(value)))
923 self.value = value
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000924 if self.value:
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000925 # TODO(maruel): On POSIX, '\\' is a valid character so remove this
926 # assert.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000927 assert '\\' not in self.value, value
928 assert '\\' not in self.value, (repr(value), repr(self.value))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000929
930 def render(self):
931 """Returns the current directory this instance is representing.
932
933 This function is used to return the late-bound value.
934 """
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000935 assert self.parent is not None
936 parent = render(self.parent)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000937 if self.value:
938 return os.path.normpath(os.path.join(parent, self.value))
939 return parent
940
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000941 def __init__(self, root, pid):
942 """Keeps enough information to be able to guess the original process
943 root.
944
945 strace doesn't store which process was the initial process. So more
946 information needs to be kept so the graph can be reconstructed from the
947 flat map.
948 """
949 logging.info('%s(%d)' % (self.__class__.__name__, pid))
950 super(Strace.Context.Process, self).__init__(root.blacklist, pid, None)
951 assert isinstance(root, ApiBase.Context)
952 self._root = weakref.ref(root)
953 # The dict key is the function name of the pending call, like 'open'
954 # or 'execve'.
955 self._pending_calls = {}
956 self._line_number = 0
957 # Current directory when the process started.
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000958 if isinstance(self._root(), unicode):
959 self.initial_cwd = self._root()
960 else:
961 self.initial_cwd = self.RelativePath(self._root(), None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000962 self.parentid = None
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +0000963 self._done = False
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000964
965 def get_cwd(self):
966 """Returns the best known value of cwd."""
967 return self.cwd or self.initial_cwd
968
969 def render(self):
970 """Returns the string value of the RelativePath() object.
971
972 Used by RelativePath. Returns the initial directory and not the
973 current one since the current directory 'cwd' validity is time-limited.
974
975 The validity is only guaranteed once all the logs are processed.
976 """
977 return self.initial_cwd.render()
978
979 def on_line(self, line):
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000980 assert isinstance(line, str)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000981 self._line_number += 1
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000982 try:
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000983 if self._done:
984 raise TracingFailure(
985 'Found a trace for a terminated process or corrupted log',
986 None, None, None)
987
988 if self.RE_SIGNAL.match(line):
989 # Ignore signals.
990 return
991
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000992 match = self.RE_KILLED.match(line)
993 if match:
994 # Converts a '+++ killed by Foo +++' trace into an exit_group().
995 self.handle_exit_group(match.group(1), None)
996 return
997
998 match = self.RE_PROCESS_EXITED.match(line)
999 if match:
1000 # Converts a '+++ exited with 1 +++' trace into an exit_group()
1001 self.handle_exit_group(match.group(1), None)
1002 return
1003
1004 match = self.RE_UNFINISHED.match(line)
1005 if match:
1006 if match.group(1) in self._pending_calls:
1007 raise TracingFailure(
1008 'Found two unfinished calls for the same function',
1009 None, None, None,
1010 self._pending_calls)
1011 self._pending_calls[match.group(1)] = (
1012 match.group(1) + match.group(2))
1013 return
1014
1015 match = self.RE_UNAVAILABLE.match(line)
1016 if match:
1017 # This usually means a process was killed and a pending call was
1018 # canceled.
1019 # TODO(maruel): Look up the last exit_group() trace just above and
1020 # make sure any self._pending_calls[anything] is properly flushed.
1021 return
1022
1023 match = self.RE_RESUMED.match(line)
1024 if match:
1025 if match.group(1) not in self._pending_calls:
1026 raise TracingFailure(
1027 'Found a resumed call that was not logged as unfinished',
1028 None, None, None,
1029 self._pending_calls)
1030 pending = self._pending_calls.pop(match.group(1))
1031 # Reconstruct the line.
1032 line = pending + match.group(2)
1033
1034 match = self.RE_HEADER.match(line)
1035 if not match:
maruel@chromium.orgbcfc9e12013-03-19 18:47:17 +00001036 # The line is corrupted. It happens occasionally when a process is
1037 # killed forcibly with activity going on. Assume the process died.
1038 # No other line can be processed afterward.
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001039 logging.debug('%d is done: %s', self.pid, line)
maruel@chromium.orgbcfc9e12013-03-19 18:47:17 +00001040 self._done = True
1041 return
1042
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001043 if match.group(1) == self.UNNAMED_FUNCTION:
1044 return
1045
1046 # It's a valid line, handle it.
1047 handler = getattr(self, 'handle_%s' % match.group(1), None)
1048 if not handler:
1049 self._handle_unknown(match.group(1), match.group(2), match.group(3))
1050 return handler(match.group(2), match.group(3))
1051 except TracingFailure, e:
1052 # Hack in the values since the handler could be a static function.
1053 e.pid = self.pid
1054 e.line = line
1055 e.line_number = self._line_number
1056 # Re-raise the modified exception.
1057 raise
1058 except (KeyError, NotImplementedError, ValueError), e:
1059 raise TracingFailure(
1060 'Trace generated a %s exception: %s' % (
1061 e.__class__.__name__, str(e)),
1062 self.pid,
1063 self._line_number,
1064 line,
1065 e)
1066
1067 @parse_args(r'^\"(.+?)\", [FKORWX_|]+$', True)
1068 def handle_access(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001069 self._handle_file(args[0], Results.File.TOUCHED)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001070
1071 @parse_args(r'^\"(.+?)\"$', True)
1072 def handle_chdir(self, args, _result):
1073 """Updates cwd."""
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001074 self.cwd = self._mangle(args[0])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001075 logging.debug('handle_chdir(%d, %s)' % (self.pid, self.cwd))
1076
maruel@chromium.org538141b2013-06-03 20:57:17 +00001077 @parse_args(r'^\"(.+?)\", (\d+), (\d+)$', False)
1078 def handle_chown(self, args, _result):
1079 # TODO(maruel): Look at result?
1080 self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org55550722013-05-28 14:43:14 +00001081
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001082 def handle_clone(self, _args, result):
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001083 self._handling_forking('clone', result)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001084
1085 def handle_close(self, _args, _result):
1086 pass
1087
maruel@chromium.org538141b2013-06-03 20:57:17 +00001088 @parse_args(r'^\"(.+?)\", (\d+)$', False)
1089 def handle_chmod(self, args, _result):
1090 self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001091
maruel@chromium.org538141b2013-06-03 20:57:17 +00001092 @parse_args(r'^\"(.+?)\", (\d+)$', False)
1093 def handle_creat(self, args, _result):
1094 self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001095
1096 @parse_args(r'^\"(.+?)\", \[(.+)\], \[\/\* \d+ vars? \*\/\]$', True)
1097 def handle_execve(self, args, _result):
1098 # Even if in practice execve() doesn't returns when it succeeds, strace
1099 # still prints '0' as the result.
1100 filepath = args[0]
maruel@chromium.org538141b2013-06-03 20:57:17 +00001101 self._handle_file(filepath, Results.File.READ)
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001102 self.executable = self._mangle(filepath)
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001103 try:
1104 self.command = strace_process_quoted_arguments(args[1])
1105 except ValueError as e:
1106 raise TracingFailure(
1107 'Failed to process command line argument:\n%s' % e.args[0],
1108 None, None, None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001109
1110 def handle_exit_group(self, _args, _result):
1111 """Removes cwd."""
1112 self.cwd = None
1113
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001114 @parse_args(r'^(\d+|AT_FDCWD), \"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', True)
1115 def handle_faccessat(self, args, _results):
1116 if args[0] == 'AT_FDCWD':
maruel@chromium.org538141b2013-06-03 20:57:17 +00001117 self._handle_file(args[1], Results.File.TOUCHED)
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001118 else:
1119 raise Exception('Relative faccess not implemented.')
1120
maruel@chromium.org0781f322013-05-28 19:45:49 +00001121 def handle_fallocate(self, _args, result):
1122 pass
1123
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001124 def handle_fork(self, args, result):
1125 self._handle_unknown('fork', args, result)
1126
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001127 def handle_futex(self, _args, _result):
1128 pass
1129
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001130 @parse_args(r'^\"(.+?)\", (\d+)$', False)
1131 def handle_getcwd(self, args, _result):
1132 if os.path.isabs(args[0]):
1133 logging.debug('handle_chdir(%d, %s)' % (self.pid, self.cwd))
1134 if not isinstance(self.cwd, unicode):
1135 # Take the occasion to reset the path.
1136 self.cwd = self._mangle(args[0])
1137 else:
1138 # It should always match.
1139 assert self.cwd == Strace.load_filename(args[0]), (
1140 self.cwd, args[0])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001141
1142 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1143 def handle_link(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001144 self._handle_file(args[0], Results.File.READ)
1145 self._handle_file(args[1], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001146
1147 @parse_args(r'\"(.+?)\", \{.+?, \.\.\.\}', True)
1148 def handle_lstat(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001149 self._handle_file(args[0], Results.File.TOUCHED)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001150
1151 def handle_mkdir(self, _args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001152 # We track content, not directories.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001153 pass
1154
1155 @parse_args(r'^\"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', False)
1156 def handle_open(self, args, _result):
1157 if 'O_DIRECTORY' in args[1]:
1158 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00001159 self._handle_file(
1160 args[0],
1161 Results.File.READ if 'O_RDONLY' in args[1] else Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001162
1163 @parse_args(r'^(\d+|AT_FDCWD), \"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', False)
1164 def handle_openat(self, args, _result):
1165 if 'O_DIRECTORY' in args[2]:
1166 return
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001167 if args[0] == 'AT_FDCWD':
maruel@chromium.org538141b2013-06-03 20:57:17 +00001168 self._handle_file(
1169 args[1],
1170 Results.File.READ if 'O_RDONLY' in args[2]
1171 else Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001172 else:
1173 # TODO(maruel): Implement relative open if necessary instead of the
1174 # AT_FDCWD flag, let's hope not since this means tracking all active
1175 # directory handles.
maruel@chromium.org538141b2013-06-03 20:57:17 +00001176 raise NotImplementedError('Relative open via openat not implemented.')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001177
1178 @parse_args(r'^\"(.+?)\", \".+?\"(\.\.\.)?, \d+$', False)
1179 def handle_readlink(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001180 self._handle_file(args[0], Results.File.READ)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001181
1182 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1183 def handle_rename(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001184 self._handle_file(args[0], Results.File.READ)
1185 self._handle_file(args[1], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001186
1187 def handle_rmdir(self, _args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001188 # We track content, not directories.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001189 pass
1190
1191 def handle_setxattr(self, _args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001192 # TODO(maruel): self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001193 pass
1194
1195 @parse_args(r'\"(.+?)\", \{.+?, \.\.\.\}', True)
1196 def handle_stat(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001197 self._handle_file(args[0], Results.File.TOUCHED)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001198
maruel@chromium.org538141b2013-06-03 20:57:17 +00001199 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1200 def handle_symlink(self, args, _result):
1201 self._handle_file(args[0], Results.File.TOUCHED)
1202 self._handle_file(args[1], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001203
csharp@chromium.orgfa672922012-11-02 17:43:31 +00001204 @parse_args(r'^\"(.+?)\", \d+', True)
1205 def handle_truncate(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001206 self._handle_file(args[0], Results.File.WRITE)
csharp@chromium.orgfa672922012-11-02 17:43:31 +00001207
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001208 def handle_unlink(self, _args, _result):
1209 # In theory, the file had to be created anyway.
1210 pass
1211
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001212 def handle_unlinkat(self, _args, _result):
1213 # In theory, the file had to be created anyway.
1214 pass
1215
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001216 def handle_statfs(self, _args, _result):
1217 pass
1218
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001219 def handle_utimensat(self, _args, _result):
1220 pass
1221
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001222 def handle_vfork(self, _args, result):
1223 self._handling_forking('vfork', result)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001224
1225 @staticmethod
1226 def _handle_unknown(function, args, result):
1227 raise TracingFailure(
1228 'Unexpected/unimplemented trace %s(%s)= %s' %
1229 (function, args, result),
1230 None, None, None)
1231
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001232 def _handling_forking(self, name, result):
1233 """Transfers cwd."""
1234 if result.startswith(('?', '-1')):
1235 # The call failed.
1236 return
1237 # Update the other process right away.
1238 childpid = int(result)
1239 child = self._root().get_or_set_proc(childpid)
1240 if child.parentid is not None or childpid in self.children:
1241 raise TracingFailure(
1242 'Found internal inconsitency in process lifetime detection '
1243 'during a %s() call' % name,
1244 None, None, None)
1245
1246 # Copy the cwd object.
1247 child.initial_cwd = self.get_cwd()
1248 child.parentid = self.pid
1249 # It is necessary because the logs are processed out of order.
1250 self.children.append(child)
1251
maruel@chromium.org538141b2013-06-03 20:57:17 +00001252 def _handle_file(self, filepath, mode):
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001253 filepath = self._mangle(filepath)
maruel@chromium.org538141b2013-06-03 20:57:17 +00001254 self.add_file(filepath, mode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001255
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001256 def _mangle(self, filepath):
1257 """Decodes a filepath found in the log and convert it to a late-bound
1258 path if necessary.
1259
1260 |filepath| is an strace 'encoded' string and the returned value is
1261 either an unicode string if the path was absolute or a late bound path
1262 otherwise.
1263 """
1264 filepath = Strace.load_filename(filepath)
1265 if os.path.isabs(filepath):
1266 return filepath
1267 else:
1268 if isinstance(self.get_cwd(), unicode):
1269 return os.path.normpath(os.path.join(self.get_cwd(), filepath))
1270 return self.RelativePath(self.get_cwd(), filepath)
1271
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001272 def __init__(self, blacklist, root_pid, initial_cwd):
1273 """|root_pid| may be None when the root process is not known.
1274
1275 In that case, a search is done after reading all the logs to figure out
1276 the root process.
1277 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001278 super(Strace.Context, self).__init__(blacklist)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001279 assert_is_renderable(initial_cwd)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001280 self.root_pid = root_pid
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001281 self.initial_cwd = initial_cwd
1282
1283 def render(self):
1284 """Returns the string value of the initial cwd of the root process.
1285
1286 Used by RelativePath.
1287 """
1288 return self.initial_cwd
1289
1290 def on_line(self, pid, line):
1291 """Transfers control into the Process.on_line() function."""
1292 self.get_or_set_proc(pid).on_line(line.strip())
1293
1294 def to_results(self):
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001295 """If necessary, finds back the root process and verify consistency."""
1296 if not self.root_pid:
1297 # The non-sudo case. The traced process was started by strace itself,
1298 # so the pid of the traced process is not known.
1299 root = [p for p in self._process_lookup.itervalues() if not p.parentid]
1300 if len(root) == 1:
1301 self.root_process = root[0]
1302 # Save it for later.
1303 self.root_pid = self.root_process.pid
1304 else:
1305 # The sudo case. The traced process was started manually so its pid is
1306 # known.
1307 self.root_process = self._process_lookup.get(self.root_pid)
1308 if not self.root_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001309 raise TracingFailure(
1310 'Found internal inconsitency in process lifetime detection '
1311 'while finding the root process',
1312 None,
1313 None,
1314 None,
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001315 self.root_pid,
1316 sorted(self._process_lookup))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001317 process = self.root_process.to_results_process()
1318 if sorted(self._process_lookup) != sorted(p.pid for p in process.all):
1319 raise TracingFailure(
1320 'Found internal inconsitency in process lifetime detection '
1321 'while looking for len(tree) == len(list)',
1322 None,
1323 None,
1324 None,
1325 sorted(self._process_lookup),
1326 sorted(p.pid for p in process.all))
1327 return Results(process)
1328
1329 def get_or_set_proc(self, pid):
1330 """Returns the Context.Process instance for this pid or creates a new one.
1331 """
1332 if not pid or not isinstance(pid, int):
1333 raise TracingFailure(
1334 'Unpexpected value for pid: %r' % pid,
1335 pid,
1336 None,
1337 None,
1338 pid)
1339 if pid not in self._process_lookup:
1340 self._process_lookup[pid] = self.Process(self, pid)
1341 return self._process_lookup[pid]
1342
1343 @classmethod
1344 def traces(cls):
1345 """Returns the list of all handled traces to pass this as an argument to
1346 strace.
1347 """
1348 prefix = 'handle_'
1349 return [i[len(prefix):] for i in dir(cls.Process) if i.startswith(prefix)]
1350
1351 class Tracer(ApiBase.Tracer):
1352 MAX_LEN = 256
1353
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001354 def __init__(self, logname, use_sudo):
1355 super(Strace.Tracer, self).__init__(logname)
1356 self.use_sudo = use_sudo
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001357 if use_sudo:
1358 # TODO(maruel): Use the jump script systematically to make it easy to
1359 # figure out the root process, so RelativePath is not necessary anymore.
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00001360 self._child_script = create_exec_thunk()
1361 self._scripts_to_cleanup.append(self._child_script)
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001362
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001363 def trace(self, cmd, cwd, tracename, output):
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001364 """Runs strace on an executable.
1365
1366 When use_sudo=True, it is a 3-phases process: start the thunk, start
1367 sudo strace with the pid of the thunk and then have the thunk os.execve()
1368 the process to trace.
1369 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001370 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
1371 assert os.path.isabs(cmd[0]), cmd[0]
1372 assert os.path.isabs(cwd), cwd
1373 assert os.path.normpath(cwd) == cwd, cwd
1374 with self._lock:
1375 if not self._initialized:
1376 raise TracingFailure(
1377 'Called Tracer.trace() on an unitialized object',
1378 None, None, None, tracename)
1379 assert tracename not in (i['trace'] for i in self._traces)
1380 stdout = stderr = None
1381 if output:
1382 stdout = subprocess.PIPE
1383 stderr = subprocess.STDOUT
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001384
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001385 # Ensure all file related APIs are hooked.
1386 traces = ','.join(Strace.Context.traces() + ['file'])
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001387 flags = [
maruel@chromium.orga5cfdc32013-05-28 14:46:15 +00001388 # Each child process has its own trace file. It is necessary because
1389 # strace may generate corrupted log file if multiple processes are
1390 # heavily doing syscalls simultaneously.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001391 '-ff',
maruel@chromium.orga5cfdc32013-05-28 14:46:15 +00001392 # Reduce whitespace usage.
1393 '-a1',
1394 # hex encode non-ascii strings.
1395 # TODO(maruel): '-x',
1396 # TODO(maruel): '-ttt',
1397 # Signals are unnecessary noise here. Note the parser can cope with them
1398 # but reduce the unnecessary output.
1399 '-esignal=none',
1400 # Print as much data as wanted.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001401 '-s', '%d' % self.MAX_LEN,
1402 '-e', 'trace=%s' % traces,
1403 '-o', self._logname + '.' + tracename,
1404 ]
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001405
1406 if self.use_sudo:
1407 pipe_r, pipe_w = os.pipe()
1408 # Start the child process paused.
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00001409 target_cmd = [sys.executable, self._child_script, str(pipe_r)] + cmd
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001410 logging.debug(' '.join(target_cmd))
1411 child_proc = subprocess.Popen(
1412 target_cmd,
1413 stdin=subprocess.PIPE,
1414 stdout=stdout,
1415 stderr=stderr,
1416 cwd=cwd)
1417
1418 # TODO(maruel): both processes must use the same UID for it to work
1419 # without sudo. Look why -p is failing at the moment without sudo.
1420 trace_cmd = [
1421 'sudo',
1422 'strace',
1423 '-p', str(child_proc.pid),
1424 ] + flags
1425 logging.debug(' '.join(trace_cmd))
1426 strace_proc = subprocess.Popen(
1427 trace_cmd,
1428 cwd=cwd,
1429 stdin=subprocess.PIPE,
1430 stdout=subprocess.PIPE,
1431 stderr=subprocess.PIPE)
1432
1433 line = strace_proc.stderr.readline()
1434 if not re.match(r'^Process \d+ attached \- interrupt to quit$', line):
1435 # TODO(maruel): Raise an exception.
1436 assert False, line
1437
1438 # Now fire the child process.
1439 os.write(pipe_w, 'x')
1440
1441 out = child_proc.communicate()[0]
1442 strace_out = strace_proc.communicate()[0]
1443
1444 # TODO(maruel): if strace_proc.returncode: Add an exception.
1445 saved_out = strace_out if strace_proc.returncode else out
1446 root_pid = child_proc.pid
1447 else:
1448 # Non-sudo case.
1449 trace_cmd = [
1450 'strace',
1451 ] + flags + cmd
1452 logging.debug(' '.join(trace_cmd))
1453 child_proc = subprocess.Popen(
1454 trace_cmd,
1455 cwd=cwd,
1456 stdin=subprocess.PIPE,
1457 stdout=stdout,
1458 stderr=stderr)
1459 out = child_proc.communicate()[0]
1460 # TODO(maruel): Walk the logs and figure out the root process would
1461 # simplify parsing the logs a *lot*.
1462 saved_out = out
1463 # The trace reader will have to figure out.
1464 root_pid = None
1465
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001466 with self._lock:
1467 assert tracename not in (i['trace'] for i in self._traces)
1468 self._traces.append(
1469 {
1470 'cmd': cmd,
1471 'cwd': cwd,
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001472 'output': saved_out,
1473 'pid': root_pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001474 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001475 })
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001476 return child_proc.returncode, out
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001477
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001478 def __init__(self, use_sudo=None):
1479 super(Strace, self).__init__()
1480 self.use_sudo = use_sudo
1481
1482 def get_tracer(self, logname):
1483 return self.Tracer(logname, self.use_sudo)
1484
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001485 @staticmethod
1486 def clean_trace(logname):
1487 if os.path.isfile(logname):
1488 os.remove(logname)
1489 # Also delete any pid specific file from previous traces.
1490 for i in glob.iglob(logname + '.*'):
1491 if i.rsplit('.', 1)[1].isdigit():
1492 os.remove(i)
1493
1494 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001495 def parse_log(cls, logname, blacklist, trace_name):
1496 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00001497 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001498 data = read_json(logname)
1499 out = []
1500 for item in data['traces']:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001501 if trace_name and item['trace'] != trace_name:
1502 continue
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001503 result = {
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001504 'output': item['output'],
maruel@chromium.org964ddde2012-10-29 18:13:46 +00001505 'trace': item['trace'],
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001506 }
1507 try:
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001508 context = cls.Context(blacklist, item['pid'], item['cwd'])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001509 for pidfile in glob.iglob('%s.%s.*' % (logname, item['trace'])):
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001510 logging.debug('Reading %s', pidfile)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001511 pid = pidfile.rsplit('.', 1)[1]
1512 if pid.isdigit():
1513 pid = int(pid)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001514 found_line = False
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001515 for line in open(pidfile, 'rb'):
1516 context.on_line(pid, line)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001517 found_line = True
1518 if not found_line:
1519 # Ensures that a completely empty trace still creates the
1520 # corresponding Process instance by logging a dummy line.
1521 context.on_line(pid, '')
1522 else:
1523 logging.warning('Found unexpected file %s', pidfile)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001524 result['results'] = context.to_results()
maruel@chromium.org424f5692012-10-15 15:42:38 +00001525 except TracingFailure:
1526 result['exception'] = sys.exc_info()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001527 out.append(result)
1528 return out
1529
1530
1531class Dtrace(ApiBase):
1532 """Uses DTrace framework through dtrace. Requires root access.
1533
1534 Implies Mac OSX.
1535
1536 dtruss can't be used because it has compatibility issues with python.
1537
1538 Also, the pid->cwd handling needs to be done manually since OSX has no way to
1539 get the absolute path of the 'cwd' dtrace variable from the probe.
1540
1541 Also, OSX doesn't populate curpsinfo->pr_psargs properly, see
1542 https://discussions.apple.com/thread/1980539. So resort to handling execve()
1543 manually.
1544
1545 errno is not printed in the log since this implementation currently only cares
1546 about files that were successfully opened.
1547 """
1548 class Context(ApiBase.Context):
1549 # Format: index pid function(args)
1550 RE_HEADER = re.compile(r'^\d+ (\d+) ([a-zA-Z_\-]+)\((.*?)\)$')
1551
1552 # Arguments parsing.
1553 RE_DTRACE_BEGIN = re.compile(r'^\"(.+?)\"$')
1554 RE_CHDIR = re.compile(r'^\"(.+?)\"$')
1555 RE_EXECVE = re.compile(r'^\"(.+?)\", \[(\d+), (.+)\]$')
1556 RE_OPEN = re.compile(r'^\"(.+?)\", (0x[0-9a-z]+), (0x[0-9a-z]+)$')
1557 RE_PROC_START = re.compile(r'^(\d+), \"(.+?)\", (\d+)$')
1558 RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$')
1559
maruel@chromium.orgac361162013-06-04 15:54:06 +00001560 # O_DIRECTORY is not defined on Windows and dtrace doesn't exist on Windows.
1561 O_DIRECTORY = os.O_DIRECTORY if hasattr(os, 'O_DIRECTORY') else None
maruel@chromium.org538141b2013-06-03 20:57:17 +00001562 O_RDWR = os.O_RDWR
1563 O_WRONLY = os.O_WRONLY
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001564
1565 class Process(ApiBase.Context.Process):
1566 def __init__(self, *args):
1567 super(Dtrace.Context.Process, self).__init__(*args)
1568 self.cwd = self.initial_cwd
1569
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001570 def __init__(self, blacklist, thunk_pid, initial_cwd):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001571 logging.info(
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001572 '%s(%d, %s)' % (self.__class__.__name__, thunk_pid, initial_cwd))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001573 super(Dtrace.Context, self).__init__(blacklist)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001574 assert isinstance(initial_cwd, unicode), initial_cwd
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001575 # Process ID of the temporary script created by create_subprocess_thunk().
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001576 self._thunk_pid = thunk_pid
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001577 self._initial_cwd = initial_cwd
1578 self._line_number = 0
1579
1580 def on_line(self, line):
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001581 assert isinstance(line, unicode), line
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001582 self._line_number += 1
1583 match = self.RE_HEADER.match(line)
1584 if not match:
1585 raise TracingFailure(
1586 'Found malformed line: %s' % line,
1587 None,
1588 self._line_number,
1589 line)
1590 fn = getattr(
1591 self,
1592 'handle_%s' % match.group(2).replace('-', '_'),
1593 self._handle_ignored)
1594 # It is guaranteed to succeed because of the regexp. Or at least I thought
1595 # it would.
1596 pid = int(match.group(1))
1597 try:
1598 return fn(pid, match.group(3))
1599 except TracingFailure, e:
1600 # Hack in the values since the handler could be a static function.
1601 e.pid = pid
1602 e.line = line
1603 e.line_number = self._line_number
1604 # Re-raise the modified exception.
1605 raise
1606 except (KeyError, NotImplementedError, ValueError), e:
1607 raise TracingFailure(
1608 'Trace generated a %s exception: %s' % (
1609 e.__class__.__name__, str(e)),
1610 pid,
1611 self._line_number,
1612 line,
1613 e)
1614
1615 def to_results(self):
1616 process = self.root_process.to_results_process()
1617 # Internal concistency check.
1618 if sorted(self._process_lookup) != sorted(p.pid for p in process.all):
1619 raise TracingFailure(
1620 'Found internal inconsitency in process lifetime detection '
1621 'while looking for len(tree) == len(list)',
1622 None,
1623 None,
1624 None,
1625 sorted(self._process_lookup),
1626 sorted(p.pid for p in process.all))
1627 return Results(process)
1628
1629 def handle_dtrace_BEGIN(self, _pid, args):
1630 if not self.RE_DTRACE_BEGIN.match(args):
1631 raise TracingFailure(
1632 'Found internal inconsitency in dtrace_BEGIN log line',
1633 None, None, None)
1634
1635 def handle_proc_start(self, pid, args):
1636 """Transfers cwd.
1637
1638 The dtrace script already takes care of only tracing the processes that
1639 are child of the traced processes so there is no need to verify the
1640 process hierarchy.
1641 """
1642 if pid in self._process_lookup:
1643 raise TracingFailure(
1644 'Found internal inconsitency in proc_start: %d started two times' %
1645 pid,
1646 None, None, None)
1647 match = self.RE_PROC_START.match(args)
1648 if not match:
1649 raise TracingFailure(
1650 'Failed to parse arguments: %s' % args,
1651 None, None, None)
1652 ppid = int(match.group(1))
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001653 if ppid == self._thunk_pid and not self.root_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001654 proc = self.root_process = self.Process(
1655 self.blacklist, pid, self._initial_cwd)
1656 elif ppid in self._process_lookup:
1657 proc = self.Process(self.blacklist, pid, self._process_lookup[ppid].cwd)
1658 self._process_lookup[ppid].children.append(proc)
1659 else:
1660 # Another process tree, ignore.
1661 return
1662 self._process_lookup[pid] = proc
1663 logging.debug(
1664 'New child: %s -> %d cwd:%s' %
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001665 (ppid, pid, render(proc.initial_cwd)))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001666
1667 def handle_proc_exit(self, pid, _args):
1668 """Removes cwd."""
1669 if pid in self._process_lookup:
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001670 # self._thunk_pid is not traced itself and other traces run neither.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001671 self._process_lookup[pid].cwd = None
1672
1673 def handle_execve(self, pid, args):
1674 """Sets the process' executable.
1675
1676 TODO(maruel): Read command line arguments. See
1677 https://discussions.apple.com/thread/1980539 for an example.
1678 https://gist.github.com/1242279
1679
1680 Will have to put the answer at http://stackoverflow.com/questions/7556249.
1681 :)
1682 """
1683 if not pid in self._process_lookup:
1684 # Another process tree, ignore.
1685 return
1686 match = self.RE_EXECVE.match(args)
1687 if not match:
1688 raise TracingFailure(
1689 'Failed to parse arguments: %r' % args,
1690 None, None, None)
1691 proc = self._process_lookup[pid]
1692 proc.executable = match.group(1)
maruel@chromium.org538141b2013-06-03 20:57:17 +00001693 self._handle_file(pid, proc.executable, Results.File.READ)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001694 proc.command = self.process_escaped_arguments(match.group(3))
1695 if int(match.group(2)) != len(proc.command):
1696 raise TracingFailure(
1697 'Failed to parse execve() arguments: %s' % args,
1698 None, None, None)
1699
1700 def handle_chdir(self, pid, args):
1701 """Updates cwd."""
1702 if pid not in self._process_lookup:
1703 # Another process tree, ignore.
1704 return
1705 cwd = self.RE_CHDIR.match(args).group(1)
1706 if not cwd.startswith('/'):
1707 cwd2 = os.path.join(self._process_lookup[pid].cwd, cwd)
1708 logging.debug('handle_chdir(%d, %s) -> %s' % (pid, cwd, cwd2))
1709 else:
1710 logging.debug('handle_chdir(%d, %s)' % (pid, cwd))
1711 cwd2 = cwd
1712 self._process_lookup[pid].cwd = cwd2
1713
1714 def handle_open_nocancel(self, pid, args):
1715 """Redirects to handle_open()."""
1716 return self.handle_open(pid, args)
1717
1718 def handle_open(self, pid, args):
1719 if pid not in self._process_lookup:
1720 # Another process tree, ignore.
1721 return
1722 match = self.RE_OPEN.match(args)
1723 if not match:
1724 raise TracingFailure(
1725 'Failed to parse arguments: %s' % args,
1726 None, None, None)
1727 flag = int(match.group(2), 16)
1728 if self.O_DIRECTORY & flag == self.O_DIRECTORY:
1729 # Ignore directories.
1730 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00001731 self._handle_file(
1732 pid,
1733 match.group(1),
1734 Results.File.READ if not ((self.O_RDWR | self.O_WRONLY) & flag)
1735 else Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001736
1737 def handle_rename(self, pid, args):
1738 if pid not in self._process_lookup:
1739 # Another process tree, ignore.
1740 return
1741 match = self.RE_RENAME.match(args)
1742 if not match:
1743 raise TracingFailure(
1744 'Failed to parse arguments: %s' % args,
1745 None, None, None)
maruel@chromium.org538141b2013-06-03 20:57:17 +00001746 self._handle_file(pid, match.group(1), Results.File.READ)
1747 self._handle_file(pid, match.group(2), Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001748
maruel@chromium.org538141b2013-06-03 20:57:17 +00001749 def _handle_file(self, pid, filepath, mode):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001750 if not filepath.startswith('/'):
1751 filepath = os.path.join(self._process_lookup[pid].cwd, filepath)
1752 # We can get '..' in the path.
1753 filepath = os.path.normpath(filepath)
1754 # Sadly, still need to filter out directories here;
1755 # saw open_nocancel(".", 0, 0) = 0 lines.
1756 if os.path.isdir(filepath):
1757 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00001758 self._process_lookup[pid].add_file(filepath, mode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001759
1760 def handle_ftruncate(self, pid, args):
1761 """Just used as a signal to kill dtrace, ignoring."""
1762 pass
1763
1764 @staticmethod
1765 def _handle_ignored(pid, args):
1766 """Is called for all the event traces that are not handled."""
1767 raise NotImplementedError('Please implement me')
1768
1769 @staticmethod
1770 def process_escaped_arguments(text):
1771 """Extracts escaped arguments on a string and return the arguments as a
1772 list.
1773
1774 Implemented as an automaton.
1775
1776 Example:
1777 With text = '\\001python2.7\\001-c\\001print(\\"hi\\")\\0', the
1778 function will return ['python2.7', '-c', 'print("hi")]
1779 """
1780 if not text.endswith('\\0'):
1781 raise ValueError('String is not null terminated: %r' % text, text)
1782 text = text[:-2]
1783
1784 def unescape(x):
1785 """Replaces '\\' with '\' and '\?' (where ? is anything) with ?."""
1786 out = []
1787 escaped = False
1788 for i in x:
1789 if i == '\\' and not escaped:
1790 escaped = True
1791 continue
1792 escaped = False
1793 out.append(i)
1794 return ''.join(out)
1795
1796 return [unescape(i) for i in text.split('\\001')]
1797
1798 class Tracer(ApiBase.Tracer):
1799 # pylint: disable=C0301
1800 #
1801 # To understand the following code, you'll want to take a look at:
1802 # http://developers.sun.com/solaris/articles/dtrace_quickref/dtrace_quickref.html
1803 # https://wikis.oracle.com/display/DTrace/Variables
1804 # http://docs.oracle.com/cd/E19205-01/820-4221/
1805 #
1806 # 0. Dump all the valid probes into a text file. It is important, you
1807 # want to redirect into a file and you don't want to constantly 'sudo'.
1808 # $ sudo dtrace -l > probes.txt
1809 #
1810 # 1. Count the number of probes:
1811 # $ wc -l probes.txt
1812 # 81823 # On OSX 10.7, including 1 header line.
1813 #
1814 # 2. List providers, intentionally skipping all the 'syspolicy10925' and the
1815 # likes and skipping the header with NR>1:
1816 # $ awk 'NR>1 { print $2 }' probes.txt | sort | uniq | grep -v '[[:digit:]]'
1817 # dtrace
1818 # fbt
1819 # io
1820 # ip
1821 # lockstat
1822 # mach_trap
1823 # proc
1824 # profile
1825 # sched
1826 # syscall
1827 # tcp
1828 # vminfo
1829 #
1830 # 3. List of valid probes:
1831 # $ grep syscall probes.txt | less
1832 # or use dtrace directly:
1833 # $ sudo dtrace -l -P syscall | less
1834 #
1835 # trackedpid is an associative array where its value can be 0, 1 or 2.
1836 # 0 is for untracked processes and is the default value for items not
1837 # in the associative array.
1838 # 1 is for tracked processes.
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001839 # 2 is for the script created by create_subprocess_thunk() only. It is not
1840 # tracked itself but all its decendants are.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001841 #
1842 # The script will kill itself only once waiting_to_die == 1 and
1843 # current_processes == 0, so that both getlogin() was called and that
1844 # all traced processes exited.
1845 #
1846 # TODO(maruel): Use cacheable predicates. See
1847 # https://wikis.oracle.com/display/DTrace/Performance+Considerations
1848 D_CODE = """
1849 dtrace:::BEGIN {
1850 waiting_to_die = 0;
1851 current_processes = 0;
1852 logindex = 0;
1853 printf("%d %d %s_%s(\\"%s\\")\\n",
1854 logindex, PID, probeprov, probename, SCRIPT);
1855 logindex++;
1856 }
1857
1858 proc:::start /trackedpid[ppid]/ {
1859 trackedpid[pid] = 1;
1860 current_processes += 1;
1861 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
1862 logindex, pid, probeprov, probename,
1863 ppid,
1864 execname,
1865 current_processes);
1866 logindex++;
1867 }
1868 /* Should use SCRIPT but there is no access to this variable at that
1869 * point. */
1870 proc:::start /ppid == PID && execname == "Python"/ {
1871 trackedpid[pid] = 2;
1872 current_processes += 1;
1873 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
1874 logindex, pid, probeprov, probename,
1875 ppid,
1876 execname,
1877 current_processes);
1878 logindex++;
1879 }
1880 proc:::exit /trackedpid[pid] &&
1881 current_processes == 1 &&
1882 waiting_to_die == 1/ {
1883 trackedpid[pid] = 0;
1884 current_processes -= 1;
1885 printf("%d %d %s_%s(%d)\\n",
1886 logindex, pid, probeprov, probename,
1887 current_processes);
1888 logindex++;
1889 exit(0);
1890 }
1891 proc:::exit /trackedpid[pid]/ {
1892 trackedpid[pid] = 0;
1893 current_processes -= 1;
1894 printf("%d %d %s_%s(%d)\\n",
1895 logindex, pid, probeprov, probename,
1896 current_processes);
1897 logindex++;
1898 }
1899
1900 /* Use an arcane function to detect when we need to die */
1901 syscall::ftruncate:entry /pid == PID && arg0 == FILE_ID/ {
1902 waiting_to_die = 1;
1903 printf("%d %d %s()\\n", logindex, pid, probefunc);
1904 logindex++;
1905 }
1906 syscall::ftruncate:entry /
1907 pid == PID && arg0 == FILE_ID && current_processes == 0/ {
1908 exit(0);
1909 }
1910
1911 syscall::open*:entry /trackedpid[pid] == 1/ {
1912 self->open_arg0 = arg0;
1913 self->open_arg1 = arg1;
1914 self->open_arg2 = arg2;
1915 }
1916 syscall::open*:return /trackedpid[pid] == 1 && errno == 0/ {
1917 this->open_arg0 = copyinstr(self->open_arg0);
1918 printf("%d %d %s(\\"%s\\", 0x%x, 0x%x)\\n",
1919 logindex, pid, probefunc,
1920 this->open_arg0,
1921 self->open_arg1,
1922 self->open_arg2);
1923 logindex++;
1924 this->open_arg0 = 0;
1925 }
1926 syscall::open*:return /trackedpid[pid] == 1/ {
1927 self->open_arg0 = 0;
1928 self->open_arg1 = 0;
1929 self->open_arg2 = 0;
1930 }
1931
1932 syscall::rename:entry /trackedpid[pid] == 1/ {
1933 self->rename_arg0 = arg0;
1934 self->rename_arg1 = arg1;
1935 }
1936 syscall::rename:return /trackedpid[pid] == 1 && errno == 0/ {
1937 this->rename_arg0 = copyinstr(self->rename_arg0);
1938 this->rename_arg1 = copyinstr(self->rename_arg1);
1939 printf("%d %d %s(\\"%s\\", \\"%s\\")\\n",
1940 logindex, pid, probefunc,
1941 this->rename_arg0,
1942 this->rename_arg1);
1943 logindex++;
1944 this->rename_arg0 = 0;
1945 this->rename_arg1 = 0;
1946 }
1947 syscall::rename:return /trackedpid[pid] == 1/ {
1948 self->rename_arg0 = 0;
1949 self->rename_arg1 = 0;
1950 }
1951
1952 /* Track chdir, it's painful because it is only receiving relative path.
1953 */
1954 syscall::chdir:entry /trackedpid[pid] == 1/ {
1955 self->chdir_arg0 = arg0;
1956 }
1957 syscall::chdir:return /trackedpid[pid] == 1 && errno == 0/ {
1958 this->chdir_arg0 = copyinstr(self->chdir_arg0);
1959 printf("%d %d %s(\\"%s\\")\\n",
1960 logindex, pid, probefunc,
1961 this->chdir_arg0);
1962 logindex++;
1963 this->chdir_arg0 = 0;
1964 }
1965 syscall::chdir:return /trackedpid[pid] == 1/ {
1966 self->chdir_arg0 = 0;
1967 }
1968 """
1969
1970 # execve-specific code, tends to throw a lot of exceptions.
1971 D_CODE_EXECVE = """
1972 /* Finally what we care about! */
1973 syscall::exec*:entry /trackedpid[pid]/ {
1974 self->exec_arg0 = copyinstr(arg0);
1975 /* Incrementally probe for a NULL in the argv parameter of execve() to
1976 * figure out argc. */
1977 /* TODO(maruel): Skip the remaining copyin() when a NULL pointer was
1978 * found. */
1979 self->exec_argc = 0;
1980 /* Probe for argc==1 */
1981 this->exec_argv = (user_addr_t*)copyin(
1982 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
1983 self->exec_argc = this->exec_argv[self->exec_argc] ?
1984 (self->exec_argc + 1) : self->exec_argc;
1985
1986 /* Probe for argc==2 */
1987 this->exec_argv = (user_addr_t*)copyin(
1988 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
1989 self->exec_argc = this->exec_argv[self->exec_argc] ?
1990 (self->exec_argc + 1) : self->exec_argc;
1991
1992 /* Probe for argc==3 */
1993 this->exec_argv = (user_addr_t*)copyin(
1994 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
1995 self->exec_argc = this->exec_argv[self->exec_argc] ?
1996 (self->exec_argc + 1) : self->exec_argc;
1997
1998 /* Probe for argc==4 */
1999 this->exec_argv = (user_addr_t*)copyin(
2000 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2001 self->exec_argc = this->exec_argv[self->exec_argc] ?
2002 (self->exec_argc + 1) : self->exec_argc;
2003
2004 /* Copy the inputs strings since there is no guarantee they'll be
2005 * present after the call completed. */
2006 self->exec_argv0 = (self->exec_argc > 0) ?
2007 copyinstr(this->exec_argv[0]) : "";
2008 self->exec_argv1 = (self->exec_argc > 1) ?
2009 copyinstr(this->exec_argv[1]) : "";
2010 self->exec_argv2 = (self->exec_argc > 2) ?
2011 copyinstr(this->exec_argv[2]) : "";
2012 self->exec_argv3 = (self->exec_argc > 3) ?
2013 copyinstr(this->exec_argv[3]) : "";
2014 this->exec_argv = 0;
2015 }
2016 syscall::exec*:return /trackedpid[pid] && errno == 0/ {
2017 /* We need to join strings here, as using multiple printf() would
2018 * cause tearing when multiple threads/processes are traced.
2019 * Since it is impossible to escape a string and join it to another one,
2020 * like sprintf("%s%S", previous, more), use hackery.
2021 * Each of the elements are split with a \\1. \\0 cannot be used because
2022 * it is simply ignored. This will conflict with any program putting a
2023 * \\1 in their execve() string but this should be "rare enough" */
2024 this->args = "";
2025 /* Process exec_argv[0] */
2026 this->args = strjoin(
2027 this->args, (self->exec_argc > 0) ? self->exec_argv0 : "");
2028
2029 /* Process exec_argv[1] */
2030 this->args = strjoin(
2031 this->args, (self->exec_argc > 1) ? "\\1" : "");
2032 this->args = strjoin(
2033 this->args, (self->exec_argc > 1) ? self->exec_argv1 : "");
2034
2035 /* Process exec_argv[2] */
2036 this->args = strjoin(
2037 this->args, (self->exec_argc > 2) ? "\\1" : "");
2038 this->args = strjoin(
2039 this->args, (self->exec_argc > 2) ? self->exec_argv2 : "");
2040
2041 /* Process exec_argv[3] */
2042 this->args = strjoin(
2043 this->args, (self->exec_argc > 3) ? "\\1" : "");
2044 this->args = strjoin(
2045 this->args, (self->exec_argc > 3) ? self->exec_argv3 : "");
2046
2047 /* Prints self->exec_argc to permits verifying the internal
2048 * consistency since this code is quite fishy. */
2049 printf("%d %d %s(\\"%s\\", [%d, %S])\\n",
2050 logindex, pid, probefunc,
2051 self->exec_arg0,
2052 self->exec_argc,
2053 this->args);
2054 logindex++;
2055 this->args = 0;
2056 }
2057 syscall::exec*:return /trackedpid[pid]/ {
2058 self->exec_arg0 = 0;
2059 self->exec_argc = 0;
2060 self->exec_argv0 = 0;
2061 self->exec_argv1 = 0;
2062 self->exec_argv2 = 0;
2063 self->exec_argv3 = 0;
2064 }
2065 """
2066
2067 # Code currently not used.
2068 D_EXTRANEOUS = """
2069 /* This is a good learning experience, since it traces a lot of things
2070 * related to the process and child processes.
2071 * Warning: it generates a gigantic log. For example, tracing
2072 * "data/trace_inputs/child1.py --child" generates a 2mb log and takes
2073 * several minutes to execute.
2074 */
2075 /*
2076 mach_trap::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2077 printf("%d %d %s_%s() = %d\\n",
2078 logindex, pid, probeprov, probefunc, errno);
2079 logindex++;
2080 }
2081 proc::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2082 printf("%d %d %s_%s() = %d\\n",
2083 logindex, pid, probeprov, probefunc, errno);
2084 logindex++;
2085 }
2086 sched::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2087 printf("%d %d %s_%s() = %d\\n",
2088 logindex, pid, probeprov, probefunc, errno);
2089 logindex++;
2090 }
2091 syscall::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2092 printf("%d %d %s_%s() = %d\\n",
2093 logindex, pid, probeprov, probefunc, errno);
2094 logindex++;
2095 }
2096 vminfo::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2097 printf("%d %d %s_%s() = %d\\n",
2098 logindex, pid, probeprov, probefunc, errno);
2099 logindex++;
2100 }
2101 */
2102 /* TODO(maruel): *stat* functions and friends
2103 syscall::access:return,
2104 syscall::chdir:return,
2105 syscall::chflags:return,
2106 syscall::chown:return,
2107 syscall::chroot:return,
2108 syscall::getattrlist:return,
2109 syscall::getxattr:return,
2110 syscall::lchown:return,
2111 syscall::lstat64:return,
2112 syscall::lstat:return,
2113 syscall::mkdir:return,
2114 syscall::pathconf:return,
2115 syscall::readlink:return,
2116 syscall::removexattr:return,
2117 syscall::setxattr:return,
2118 syscall::stat64:return,
2119 syscall::stat:return,
2120 syscall::truncate:return,
2121 syscall::unlink:return,
2122 syscall::utimes:return,
2123 */
2124 """
2125
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002126 def __init__(self, logname, use_sudo):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002127 """Starts the log collection with dtrace.
2128
2129 Requires root access or chmod 4555 on dtrace. dtrace is asynchronous so
2130 this needs to wait for dtrace to be "warmed up".
2131 """
2132 super(Dtrace.Tracer, self).__init__(logname)
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002133 # This script is used as a signal to figure out the root process.
2134 self._signal_script = create_subprocess_thunk()
2135 self._scripts_to_cleanup.append(self._signal_script)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002136 # This unique dummy temp file is used to signal the dtrace script that it
2137 # should stop as soon as all the child processes are done. A bit hackish
2138 # but works fine enough.
2139 self._dummy_file_id, self._dummy_file_name = tempfile.mkstemp(
2140 prefix='trace_signal_file')
2141
maruel@chromium.org9b2112a2013-04-05 18:01:02 +00002142 dtrace_path = '/usr/sbin/dtrace'
2143 if not os.path.isfile(dtrace_path):
2144 dtrace_path = 'dtrace'
2145 elif use_sudo is None and (os.stat(dtrace_path).st_mode & stat.S_ISUID):
2146 # No need to sudo. For those following at home, don't do that.
2147 use_sudo = False
2148
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002149 # Note: do not use the -p flag. It's useless if the initial process quits
2150 # too fast, resulting in missing traces from the grand-children. The D
2151 # code manages the dtrace lifetime itself.
2152 trace_cmd = [
maruel@chromium.org9b2112a2013-04-05 18:01:02 +00002153 dtrace_path,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002154 # Use a larger buffer if getting 'out of scratch space' errors.
2155 # Ref: https://wikis.oracle.com/display/DTrace/Options+and+Tunables
2156 '-b', '10m',
2157 '-x', 'dynvarsize=10m',
2158 #'-x', 'dtrace_global_maxsize=1m',
2159 '-x', 'evaltime=exec',
2160 '-o', '/dev/stderr',
2161 '-q',
2162 '-n', self._get_dtrace_code(),
2163 ]
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002164 if use_sudo is not False:
2165 trace_cmd.insert(0, 'sudo')
2166
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002167 with open(self._logname + '.log', 'wb') as logfile:
2168 self._dtrace = subprocess.Popen(
2169 trace_cmd, stdout=logfile, stderr=subprocess.STDOUT)
2170 logging.debug('Started dtrace pid: %d' % self._dtrace.pid)
2171
2172 # Reads until one line is printed, which signifies dtrace is up and ready.
2173 with open(self._logname + '.log', 'rb') as logfile:
2174 while 'dtrace_BEGIN' not in logfile.readline():
2175 if self._dtrace.poll() is not None:
2176 # Do a busy wait. :/
2177 break
2178 logging.debug('dtrace started')
2179
2180 def _get_dtrace_code(self):
2181 """Setups the D code to implement child process tracking.
2182
2183 Injects the cookie in the script so it knows when to stop.
2184
2185 The script will detect any instance of the script created with
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002186 create_subprocess_thunk() and will start tracing it.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002187 """
maruel@chromium.orgbe75f282012-11-02 19:34:37 +00002188 out = (
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002189 'inline int PID = %d;\n'
2190 'inline string SCRIPT = "%s";\n'
2191 'inline int FILE_ID = %d;\n'
2192 '\n'
2193 '%s') % (
2194 os.getpid(),
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002195 self._signal_script,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002196 self._dummy_file_id,
maruel@chromium.orgbe75f282012-11-02 19:34:37 +00002197 self.D_CODE)
2198 if os.environ.get('TRACE_INPUTS_DTRACE_ENABLE_EXECVE') == '1':
2199 # Do not enable by default since it tends to spew dtrace: error lines
2200 # because the execve() parameters are not in valid memory at the time of
2201 # logging.
2202 # TODO(maruel): Find a way to make this reliable since it's useful but
2203 # only works in limited/trivial uses cases for now.
2204 out += self.D_CODE_EXECVE
2205 return out
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002206
2207 def trace(self, cmd, cwd, tracename, output):
2208 """Runs dtrace on an executable.
2209
2210 This dtruss is broken when it starts the process itself or when tracing
2211 child processes, this code starts a wrapper process
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002212 generated with create_subprocess_thunk() which starts the executable to
2213 trace.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002214 """
2215 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
2216 assert os.path.isabs(cmd[0]), cmd[0]
2217 assert os.path.isabs(cwd), cwd
2218 assert os.path.normpath(cwd) == cwd, cwd
2219 with self._lock:
2220 if not self._initialized:
2221 raise TracingFailure(
2222 'Called Tracer.trace() on an unitialized object',
2223 None, None, None, tracename)
2224 assert tracename not in (i['trace'] for i in self._traces)
2225
2226 # Starts the script wrapper to start the child process. This signals the
2227 # dtrace script that this process is to be traced.
2228 stdout = stderr = None
2229 if output:
2230 stdout = subprocess.PIPE
2231 stderr = subprocess.STDOUT
2232 child_cmd = [
2233 sys.executable,
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002234 self._signal_script,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002235 tracename,
2236 ]
2237 # Call a dummy function so that dtrace knows I'm about to launch a process
2238 # that needs to be traced.
2239 # Yummy.
2240 child = subprocess.Popen(
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00002241 child_cmd + tools.fix_python_path(cmd),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002242 stdin=subprocess.PIPE,
2243 stdout=stdout,
2244 stderr=stderr,
2245 cwd=cwd)
2246 logging.debug('Started child pid: %d' % child.pid)
2247
2248 out = child.communicate()[0]
2249 # This doesn't mean tracing is done, one of the grand-child process may
2250 # still be alive. It will be tracked with the dtrace script.
2251
2252 with self._lock:
2253 assert tracename not in (i['trace'] for i in self._traces)
2254 self._traces.append(
2255 {
2256 'cmd': cmd,
2257 'cwd': cwd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002258 'pid': child.pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002259 'output': out,
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002260 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002261 })
2262 return child.returncode, out
2263
2264 def close(self, timeout=None):
2265 """Terminates dtrace."""
2266 logging.debug('close(%s)' % timeout)
2267 try:
2268 try:
2269 super(Dtrace.Tracer, self).close(timeout)
2270 # Signal dtrace that it should stop now.
maruel@chromium.orgf43e68b2012-10-15 20:23:10 +00002271 # ftruncate doesn't exist on Windows.
2272 os.ftruncate(self._dummy_file_id, 0) # pylint: disable=E1101
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002273 if timeout:
2274 start = time.time()
2275 # Use polling. :/
2276 while (self._dtrace.poll() is None and
2277 (time.time() - start) < timeout):
2278 time.sleep(0.1)
2279 self._dtrace.kill()
2280 self._dtrace.wait()
2281 finally:
2282 # Make sure to kill it in any case.
2283 if self._dtrace.poll() is None:
2284 try:
2285 self._dtrace.kill()
2286 self._dtrace.wait()
2287 except OSError:
2288 pass
2289
2290 if self._dtrace.returncode != 0:
2291 # Warn about any dtrace failure but basically ignore it.
2292 print 'dtrace failure: %s' % self._dtrace.returncode
2293 finally:
2294 os.close(self._dummy_file_id)
2295 os.remove(self._dummy_file_name)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002296
2297 def post_process_log(self):
2298 """Sorts the log back in order when each call occured.
2299
2300 dtrace doesn't save the buffer in strict order since it keeps one buffer
2301 per CPU.
2302 """
2303 super(Dtrace.Tracer, self).post_process_log()
2304 logname = self._logname + '.log'
2305 with open(logname, 'rb') as logfile:
2306 lines = [l for l in logfile if l.strip()]
2307 errors = [l for l in lines if l.startswith('dtrace:')]
2308 if errors:
2309 raise TracingFailure(
2310 'Found errors in the trace: %s' % '\n'.join(errors),
2311 None, None, None, logname)
2312 try:
2313 lines = sorted(lines, key=lambda l: int(l.split(' ', 1)[0]))
2314 except ValueError:
2315 raise TracingFailure(
2316 'Found errors in the trace: %s' % '\n'.join(
2317 l for l in lines if l.split(' ', 1)[0].isdigit()),
2318 None, None, None, logname)
2319 with open(logname, 'wb') as logfile:
2320 logfile.write(''.join(lines))
2321
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002322 def __init__(self, use_sudo=None):
2323 super(Dtrace, self).__init__()
2324 self.use_sudo = use_sudo
2325
2326 def get_tracer(self, logname):
2327 return self.Tracer(logname, self.use_sudo)
2328
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002329 @staticmethod
2330 def clean_trace(logname):
2331 for ext in ('', '.log'):
2332 if os.path.isfile(logname + ext):
2333 os.remove(logname + ext)
2334
2335 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00002336 def parse_log(cls, logname, blacklist, trace_name):
2337 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00002338 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002339
2340 def blacklist_more(filepath):
2341 # All the HFS metadata is in the form /.vol/...
2342 return blacklist(filepath) or re.match(r'^\/\.vol\/.+$', filepath)
2343
2344 data = read_json(logname)
2345 out = []
2346 for item in data['traces']:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00002347 if trace_name and item['trace'] != trace_name:
2348 continue
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002349 result = {
2350 'output': item['output'],
2351 'trace': item['trace'],
2352 }
2353 try:
2354 context = cls.Context(blacklist_more, item['pid'], item['cwd'])
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002355 # It's fine to assume the file as UTF-8: OSX enforces the file names to
2356 # be valid UTF-8 and we control the log output.
2357 for line in codecs.open(logname + '.log', 'rb', encoding='utf-8'):
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002358 context.on_line(line)
2359 result['results'] = context.to_results()
2360 except TracingFailure:
2361 result['exception'] = sys.exc_info()
2362 out.append(result)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002363 return out
2364
2365
2366class LogmanTrace(ApiBase):
2367 """Uses the native Windows ETW based tracing functionality to trace a child
2368 process.
2369
2370 Caveat: this implementations doesn't track cwd or initial_cwd. It is because
2371 the Windows Kernel doesn't have a concept of 'current working directory' at
2372 all. A Win32 process has a map of current directories, one per drive letter
2373 and it is managed by the user mode kernel32.dll. In kernel, a file is always
2374 opened relative to another file_object or as an absolute path. All the current
2375 working directory logic is done in user mode.
2376 """
2377 class Context(ApiBase.Context):
2378 """Processes a ETW log line and keeps the list of existent and non
2379 existent files accessed.
2380
2381 Ignores directories.
2382 """
2383 # These indexes are for the stripped version in json.
2384 EVENT_NAME = 0
2385 TYPE = 1
2386 PID = 2
2387 TID = 3
2388 PROCESSOR_ID = 4
2389 TIMESTAMP = 5
2390 USER_DATA = 6
2391
2392 class Process(ApiBase.Context.Process):
2393 def __init__(self, *args):
2394 super(LogmanTrace.Context.Process, self).__init__(*args)
2395 # Handle file objects that succeeded.
2396 self.file_objects = {}
2397
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002398 def __init__(self, blacklist, thunk_pid, trace_name, thunk_cmd):
2399 logging.info(
2400 '%s(%d, %s, %s)', self.__class__.__name__, thunk_pid, trace_name,
2401 thunk_cmd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002402 super(LogmanTrace.Context, self).__init__(blacklist)
maruel@chromium.org561d4b22013-09-26 21:08:08 +00002403 self._drive_map = file_path.DosDriveMap()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002404 # Threads mapping to the corresponding process id.
2405 self._threads_active = {}
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002406 # Process ID of the tracer, e.g. the temporary script created by
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002407 # create_subprocess_thunk(). This is tricky because the process id may
2408 # have been reused.
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002409 self._thunk_pid = thunk_pid
2410 self._thunk_cmd = thunk_cmd
2411 self._trace_name = trace_name
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002412 self._line_number = 0
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002413 self._thunk_process = None
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002414
2415 def on_line(self, line):
2416 """Processes a json Event line."""
2417 self._line_number += 1
2418 try:
2419 # By Opcode
2420 handler = getattr(
2421 self,
2422 'handle_%s_%s' % (line[self.EVENT_NAME], line[self.TYPE]),
2423 None)
2424 if not handler:
2425 raise TracingFailure(
2426 'Unexpected event %s_%s' % (
2427 line[self.EVENT_NAME], line[self.TYPE]),
2428 None, None, None)
2429 handler(line)
2430 except TracingFailure, e:
2431 # Hack in the values since the handler could be a static function.
2432 e.pid = line[self.PID]
2433 e.line = line
2434 e.line_number = self._line_number
2435 # Re-raise the modified exception.
2436 raise
2437 except (KeyError, NotImplementedError, ValueError), e:
2438 raise TracingFailure(
2439 'Trace generated a %s exception: %s' % (
2440 e.__class__.__name__, str(e)),
2441 line[self.PID],
2442 self._line_number,
2443 line,
2444 e)
2445
2446 def to_results(self):
2447 if not self.root_process:
2448 raise TracingFailure(
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002449 'Failed to detect the initial process %d' % self._thunk_pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002450 None, None, None)
2451 process = self.root_process.to_results_process()
2452 return Results(process)
2453
2454 def _thread_to_process(self, tid):
2455 """Finds the process from the thread id."""
2456 tid = int(tid, 16)
2457 pid = self._threads_active.get(tid)
2458 if not pid or not self._process_lookup.get(pid):
2459 return
2460 return self._process_lookup[pid]
2461
2462 @classmethod
2463 def handle_EventTrace_Header(cls, line):
2464 """Verifies no event was dropped, e.g. no buffer overrun occured."""
2465 BUFFER_SIZE = cls.USER_DATA
2466 #VERSION = cls.USER_DATA + 1
2467 #PROVIDER_VERSION = cls.USER_DATA + 2
2468 #NUMBER_OF_PROCESSORS = cls.USER_DATA + 3
2469 #END_TIME = cls.USER_DATA + 4
2470 #TIMER_RESOLUTION = cls.USER_DATA + 5
2471 #MAX_FILE_SIZE = cls.USER_DATA + 6
2472 #LOG_FILE_MODE = cls.USER_DATA + 7
2473 #BUFFERS_WRITTEN = cls.USER_DATA + 8
2474 #START_BUFFERS = cls.USER_DATA + 9
2475 #POINTER_SIZE = cls.USER_DATA + 10
2476 EVENTS_LOST = cls.USER_DATA + 11
2477 #CPU_SPEED = cls.USER_DATA + 12
2478 #LOGGER_NAME = cls.USER_DATA + 13
2479 #LOG_FILE_NAME = cls.USER_DATA + 14
2480 #BOOT_TIME = cls.USER_DATA + 15
2481 #PERF_FREQ = cls.USER_DATA + 16
2482 #START_TIME = cls.USER_DATA + 17
2483 #RESERVED_FLAGS = cls.USER_DATA + 18
2484 #BUFFERS_LOST = cls.USER_DATA + 19
2485 #SESSION_NAME_STRING = cls.USER_DATA + 20
2486 #LOG_FILE_NAME_STRING = cls.USER_DATA + 21
2487 if line[EVENTS_LOST] != '0':
2488 raise TracingFailure(
2489 ( '%s events were lost during trace, please increase the buffer '
2490 'size from %s') % (line[EVENTS_LOST], line[BUFFER_SIZE]),
2491 None, None, None)
2492
2493 def handle_FileIo_Cleanup(self, line):
2494 """General wisdom: if a file is closed, it's because it was opened.
2495
2496 Note that FileIo_Close is not used since if a file was opened properly but
2497 not closed before the process exits, only Cleanup will be logged.
2498 """
2499 #IRP = self.USER_DATA
2500 TTID = self.USER_DATA + 1 # Thread ID, that's what we want.
2501 FILE_OBJECT = self.USER_DATA + 2
2502 #FILE_KEY = self.USER_DATA + 3
2503 proc = self._thread_to_process(line[TTID])
2504 if not proc:
2505 # Not a process we care about.
2506 return
2507 file_object = line[FILE_OBJECT]
2508 if file_object in proc.file_objects:
maruel@chromium.org538141b2013-06-03 20:57:17 +00002509 filepath, access_type = proc.file_objects.pop(file_object)
2510 proc.add_file(filepath, access_type)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002511
2512 def handle_FileIo_Create(self, line):
2513 """Handles a file open.
2514
2515 All FileIo events are described at
2516 http://msdn.microsoft.com/library/windows/desktop/aa363884.aspx
2517 for some value of 'description'.
2518
2519 " (..) process and thread id values of the IO events (..) are not valid "
2520 http://msdn.microsoft.com/magazine/ee358703.aspx
2521
2522 The FileIo.Create event doesn't return if the CreateFile() call
2523 succeeded, so keep track of the file_object and check that it is
2524 eventually closed with FileIo_Cleanup.
2525 """
2526 #IRP = self.USER_DATA
2527 TTID = self.USER_DATA + 1 # Thread ID, that's what we want.
2528 FILE_OBJECT = self.USER_DATA + 2
2529 #CREATE_OPTIONS = self.USER_DATA + 3
2530 #FILE_ATTRIBUTES = self.USER_DATA + 4
maruel@chromium.org538141b2013-06-03 20:57:17 +00002531 #SHARE_ACCESS = self.USER_DATA + 5
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002532 OPEN_PATH = self.USER_DATA + 6
2533
2534 proc = self._thread_to_process(line[TTID])
2535 if not proc:
2536 # Not a process we care about.
2537 return
2538
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002539 raw_path = line[OPEN_PATH]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002540 # Ignore directories and bare drive right away.
2541 if raw_path.endswith(os.path.sep):
2542 return
2543 filepath = self._drive_map.to_win32(raw_path)
2544 # Ignore bare drive right away. Some may still fall through with format
2545 # like '\\?\X:'
2546 if len(filepath) == 2:
2547 return
2548 file_object = line[FILE_OBJECT]
2549 if os.path.isdir(filepath):
2550 # There is no O_DIRECTORY equivalent on Windows. The closed is
2551 # FILE_FLAG_BACKUP_SEMANTICS but it's not exactly right either. So
2552 # simply discard directories are they are found.
2553 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00002554 # Override any stale file object.
2555 # TODO(maruel): Figure out a way to detect if the file was opened for
2556 # reading or writting. Sadly CREATE_OPTIONS doesn't seem to be of any help
2557 # here. For now mark as None to make it clear we have no idea what it is
2558 # about.
2559 proc.file_objects[file_object] = (filepath, None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002560
2561 def handle_FileIo_Rename(self, line):
2562 # TODO(maruel): Handle?
2563 pass
2564
2565 def handle_Process_End(self, line):
2566 pid = line[self.PID]
2567 if self._process_lookup.get(pid):
2568 logging.info('Terminated: %d' % pid)
2569 self._process_lookup[pid] = None
2570 else:
2571 logging.debug('Terminated: %d' % pid)
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002572 if self._thunk_process and self._thunk_process.pid == pid:
2573 self._thunk_process = None
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002574
2575 def handle_Process_Start(self, line):
2576 """Handles a new child process started by PID."""
2577 #UNIQUE_PROCESS_KEY = self.USER_DATA
2578 PROCESS_ID = self.USER_DATA + 1
2579 #PARENT_PID = self.USER_DATA + 2
2580 #SESSION_ID = self.USER_DATA + 3
2581 #EXIT_STATUS = self.USER_DATA + 4
2582 #DIRECTORY_TABLE_BASE = self.USER_DATA + 5
2583 #USER_SID = self.USER_DATA + 6
2584 IMAGE_FILE_NAME = self.USER_DATA + 7
2585 COMMAND_LINE = self.USER_DATA + 8
2586
2587 ppid = line[self.PID]
2588 pid = int(line[PROCESS_ID], 16)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002589 command_line = CommandLineToArgvW(line[COMMAND_LINE])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002590 logging.debug(
2591 'New process %d->%d (%s) %s' %
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002592 (ppid, pid, line[IMAGE_FILE_NAME], command_line))
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002593
2594 if pid == self._thunk_pid:
2595 # Need to ignore processes we don't know about because the log is
2596 # system-wide. self._thunk_pid shall start only one process.
2597 # This is tricky though because Windows *loves* to reuse process id and
2598 # it happens often that the process ID of the thunk script created by
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002599 # create_subprocess_thunk() is reused. So just detecting the pid here is
2600 # not sufficient, we must confirm the command line.
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002601 if command_line[:len(self._thunk_cmd)] != self._thunk_cmd:
2602 logging.info(
2603 'Ignoring duplicate pid %d for %s: %s while searching for %s',
2604 pid, self._trace_name, command_line, self._thunk_cmd)
2605 return
2606
2607 # TODO(maruel): The check is quite weak. Add the thunk path.
2608 if self._thunk_process:
2609 raise TracingFailure(
2610 ( 'Parent process is _thunk_pid(%d) but thunk_process(%d) is '
2611 'already set') % (self._thunk_pid, self._thunk_process.pid),
2612 None, None, None)
2613 proc = self.Process(self.blacklist, pid, None)
2614 self._thunk_process = proc
2615 return
2616 elif ppid == self._thunk_pid and self._thunk_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002617 proc = self.Process(self.blacklist, pid, None)
2618 self.root_process = proc
2619 ppid = None
2620 elif self._process_lookup.get(ppid):
2621 proc = self.Process(self.blacklist, pid, None)
2622 self._process_lookup[ppid].children.append(proc)
2623 else:
2624 # Ignore
2625 return
2626 self._process_lookup[pid] = proc
2627
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002628 proc.command = command_line
2629 proc.executable = line[IMAGE_FILE_NAME]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002630 # proc.command[0] may be the absolute path of 'executable' but it may be
2631 # anything else too. If it happens that command[0] ends with executable,
2632 # use it, otherwise defaults to the base name.
2633 cmd0 = proc.command[0].lower()
2634 if not cmd0.endswith('.exe'):
2635 # TODO(maruel): That's not strictly true either.
2636 cmd0 += '.exe'
2637 if cmd0.endswith(proc.executable) and os.path.isfile(cmd0):
2638 # Fix the path.
2639 cmd0 = cmd0.replace('/', os.path.sep)
2640 cmd0 = os.path.normpath(cmd0)
maruel@chromium.org561d4b22013-09-26 21:08:08 +00002641 proc.executable = file_path.get_native_path_case(cmd0)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002642 logging.info(
2643 'New child: %s -> %d %s' % (ppid, pid, proc.executable))
2644
2645 def handle_Thread_End(self, line):
2646 """Has the same parameters as Thread_Start."""
2647 tid = int(line[self.TID], 16)
2648 self._threads_active.pop(tid, None)
2649
2650 def handle_Thread_Start(self, line):
2651 """Handles a new thread created.
2652
2653 Do not use self.PID here since a process' initial thread is created by
2654 the parent process.
2655 """
2656 PROCESS_ID = self.USER_DATA
2657 TTHREAD_ID = self.USER_DATA + 1
2658 #STACK_BASE = self.USER_DATA + 2
2659 #STACK_LIMIT = self.USER_DATA + 3
2660 #USER_STACK_BASE = self.USER_DATA + 4
2661 #USER_STACK_LIMIT = self.USER_DATA + 5
2662 #AFFINITY = self.USER_DATA + 6
2663 #WIN32_START_ADDR = self.USER_DATA + 7
2664 #TEB_BASE = self.USER_DATA + 8
2665 #SUB_PROCESS_TAG = self.USER_DATA + 9
2666 #BASE_PRIORITY = self.USER_DATA + 10
2667 #PAGE_PRIORITY = self.USER_DATA + 11
2668 #IO_PRIORITY = self.USER_DATA + 12
2669 #THREAD_FLAGS = self.USER_DATA + 13
2670 # Do not use self.PID here since a process' initial thread is created by
2671 # the parent process.
2672 pid = int(line[PROCESS_ID], 16)
2673 tid = int(line[TTHREAD_ID], 16)
2674 logging.debug('New thread pid:%d, tid:%d' % (pid, tid))
2675 self._threads_active[tid] = pid
2676
2677 @classmethod
2678 def supported_events(cls):
2679 """Returns all the procesed events."""
2680 out = []
2681 for member in dir(cls):
2682 match = re.match(r'^handle_([A-Za-z]+)_([A-Za-z]+)$', member)
2683 if match:
2684 out.append(match.groups())
2685 return out
2686
2687 class Tracer(ApiBase.Tracer):
2688 # The basic headers.
2689 EXPECTED_HEADER = [
2690 u'Event Name',
2691 u'Type',
2692 u'Event ID',
2693 u'Version',
2694 u'Channel',
2695 u'Level', # 5
2696 u'Opcode',
2697 u'Task',
2698 u'Keyword',
2699 u'PID',
2700 u'TID', # 10
2701 u'Processor Number',
2702 u'Instance ID',
2703 u'Parent Instance ID',
2704 u'Activity ID',
2705 u'Related Activity ID', # 15
2706 u'Clock-Time',
2707 u'Kernel(ms)', # Both have a resolution of ~15ms which makes them
2708 u'User(ms)', # pretty much useless.
2709 u'User Data', # Extra arguments that are event-specific.
2710 ]
2711 # Only the useful headers common to all entries are listed there. Any column
2712 # at 19 or higher is dependent on the specific event.
2713 EVENT_NAME = 0
2714 TYPE = 1
2715 PID = 9
2716 TID = 10
2717 PROCESSOR_ID = 11
2718 TIMESTAMP = 16
2719 NULL_GUID = '{00000000-0000-0000-0000-000000000000}'
2720 USER_DATA = 19
2721
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002722 class CsvReader(object):
2723 """CSV reader that reads files generated by tracerpt.exe.
2724
2725 csv.reader() fails to read them properly, it mangles file names quoted
2726 with "" with a comma in it.
2727 """
2728 # 0. Had a ',' or one of the following ' ' after a comma, next should
2729 # be ' ', '"' or string or ',' for an empty field.
2730 ( HAD_DELIMITER,
2731 # 1. Processing an unquoted field up to ','.
2732 IN_STR,
2733 # 2. Processing a new field starting with '"'.
2734 STARTING_STR_QUOTED,
2735 # 3. Second quote in a row at the start of a field. It could be either
2736 # '""foo""' or '""'. Who the hell thought it was a great idea to use
2737 # the same character for delimiting and escaping?
2738 STARTING_SECOND_QUOTE,
2739 # 4. A quote inside a quoted string where the previous character was
2740 # not a quote, so the string is not empty. Can be either: end of a
2741 # quoted string (a delimiter) or a quote escape. The next char must be
2742 # either '"' or ','.
2743 HAD_QUOTE_IN_QUOTED,
2744 # 5. Second quote inside a quoted string.
2745 HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED,
2746 # 6. Processing a field that started with '"'.
2747 IN_STR_QUOTED) = range(7)
2748
2749 def __init__(self, f):
2750 self.f = f
2751
2752 def __iter__(self):
2753 return self
2754
2755 def next(self):
2756 """Splits the line in fields."""
2757 line = self.f.readline()
2758 if not line:
2759 raise StopIteration()
2760 line = line.strip()
2761 fields = []
2762 state = self.HAD_DELIMITER
2763 for i, c in enumerate(line):
2764 if state == self.HAD_DELIMITER:
2765 if c == ',':
2766 # Empty field.
2767 fields.append('')
2768 elif c == ' ':
2769 # Ignore initial whitespaces
2770 pass
2771 elif c == '"':
2772 state = self.STARTING_STR_QUOTED
2773 fields.append('')
2774 else:
2775 # Start of a new field.
2776 state = self.IN_STR
2777 fields.append(c)
2778
2779 elif state == self.IN_STR:
2780 # Do not accept quote inside unquoted field.
2781 assert c != '"', (i, c, line, fields)
2782 if c == ',':
2783 fields[-1] = fields[-1].strip()
2784 state = self.HAD_DELIMITER
2785 else:
2786 fields[-1] = fields[-1] + c
2787
2788 elif state == self.STARTING_STR_QUOTED:
2789 if c == '"':
2790 # Do not store the character yet.
2791 state = self.STARTING_SECOND_QUOTE
2792 else:
2793 state = self.IN_STR_QUOTED
2794 fields[-1] = fields[-1] + c
2795
2796 elif state == self.STARTING_SECOND_QUOTE:
2797 if c == ',':
2798 # It was an empty field. '""' == ''.
2799 state = self.HAD_DELIMITER
2800 else:
2801 fields[-1] = fields[-1] + '"' + c
2802 state = self.IN_STR_QUOTED
2803
2804 elif state == self.HAD_QUOTE_IN_QUOTED:
2805 if c == ',':
2806 # End of the string.
2807 state = self.HAD_DELIMITER
2808 elif c == '"':
2809 state = self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED
2810 else:
2811 # The previous double-quote was just an unescaped quote.
2812 fields[-1] = fields[-1] + '"' + c
2813 state = self.IN_STR_QUOTED
2814
2815 elif state == self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED:
2816 if c == ',':
2817 # End of the string.
2818 state = self.HAD_DELIMITER
2819 fields[-1] = fields[-1] + '"'
2820 else:
maruel@chromium.org261d1352013-02-25 19:23:20 +00002821 # That's just how the logger rolls. Revert back to appending the
2822 # char and "guess" it was a quote in a double-quoted string.
2823 state = self.IN_STR_QUOTED
2824 fields[-1] = fields[-1] + '"' + c
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002825
2826 elif state == self.IN_STR_QUOTED:
2827 if c == '"':
2828 # Could be a delimiter or an escape.
2829 state = self.HAD_QUOTE_IN_QUOTED
2830 else:
2831 fields[-1] = fields[-1] + c
2832
2833 if state == self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED:
2834 fields[-1] = fields[-1] + '"'
2835 else:
2836 assert state in (
2837 # Terminated with a normal field.
2838 self.IN_STR,
2839 # Terminated with an empty field.
2840 self.STARTING_SECOND_QUOTE,
2841 # Terminated with a normal quoted field.
2842 self.HAD_QUOTE_IN_QUOTED), (
2843 line, state, fields)
2844 return fields
2845
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002846 def __init__(self, logname):
2847 """Starts the log collection.
2848
2849 Requires administrative access. logman.exe is synchronous so no need for a
2850 "warmup" call. 'Windows Kernel Trace' is *localized* so use its GUID
2851 instead. The GUID constant name is SystemTraceControlGuid. Lovely.
2852
2853 One can get the list of potentially interesting providers with:
2854 "logman query providers | findstr /i file"
2855 """
2856 super(LogmanTrace.Tracer, self).__init__(logname)
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002857 self._signal_script = create_subprocess_thunk()
2858 self._scripts_to_cleanup.append(self._signal_script)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002859 cmd_start = [
2860 'logman.exe',
2861 'start',
2862 'NT Kernel Logger',
2863 '-p', '{9e814aad-3204-11d2-9a82-006008a86939}',
2864 # splitio,fileiocompletion,syscall,file,cswitch,img
2865 '(process,fileio,thread)',
2866 '-o', self._logname + '.etl',
2867 '-ets', # Send directly to kernel
2868 # Values extracted out of thin air.
2869 # Event Trace Session buffer size in kb.
2870 '-bs', '10240',
2871 # Number of Event Trace Session buffers.
2872 '-nb', '16', '256',
2873 ]
2874 logging.debug('Running: %s' % cmd_start)
2875 try:
2876 subprocess.check_call(
2877 cmd_start,
2878 stdin=subprocess.PIPE,
2879 stdout=subprocess.PIPE,
2880 stderr=subprocess.STDOUT)
2881 except subprocess.CalledProcessError, e:
2882 if e.returncode == -2147024891:
2883 print >> sys.stderr, 'Please restart with an elevated admin prompt'
2884 elif e.returncode == -2144337737:
2885 print >> sys.stderr, (
2886 'A kernel trace was already running, stop it and try again')
2887 raise
2888
2889 def trace(self, cmd, cwd, tracename, output):
2890 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
2891 assert os.path.isabs(cmd[0]), cmd[0]
2892 assert os.path.isabs(cwd), cwd
2893 assert os.path.normpath(cwd) == cwd, cwd
2894 with self._lock:
2895 if not self._initialized:
2896 raise TracingFailure(
2897 'Called Tracer.trace() on an unitialized object',
2898 None, None, None, tracename)
2899 assert tracename not in (i['trace'] for i in self._traces)
2900
2901 # Use "logman -?" for help.
2902
2903 stdout = stderr = None
2904 if output:
2905 stdout = subprocess.PIPE
2906 stderr = subprocess.STDOUT
2907
2908 # Run the child process.
2909 logging.debug('Running: %s' % cmd)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002910 # Use the temporary script generated with create_subprocess_thunk() so we
2911 # have a clear pid owner. Since trace_inputs.py can be used as a library
2912 # and could trace multiple processes simultaneously, it makes it more
2913 # complex if the executable to be traced is executed directly here. It
2914 # also solves issues related to logman.exe that needs to be executed to
2915 # control the kernel trace.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002916 child_cmd = [
2917 sys.executable,
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002918 self._signal_script,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002919 tracename,
2920 ]
2921 child = subprocess.Popen(
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00002922 child_cmd + tools.fix_python_path(cmd),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002923 cwd=cwd,
2924 stdin=subprocess.PIPE,
2925 stdout=stdout,
2926 stderr=stderr)
2927 logging.debug('Started child pid: %d' % child.pid)
2928 out = child.communicate()[0]
2929 # This doesn't mean all the grand-children are done. Sadly, we don't have
2930 # a good way to determine that.
2931
2932 with self._lock:
2933 assert tracename not in (i['trace'] for i in self._traces)
2934 self._traces.append({
maruel@chromium.orgf43e68b2012-10-15 20:23:10 +00002935 'cmd': cmd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002936 'cwd': cwd,
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002937 'output': out,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002938 'pid': child.pid,
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002939 # Used to figure out the real process when process ids are reused.
2940 'thunk_cmd': child_cmd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002941 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002942 })
2943
2944 return child.returncode, out
2945
2946 def close(self, _timeout=None):
2947 """Stops the kernel log collection and converts the traces to text
2948 representation.
2949 """
2950 with self._lock:
maruel@chromium.org808e3f22013-05-16 16:42:54 +00002951 try:
2952 super(LogmanTrace.Tracer, self).close()
2953 finally:
2954 cmd_stop = [
2955 'logman.exe',
2956 'stop',
2957 'NT Kernel Logger',
2958 '-ets', # Sends the command directly to the kernel.
2959 ]
2960 logging.debug('Running: %s' % cmd_stop)
2961 subprocess.check_call(
2962 cmd_stop,
2963 stdin=subprocess.PIPE,
2964 stdout=subprocess.PIPE,
2965 stderr=subprocess.STDOUT)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002966
2967 def post_process_log(self):
2968 """Converts the .etl file into .csv then into .json."""
2969 super(LogmanTrace.Tracer, self).post_process_log()
2970 logformat = 'csv'
2971 self._convert_log(logformat)
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00002972 self._trim_log(logformat)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002973
maruel@chromium.org808e3f22013-05-16 16:42:54 +00002974 def _gen_logdata(self):
2975 return {
2976 'format': 'csv',
2977 'traces': self._traces,
2978 }
2979
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00002980 def _trim_log(self, logformat):
2981 """Reduces the amount of data in original log by generating a 'reduced'
2982 log.
2983 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002984 if logformat == 'csv_utf16':
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002985 file_handle = codecs.open(
2986 self._logname + '.' + logformat, 'r', encoding='utf-16')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002987
2988 elif logformat == 'csv':
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002989 assert sys.getfilesystemencoding() == 'mbcs'
2990 file_handle = codecs.open(
2991 self._logname + '.' + logformat, 'r',
2992 encoding=get_current_encoding())
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002993
2994 supported_events = LogmanTrace.Context.supported_events()
2995
2996 def trim(generator):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00002997 """Loads items from the generator and returns the interesting data.
2998
2999 It filters out any uninteresting line and reduce the amount of data in
3000 the trace.
3001 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003002 for index, line in enumerate(generator):
3003 if not index:
3004 if line != self.EXPECTED_HEADER:
3005 raise TracingFailure(
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003006 'Found malformed header: %s' % line,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003007 None, None, None)
3008 continue
3009 # As you can see, the CSV is full of useful non-redundant information:
3010 if (line[2] != '0' or # Event ID
3011 line[3] not in ('2', '3') or # Version
3012 line[4] != '0' or # Channel
3013 line[5] != '0' or # Level
3014 line[7] != '0' or # Task
3015 line[8] != '0x0000000000000000' or # Keyword
3016 line[12] != '' or # Instance ID
3017 line[13] != '' or # Parent Instance ID
3018 line[14] != self.NULL_GUID or # Activity ID
3019 line[15] != ''): # Related Activity ID
3020 raise TracingFailure(
3021 'Found unexpected values in line: %s' % ' '.join(line),
3022 None, None, None)
3023
3024 if (line[self.EVENT_NAME], line[self.TYPE]) not in supported_events:
3025 continue
3026
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003027 yield [
3028 line[self.EVENT_NAME],
3029 line[self.TYPE],
3030 line[self.PID],
3031 line[self.TID],
3032 line[self.PROCESSOR_ID],
3033 line[self.TIMESTAMP],
3034 ] + line[self.USER_DATA:]
3035
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003036 # must not convert the trim() call into a list, since it will use too much
3037 # memory for large trace. use a csv file as a workaround since the json
3038 # parser requires a complete in-memory file.
3039 with open('%s.preprocessed' % self._logname, 'wb') as f:
3040 # $ and * can't be used in file name on windows, reducing the likelihood
3041 # of having to escape a string.
3042 out = csv.writer(
3043 f, delimiter='$', quotechar='*', quoting=csv.QUOTE_MINIMAL)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003044 for line in trim(self.CsvReader(file_handle)):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003045 out.writerow([s.encode('utf-8') for s in line])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003046
3047 def _convert_log(self, logformat):
3048 """Converts the ETL trace to text representation.
3049
3050 Normally, 'csv' is sufficient. If complex scripts are used (like eastern
3051 languages), use 'csv_utf16'. If localization gets in the way, use 'xml'.
3052
3053 Arguments:
3054 - logformat: Text format to be generated, csv, csv_utf16 or xml.
3055
3056 Use "tracerpt -?" for help.
3057 """
3058 LOCALE_INVARIANT = 0x7F
3059 windll.kernel32.SetThreadLocale(LOCALE_INVARIANT)
3060 cmd_convert = [
3061 'tracerpt.exe',
3062 '-l', self._logname + '.etl',
3063 '-o', self._logname + '.' + logformat,
3064 '-gmt', # Use UTC
3065 '-y', # No prompt
3066 # Use -of XML to get the header of each items after column 19, e.g. all
3067 # the actual headers of 'User Data'.
3068 ]
3069
3070 if logformat == 'csv':
3071 # tracerpt localizes the 'Type' column, for major brainfuck
3072 # entertainment. I can't imagine any sane reason to do that.
3073 cmd_convert.extend(['-of', 'CSV'])
3074 elif logformat == 'csv_utf16':
3075 # This causes it to use UTF-16, which doubles the log size but ensures
3076 # the log is readable for non-ASCII characters.
3077 cmd_convert.extend(['-of', 'CSV', '-en', 'Unicode'])
3078 elif logformat == 'xml':
3079 cmd_convert.extend(['-of', 'XML'])
3080 else:
3081 raise ValueError('Unexpected log format \'%s\'' % logformat)
3082 logging.debug('Running: %s' % cmd_convert)
3083 # This can takes tens of minutes for large logs.
3084 # Redirects all output to stderr.
3085 subprocess.check_call(
3086 cmd_convert,
3087 stdin=subprocess.PIPE,
3088 stdout=sys.stderr,
3089 stderr=sys.stderr)
3090
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003091 def __init__(self, use_sudo=False): # pylint: disable=W0613
3092 super(LogmanTrace, self).__init__()
3093 # Ignore use_sudo. It's irrelevant on Windows but kept to simplify the API.
3094
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003095 @staticmethod
3096 def clean_trace(logname):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003097 for ext in ('', '.csv', '.etl', '.json', '.xml', '.preprocessed'):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003098 if os.path.isfile(logname + ext):
3099 os.remove(logname + ext)
3100
3101 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003102 def parse_log(cls, logname, blacklist, trace_name):
3103 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00003104 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003105
3106 def blacklist_more(filepath):
3107 # All the NTFS metadata is in the form x:\$EXTEND or stuff like that.
3108 return blacklist(filepath) or re.match(r'[A-Z]\:\\\$EXTEND', filepath)
3109
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003110 # Create a list of (Context, result_dict) tuples. This is necessary because
3111 # the csv file may be larger than the amount of available memory.
3112 contexes = [
3113 (
3114 cls.Context(
3115 blacklist_more, item['pid'], item['trace'], item['thunk_cmd']),
3116 {
3117 'output': item['output'],
3118 'trace': item['trace'],
3119 },
3120 )
3121 for item in read_json(logname)['traces']
3122 if not trace_name or item['trace'] == trace_name
3123 ]
3124
3125 # The log may be too large to fit in memory and it is not efficient to read
3126 # it multiple times, so multiplex the contexes instead, which is slightly
3127 # more awkward.
3128 with open('%s.preprocessed' % logname, 'rb') as f:
3129 lines = csv.reader(
3130 f, delimiter='$', quotechar='*', quoting=csv.QUOTE_MINIMAL)
3131 for encoded in lines:
3132 line = [s.decode('utf-8') for s in encoded]
3133 # Convert the PID in-place from hex.
3134 line[cls.Context.PID] = int(line[cls.Context.PID], 16)
3135 for context in contexes:
3136 if 'exception' in context[1]:
3137 continue
3138 try:
3139 context[0].on_line(line)
3140 except TracingFailure:
3141 context[1]['exception'] = sys.exc_info()
3142
3143 for context in contexes:
3144 if 'exception' in context[1]:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003145 continue
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003146 context[1]['results'] = context[0].to_results()
3147
3148 return [context[1] for context in contexes]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003149
3150
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003151def get_api(**kwargs):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003152 """Returns the correct implementation for the current OS."""
3153 if sys.platform == 'cygwin':
3154 raise NotImplementedError(
3155 'Not implemented for cygwin, start the script from Win32 python')
3156 flavors = {
3157 'win32': LogmanTrace,
3158 'darwin': Dtrace,
3159 'sunos5': Dtrace,
3160 'freebsd7': Dtrace,
3161 'freebsd8': Dtrace,
3162 }
3163 # Defaults to strace.
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003164 return flavors.get(sys.platform, Strace)(**kwargs)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003165
3166
3167def extract_directories(root_dir, files, blacklist):
3168 """Detects if all the files in a directory are in |files| and if so, replace
3169 the individual files by a Results.Directory instance.
3170
3171 Takes a list of Results.File instances and returns a shorter list of
3172 Results.File and Results.Directory instances.
3173
3174 Arguments:
3175 - root_dir: Optional base directory that shouldn't be search further.
3176 - files: list of Results.File instances.
maruel@chromium.org3683afe2013-07-27 00:09:27 +00003177 - blacklist: lambda to reject unneeded files, for example r'.+\.pyc'.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003178 """
3179 logging.info(
3180 'extract_directories(%s, %d files, ...)' % (root_dir, len(files)))
3181 assert not (root_dir or '').endswith(os.path.sep), root_dir
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003182 # It is important for root_dir to not be a symlinked path, make sure to call
3183 # os.path.realpath() as needed.
3184 assert not root_dir or (
maruel@chromium.org561d4b22013-09-26 21:08:08 +00003185 os.path.realpath(file_path.get_native_path_case(root_dir)) == root_dir)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003186 assert not any(isinstance(f, Results.Directory) for f in files)
3187 # Remove non existent files.
3188 files = [f for f in files if f.existent]
3189 if not files:
3190 return files
3191 # All files must share the same root, which can be None.
3192 assert len(set(f.root for f in files)) == 1, set(f.root for f in files)
3193
3194 # Creates a {directory: {filename: File}} mapping, up to root.
3195 buckets = {}
3196 if root_dir:
3197 buckets[root_dir] = {}
3198 for fileobj in files:
3199 path = fileobj.full_path
3200 directory = os.path.dirname(path)
3201 assert directory
3202 # Do not use os.path.basename() so trailing os.path.sep is kept.
3203 basename = path[len(directory)+1:]
3204 files_in_directory = buckets.setdefault(directory, {})
3205 files_in_directory[basename] = fileobj
3206 # Add all the directories recursively up to root.
3207 while True:
3208 old_d = directory
3209 directory = os.path.dirname(directory)
3210 if directory + os.path.sep == root_dir or directory == old_d:
3211 break
3212 buckets.setdefault(directory, {})
3213
3214 root_prefix = len(root_dir) + 1 if root_dir else 0
3215 for directory in sorted(buckets, reverse=True):
maruel@chromium.orgb1678e62013-03-19 15:55:17 +00003216 if not os.path.isdir(directory):
3217 logging.debug(
3218 '%s was a directory but doesn\'t exist anymore; ignoring', directory)
3219 continue
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003220 actual = set(f for f in os.listdir(directory) if not blacklist(f))
3221 expected = set(buckets[directory])
3222 if not (actual - expected):
3223 parent = os.path.dirname(directory)
3224 buckets[parent][os.path.basename(directory)] = Results.Directory(
3225 root_dir,
3226 directory[root_prefix:],
3227 False,
3228 sum(f.size for f in buckets[directory].itervalues()),
3229 sum(f.nb_files for f in buckets[directory].itervalues()))
3230 # Remove the whole bucket.
3231 del buckets[directory]
3232
3233 # Reverse the mapping with what remains. The original instances are returned,
3234 # so the cached meta data is kept.
3235 files = sum((x.values() for x in buckets.itervalues()), [])
3236 return sorted(files, key=lambda x: x.path)
3237
3238
3239def trace(logfile, cmd, cwd, api, output):
3240 """Traces an executable. Returns (returncode, output) from api.
3241
3242 Arguments:
3243 - logfile: file to write to.
3244 - cmd: command to run.
3245 - cwd: current directory to start the process in.
3246 - api: a tracing api instance.
3247 - output: if True, returns output, otherwise prints it at the console.
3248 """
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00003249 cmd = tools.fix_python_path(cmd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003250 api.clean_trace(logfile)
3251 with api.get_tracer(logfile) as tracer:
3252 return tracer.trace(cmd, cwd, 'default', output)
3253
3254
maruel@chromium.orge5322512013-08-19 20:17:57 +00003255def CMDclean(parser, args):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003256 """Cleans up traces."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003257 options, args = parser.parse_args(args)
3258 api = get_api()
3259 api.clean_trace(options.log)
3260 return 0
3261
3262
maruel@chromium.orge5322512013-08-19 20:17:57 +00003263def CMDtrace(parser, args):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003264 """Traces an executable."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003265 parser.allow_interspersed_args = False
3266 parser.add_option(
3267 '-q', '--quiet', action='store_true',
3268 help='Redirects traced executable output to /dev/null')
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003269 parser.add_option(
3270 '-s', '--sudo', action='store_true',
3271 help='Use sudo when shelling out the tracer tool (ignored on Windows)')
3272 parser.add_option(
3273 '-n', '--no-sudo', action='store_false',
3274 help='Don\'t use sudo')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003275 options, args = parser.parse_args(args)
3276
3277 if not args:
3278 parser.error('Please provide a command to run')
3279
Marc-Antoine Ruel5906fcd2013-11-08 21:02:24 -05003280 if not can_trace():
3281 parser.error('Please rerun this program with admin privileges')
3282
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003283 if not os.path.isabs(args[0]) and os.access(args[0], os.X_OK):
3284 args[0] = os.path.abspath(args[0])
3285
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003286 # options.sudo default value is None, which is to do whatever tracer defaults
3287 # do.
3288 api = get_api(use_sudo=options.sudo)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003289 return trace(options.log, args, os.getcwd(), api, options.quiet)[0]
3290
3291
maruel@chromium.orge5322512013-08-19 20:17:57 +00003292def CMDread(parser, args):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003293 """Reads the logs and prints the result."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003294 parser.add_option(
3295 '-V', '--variable',
3296 nargs=2,
3297 action='append',
3298 dest='variables',
3299 metavar='VAR_NAME directory',
3300 default=[],
3301 help=('Variables to replace relative directories against. Example: '
3302 '"-v \'$HOME\' \'/home/%s\'" will replace all occurence of your '
3303 'home dir with $HOME') % getpass.getuser())
3304 parser.add_option(
3305 '--root-dir',
3306 help='Root directory to base everything off it. Anything outside of this '
3307 'this directory will not be reported')
3308 parser.add_option(
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003309 '--trace-name',
3310 help='Only reads one of the trace. Defaults to reading all traces')
3311 parser.add_option(
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003312 '-j', '--json', action='store_true',
3313 help='Outputs raw result data as json')
3314 parser.add_option(
maruel@chromium.org3683afe2013-07-27 00:09:27 +00003315 '--trace-blacklist', action='append', default=[],
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003316 help='List of regexp to use as blacklist filter')
3317 options, args = parser.parse_args(args)
3318
3319 if options.root_dir:
maruel@chromium.org561d4b22013-09-26 21:08:08 +00003320 options.root_dir = file_path.get_native_path_case(
maruel@chromium.org7dc17b32013-04-02 22:27:05 +00003321 unicode(os.path.abspath(options.root_dir)))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003322
3323 variables = dict(options.variables)
3324 api = get_api()
maruel@chromium.org3683afe2013-07-27 00:09:27 +00003325 blacklist = gen_blacklist(options.trace_blacklist)
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003326 data = api.parse_log(options.log, blacklist, options.trace_name)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003327 # Process each trace.
3328 output_as_json = []
maruel@chromium.org9e56b592013-05-21 16:56:05 +00003329 try:
3330 for item in data:
3331 if 'exception' in item:
3332 # Do not abort the other traces.
3333 print >> sys.stderr, (
3334 'Trace %s: Got an exception: %s' % (
3335 item['trace'], item['exception'][1]))
3336 continue
3337 results = item['results']
3338 if options.root_dir:
3339 results = results.strip_root(options.root_dir)
3340
3341 if options.json:
3342 output_as_json.append(results.flatten())
3343 else:
3344 simplified = extract_directories(
3345 options.root_dir, results.files, blacklist)
3346 simplified = [f.replace_variables(variables) for f in simplified]
3347 if len(data) > 1:
3348 print('Trace: %s' % item['trace'])
3349 print('Total: %d' % len(results.files))
3350 print('Non existent: %d' % len(results.non_existent))
3351 for f in results.non_existent:
3352 print(' %s' % f.path)
3353 print(
3354 'Interesting: %d reduced to %d' % (
3355 len(results.existent), len(simplified)))
3356 for f in simplified:
3357 print(' %s' % f.path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003358
3359 if options.json:
maruel@chromium.org9e56b592013-05-21 16:56:05 +00003360 write_json(sys.stdout, output_as_json, False)
3361 except KeyboardInterrupt:
3362 return 1
3363 except IOError as e:
3364 if e.errno == errno.EPIPE:
3365 # Do not print a stack trace when the output is piped to less and the user
3366 # quits before the whole output was written.
3367 return 1
3368 raise
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003369 return 0
3370
3371
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00003372class OptionParserTraceInputs(tools.OptionParserWithLogging):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003373 """Adds automatic --log handling."""
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00003374
3375 # Disable --log-file options since both --log and --log-file options are
3376 # confusing.
3377 # TODO(vadimsh): Rename --log-file or --log to something else.
3378 enable_log_file = False
3379
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003380 def __init__(self, **kwargs):
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00003381 tools.OptionParserWithLogging.__init__(self, **kwargs)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003382 self.add_option(
3383 '-l', '--log', help='Log file to generate or read, required')
3384
3385 def parse_args(self, *args, **kwargs):
3386 """Makes sure the paths make sense.
3387
3388 On Windows, / and \ are often mixed together in a path.
3389 """
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00003390 options, args = tools.OptionParserWithLogging.parse_args(
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003391 self, *args, **kwargs)
3392 if not options.log:
3393 self.error('Must supply a log file with -l')
3394 options.log = os.path.abspath(options.log)
3395 return options, args
3396
3397
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003398def main(argv):
maruel@chromium.orge5322512013-08-19 20:17:57 +00003399 dispatcher = subcommand.CommandDispatcher(__name__)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003400 try:
maruel@chromium.org3d671992013-08-20 00:38:27 +00003401 return dispatcher.execute(
3402 OptionParserTraceInputs(version=__version__), argv)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003403 except TracingFailure, e:
3404 sys.stderr.write('\nError: ')
3405 sys.stderr.write(str(e))
3406 sys.stderr.write('\n')
3407 return 1
3408
3409
3410if __name__ == '__main__':
maruel@chromium.orge5322512013-08-19 20:17:57 +00003411 fix_encoding.fix_encoding()
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00003412 tools.disable_buffering()
maruel@chromium.orge5322512013-08-19 20:17:57 +00003413 colorama.init()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003414 sys.exit(main(sys.argv[1:]))