blob: a444ccc6e4bb084032735b4dc50db18c6b830bec [file] [log] [blame]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001#!/usr/bin/env python
2# coding=utf-8
maruelea586f32016-04-05 11:11:33 -07003# Copyright 2012 The LUCI Authors. All rights reserved.
maruelf1f5e2a2016-05-25 17:10:39 -07004# Use of this source code is governed under the Apache License, Version 2.0
5# that can be found in the LICENSE file.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00006
7"""Traces an executable and its child processes and extract the files accessed
8by them.
9
10The implementation uses OS-specific API. The native Kernel logger and the ETL
11interface is used on Windows. Dtrace is used on OSX. Strace is used otherwise.
12The OS-specific implementation is hidden in an 'API' interface.
13
14The results are embedded in a Results instance. The tracing is done in two
15phases, the first is to do the actual trace and generate an
16implementation-specific log file. Then the log file is parsed to extract the
17information, including the individual child processes and the files accessed
18from the log.
19"""
20
21import codecs
22import csv
maruel@chromium.org9e56b592013-05-21 16:56:05 +000023import errno
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000024import getpass
25import glob
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000026import logging
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000027import os
28import re
maruel@chromium.org9b2112a2013-04-05 18:01:02 +000029import stat
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000030import subprocess
31import sys
32import tempfile
33import threading
34import time
35import weakref
36
maruel@chromium.orge5322512013-08-19 20:17:57 +000037from third_party import colorama
38from third_party.depot_tools import fix_encoding
39from third_party.depot_tools import subcommand
40
maruel@chromium.org561d4b22013-09-26 21:08:08 +000041from utils import file_path
maruel12e30012015-10-09 11:55:35 -070042from utils import fs
Marc-Antoine Ruelf74cffe2015-07-15 15:21:34 -040043from utils import logging_utils
maruel8e4e40c2016-05-30 06:21:07 -070044from utils import subprocess42
vadimsh@chromium.orga4326472013-08-24 02:05:41 +000045from utils import tools
46
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000047## OS-specific imports
48
49if sys.platform == 'win32':
maruel@chromium.org561d4b22013-09-26 21:08:08 +000050 from ctypes.wintypes import byref, c_int, c_wchar_p
51 from ctypes.wintypes import windll # pylint: disable=E0611
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000052
53
maruel12e30012015-10-09 11:55:35 -070054__version__ = '0.2'
maruel@chromium.org3d671992013-08-20 00:38:27 +000055
56
tansella4949442016-06-23 22:34:32 -070057BASE_DIR = os.path.dirname(os.path.abspath(
58 __file__.decode(sys.getfilesystemencoding())))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000059ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR))
60
61
62class TracingFailure(Exception):
63 """An exception occured during tracing."""
64 def __init__(self, description, pid, line_number, line, *args):
65 super(TracingFailure, self).__init__(
66 description, pid, line_number, line, *args)
67 self.description = description
68 self.pid = pid
69 self.line_number = line_number
70 self.line = line
71 self.extra = args
72
73 def __str__(self):
74 out = self.description
75 if self.pid:
76 out += '\npid: %d' % self.pid
77 if self.line_number:
78 out += '\nline: %d' % self.line_number
79 if self.line:
80 out += '\n%s' % self.line
81 if self.extra:
82 out += '\n' + ', '.join(map(str, filter(None, self.extra)))
83 return out
84
85
86## OS-specific functions
87
88if sys.platform == 'win32':
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000089 def get_current_encoding():
90 """Returns the 'ANSI' code page associated to the process."""
91 return 'cp%d' % int(windll.kernel32.GetACP())
92
93
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000094 def CommandLineToArgvW(command_line):
95 """Splits a commandline into argv using CommandLineToArgvW()."""
96 # http://msdn.microsoft.com/library/windows/desktop/bb776391.aspx
97 size = c_int()
maruel@chromium.org306e0e72012-11-02 18:22:03 +000098 assert isinstance(command_line, unicode)
99 ptr = windll.shell32.CommandLineToArgvW(command_line, byref(size))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000100 try:
101 return [arg for arg in (c_wchar_p * size.value).from_address(ptr)]
102 finally:
103 windll.kernel32.LocalFree(ptr)
104
105
Marc-Antoine Ruel5906fcd2013-11-08 21:02:24 -0500106def can_trace():
107 """Returns True if the user is an administrator on Windows.
108
109 It is required for tracing to work.
110 """
111 if sys.platform != 'win32':
112 return True
113 return bool(windll.shell32.IsUserAnAdmin())
114
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000115
maruel@chromium.org2909b7c2013-05-29 18:52:59 +0000116def create_subprocess_thunk():
117 """Creates a small temporary script to start the child process.
118
119 This thunk doesn't block, its unique name is used to identify it as the
120 parent.
121 """
Marc-Antoine Ruel3c979cb2015-03-11 13:43:28 -0400122 handle, name = tempfile.mkstemp(prefix=u'trace_inputs_thunk', suffix='.py')
maruel@chromium.org2909b7c2013-05-29 18:52:59 +0000123 try:
124 os.write(
125 handle,
126 (
127 'import subprocess, sys\n'
128 'sys.exit(subprocess.call(sys.argv[2:]))\n'
129 ))
130 finally:
131 os.close(handle)
132 return name
133
134
135def create_exec_thunk():
136 """Creates a small temporary script to start the child executable.
137
138 Reads from the file handle provided as the fisrt argument to block, then
139 execv() the command to be traced.
140 """
Marc-Antoine Ruel3c979cb2015-03-11 13:43:28 -0400141 handle, name = tempfile.mkstemp(prefix=u'trace_inputs_thunk', suffix='.py')
maruel@chromium.org2909b7c2013-05-29 18:52:59 +0000142 try:
143 os.write(
144 handle,
145 (
146 'import os, sys\n'
147 'fd = int(sys.argv[1])\n'
148 # This will block until the controlling process writes a byte on the
149 # pipe. It will do so once the tracing tool, e.g. strace, is ready to
150 # trace.
151 'os.read(fd, 1)\n'
152 'os.close(fd)\n'
153 'os.execv(sys.argv[2], sys.argv[2:])\n'
154 ))
155 finally:
156 os.close(handle)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000157 return name
158
159
160def strace_process_quoted_arguments(text):
161 """Extracts quoted arguments on a string and return the arguments as a list.
162
163 Implemented as an automaton. Supports incomplete strings in the form
164 '"foo"...'.
165
166 Example:
167 With text = '"foo", "bar"', the function will return ['foo', 'bar']
168
169 TODO(maruel): Implement escaping.
170 """
171 # All the possible states of the DFA.
172 ( NEED_QUOTE, # Begining of a new arguments.
173 INSIDE_STRING, # Inside an argument.
174 ESCAPED, # Found a '\' inside a quote. Treat the next char as-is.
175 NEED_COMMA_OR_DOT, # Right after the closing quote of an argument. Could be
176 # a serie of 3 dots or a comma.
177 NEED_SPACE, # Right after a comma
178 NEED_DOT_2, # Found a dot, need a second one.
179 NEED_DOT_3, # Found second dot, need a third one.
180 NEED_COMMA, # Found third dot, need a comma.
181 ) = range(8)
182
183 state = NEED_QUOTE
184 out = []
185 for index, char in enumerate(text):
186 if char == '"':
187 if state == NEED_QUOTE:
188 state = INSIDE_STRING
189 # A new argument was found.
190 out.append('')
191 elif state == INSIDE_STRING:
192 # The argument is now closed.
193 state = NEED_COMMA_OR_DOT
194 elif state == ESCAPED:
195 out[-1] += char
196 state = INSIDE_STRING
197 else:
198 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000199 'Can\'t process char \'%s\' at column %d for: %r' % (
200 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000201 index,
202 text)
203 elif char == ',':
204 if state in (NEED_COMMA_OR_DOT, NEED_COMMA):
205 state = NEED_SPACE
206 elif state == INSIDE_STRING:
207 out[-1] += char
208 elif state == ESCAPED:
209 out[-1] += char
210 state = INSIDE_STRING
211 else:
212 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000213 'Can\'t process char \'%s\' at column %d for: %r' % (
214 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000215 index,
216 text)
217 elif char == ' ':
218 if state == NEED_SPACE:
219 state = NEED_QUOTE
220 elif state == INSIDE_STRING:
221 out[-1] += char
222 elif state == ESCAPED:
223 out[-1] += char
224 state = INSIDE_STRING
225 else:
226 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000227 'Can\'t process char \'%s\' at column %d for: %r' % (
228 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000229 index,
230 text)
231 elif char == '.':
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000232 if state in (NEED_QUOTE, NEED_COMMA_OR_DOT):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000233 # The string is incomplete, this mean the strace -s flag should be
234 # increased.
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000235 # For NEED_QUOTE, the input string would look like '"foo", ...'.
236 # For NEED_COMMA_OR_DOT, the input string would look like '"foo"...'
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000237 state = NEED_DOT_2
238 elif state == NEED_DOT_2:
239 state = NEED_DOT_3
240 elif state == NEED_DOT_3:
241 state = NEED_COMMA
242 elif state == INSIDE_STRING:
243 out[-1] += char
244 elif state == ESCAPED:
245 out[-1] += char
246 state = INSIDE_STRING
247 else:
248 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000249 'Can\'t process char \'%s\' at column %d for: %r' % (
250 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000251 index,
252 text)
253 elif char == '\\':
254 if state == ESCAPED:
255 out[-1] += char
256 state = INSIDE_STRING
257 elif state == INSIDE_STRING:
258 state = ESCAPED
259 else:
260 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000261 'Can\'t process char \'%s\' at column %d for: %r' % (
262 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000263 index,
264 text)
265 else:
266 if state == INSIDE_STRING:
267 out[-1] += char
268 else:
269 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000270 'Can\'t process char \'%s\' at column %d for: %r' % (
271 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000272 index,
273 text)
274 if state not in (NEED_COMMA, NEED_COMMA_OR_DOT):
275 raise ValueError(
276 'String is incorrectly terminated: %r' % text,
277 text)
278 return out
279
280
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000281def assert_is_renderable(pseudo_string):
282 """Asserts the input is a valid object to be processed by render()."""
283 assert (
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000284 pseudo_string is None or
285 isinstance(pseudo_string, unicode) or
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000286 hasattr(pseudo_string, 'render')), repr(pseudo_string)
287
288
289def render(pseudo_string):
290 """Converts the pseudo-string to an unicode string."""
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000291 if pseudo_string is None or isinstance(pseudo_string, unicode):
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000292 return pseudo_string
293 return pseudo_string.render()
294
295
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000296class Results(object):
297 """Results of a trace session."""
298
299 class _TouchedObject(object):
300 """Something, a file or a directory, that was accessed."""
301 def __init__(self, root, path, tainted, size, nb_files):
302 logging.debug(
303 '%s(%s, %s, %s, %s, %s)' %
304 (self.__class__.__name__, root, path, tainted, size, nb_files))
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000305 assert_is_renderable(root)
306 assert_is_renderable(path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000307 self.root = root
308 self.path = path
309 self.tainted = tainted
310 self.nb_files = nb_files
maruel@chromium.org538141b2013-06-03 20:57:17 +0000311 # Can be used as a cache or a default value, depending on context. In
312 # particular, once self.tainted is True, because the path was replaced
313 # with a variable, it is not possible to look up the file size.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000314 self._size = size
315 # These are cache only.
316 self._real_path = None
317
318 # Check internal consistency.
319 assert path, path
maruel@chromium.org561d4b22013-09-26 21:08:08 +0000320 assert tainted or bool(root) != bool(file_path.isabs(path)), (root, path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000321 assert tainted or (
maruel12e30012015-10-09 11:55:35 -0700322 not fs.exists(self.full_path) or
maruel@chromium.org561d4b22013-09-26 21:08:08 +0000323 (self.full_path == file_path.get_native_path_case(self.full_path))), (
324 tainted,
325 self.full_path,
326 file_path.get_native_path_case(self.full_path))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000327
328 @property
329 def existent(self):
330 return self.size != -1
331
332 @property
333 def full_path(self):
334 if self.root:
335 return os.path.join(self.root, self.path)
336 return self.path
337
338 @property
339 def real_path(self):
340 """Returns the path with symlinks resolved."""
341 if not self._real_path:
maruel12e30012015-10-09 11:55:35 -0700342 self._real_path = fs.path.realpath(self.full_path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000343 return self._real_path
344
345 @property
346 def size(self):
maruel@chromium.org538141b2013-06-03 20:57:17 +0000347 """File's size. -1 is not existent.
348
349 Once tainted, it is not possible the retrieve the file size anymore since
350 the path is composed of variables.
351 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000352 if self._size is None and not self.tainted:
353 try:
maruel12e30012015-10-09 11:55:35 -0700354 self._size = fs.stat(self.full_path).st_size
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000355 except OSError:
356 self._size = -1
357 return self._size
358
359 def flatten(self):
360 """Returns a dict representing this object.
361
362 A 'size' of 0 means the file was only touched and not read.
363 """
364 return {
365 'path': self.path,
366 'size': self.size,
367 }
368
369 def replace_variables(self, variables):
370 """Replaces the root of this File with one of the variables if it matches.
371
372 If a variable replacement occurs, the cloned object becomes tainted.
373 """
374 for variable, root_path in variables.iteritems():
375 if self.path.startswith(root_path):
376 return self._clone(
377 self.root, variable + self.path[len(root_path):], True)
378 # No need to clone, returns ourself.
379 return self
380
381 def strip_root(self, root):
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000382 """Returns a clone of itself with 'root' stripped off.
383
384 Note that the file is kept if it is either accessible from a symlinked
385 path that was used to access the file or through the real path.
386 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000387 # Check internal consistency.
maruel@chromium.org561d4b22013-09-26 21:08:08 +0000388 assert (
389 self.tainted or
390 (file_path.isabs(root) and root.endswith(os.path.sep))), root
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000391 if not self.full_path.startswith(root):
392 # Now try to resolve the symlinks to see if it can be reached this way.
393 # Only try *after* trying without resolving symlink.
394 if not self.real_path.startswith(root):
395 return None
396 path = self.real_path
397 else:
398 path = self.full_path
399 return self._clone(root, path[len(root):], self.tainted)
400
401 def _clone(self, new_root, new_path, tainted):
402 raise NotImplementedError(self.__class__.__name__)
403
404 class File(_TouchedObject):
405 """A file that was accessed. May not be present anymore.
406
407 If tainted is true, it means it is not a real path anymore as a variable
408 replacement occured.
409
maruel@chromium.org538141b2013-06-03 20:57:17 +0000410 |mode| can be one of None, TOUCHED, READ or WRITE.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000411 """
maruel@chromium.org538141b2013-06-03 20:57:17 +0000412 # Was probed for existence, and it is existent, but was never _opened_.
413 TOUCHED = 't'
414 # Opened for read only and guaranteed to not have been written to.
415 READ = 'r'
416 # Opened for write.
417 WRITE = 'w'
418
419 # They are listed in order of priority. E.g. if a file is traced as TOUCHED
420 # then as WRITE, only keep WRITE. None means no idea, which is a problem on
421 # Windows.
422 ACCEPTABLE_MODES = (None, TOUCHED, READ, WRITE)
423
424 def __init__(self, root, path, tainted, size, mode):
425 assert mode in self.ACCEPTABLE_MODES
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000426 super(Results.File, self).__init__(root, path, tainted, size, 1)
maruel@chromium.org538141b2013-06-03 20:57:17 +0000427 self.mode = mode
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000428
429 def _clone(self, new_root, new_path, tainted):
430 """Clones itself keeping meta-data."""
431 # Keep the self.size and self._real_path caches for performance reason. It
432 # is also important when the file becomes tainted (with a variable instead
433 # of the real path) since self.path is not an on-disk path anymore so
434 # out._size cannot be updated.
maruel@chromium.org538141b2013-06-03 20:57:17 +0000435 out = self.__class__(new_root, new_path, tainted, self.size, self.mode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000436 out._real_path = self._real_path
437 return out
438
maruel@chromium.org538141b2013-06-03 20:57:17 +0000439 def flatten(self):
440 out = super(Results.File, self).flatten()
441 out['mode'] = self.mode
442 return out
443
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000444 class Directory(_TouchedObject):
maruel@chromium.org538141b2013-06-03 20:57:17 +0000445 """A directory of files. Must exist.
446
447 For a Directory instance, self.size is not a cache, it's an actual value
448 that is never modified and represents the total size of the files contained
449 in this directory. It is possible that the directory is empty so that
450 size==0; this happens if there's only an invalid symlink in it.
451 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000452 def __init__(self, root, path, tainted, size, nb_files):
453 """path='.' is a valid value and must be handled appropriately."""
454 assert not path.endswith(os.path.sep), path
455 super(Results.Directory, self).__init__(
456 root, path + os.path.sep, tainted, size, nb_files)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000457
458 def flatten(self):
459 out = super(Results.Directory, self).flatten()
460 out['nb_files'] = self.nb_files
461 return out
462
463 def _clone(self, new_root, new_path, tainted):
464 """Clones itself keeping meta-data."""
465 out = self.__class__(
466 new_root,
467 new_path.rstrip(os.path.sep),
468 tainted,
469 self.size,
470 self.nb_files)
471 out._real_path = self._real_path
472 return out
473
474 class Process(object):
475 """A process that was traced.
476
477 Contains references to the files accessed by this process and its children.
478 """
479 def __init__(self, pid, files, executable, command, initial_cwd, children):
480 logging.debug('Process(%s, %d, ...)' % (pid, len(files)))
481 self.pid = pid
482 self.files = sorted(files, key=lambda x: x.path)
483 self.children = children
484 self.executable = executable
485 self.command = command
486 self.initial_cwd = initial_cwd
487
488 # Check internal consistency.
489 assert len(set(f.path for f in self.files)) == len(self.files), sorted(
490 f.path for f in self.files)
491 assert isinstance(self.children, list)
492 assert isinstance(self.files, list)
493
494 @property
495 def all(self):
496 for child in self.children:
497 for i in child.all:
498 yield i
499 yield self
500
501 def flatten(self):
502 return {
503 'children': [c.flatten() for c in self.children],
504 'command': self.command,
505 'executable': self.executable,
506 'files': [f.flatten() for f in self.files],
507 'initial_cwd': self.initial_cwd,
508 'pid': self.pid,
509 }
510
511 def strip_root(self, root):
maruel@chromium.org561d4b22013-09-26 21:08:08 +0000512 assert file_path.isabs(root) and root.endswith(os.path.sep), root
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000513 # Loads the files after since they are constructed as objects.
514 out = self.__class__(
515 self.pid,
516 filter(None, (f.strip_root(root) for f in self.files)),
517 self.executable,
518 self.command,
519 self.initial_cwd,
520 [c.strip_root(root) for c in self.children])
521 logging.debug(
522 'strip_root(%s) %d -> %d' % (root, len(self.files), len(out.files)))
523 return out
524
525 def __init__(self, process):
526 self.process = process
527 # Cache.
528 self._files = None
529
530 def flatten(self):
531 return {
532 'root': self.process.flatten(),
533 }
534
535 @property
536 def files(self):
537 if self._files is None:
538 self._files = sorted(
539 sum((p.files for p in self.process.all), []),
540 key=lambda x: x.path)
541 return self._files
542
543 @property
544 def existent(self):
545 return [f for f in self.files if f.existent]
546
547 @property
548 def non_existent(self):
549 return [f for f in self.files if not f.existent]
550
551 def strip_root(self, root):
552 """Returns a clone with all the files outside the directory |root| removed
553 and converts all the path to be relative paths.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000554
555 It keeps files accessible through the |root| directory or that have been
556 accessed through any symlink which points to the same directory.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000557 """
558 # Resolve any symlink
maruel12e30012015-10-09 11:55:35 -0700559 root = fs.realpath(root)
maruel@chromium.org561d4b22013-09-26 21:08:08 +0000560 root = (
561 file_path.get_native_path_case(root).rstrip(os.path.sep) + os.path.sep)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000562 logging.debug('strip_root(%s)' % root)
563 return Results(self.process.strip_root(root))
564
565
566class ApiBase(object):
567 """OS-agnostic API to trace a process and its children."""
568 class Context(object):
569 """Processes one log line at a time and keeps the list of traced processes.
570
571 The parsing is complicated by the fact that logs are traced out of order for
572 strace but in-order for dtrace and logman. In addition, on Windows it is
573 very frequent that processids are reused so a flat list cannot be used. But
574 at the same time, it is impossible to faithfully construct a graph when the
575 logs are processed out of order. So both a tree and a flat mapping are used,
576 the tree is the real process tree, while the flat mapping stores the last
577 valid process for the corresponding processid. For the strace case, the
578 tree's head is guessed at the last moment.
579 """
580 class Process(object):
581 """Keeps context for one traced child process.
582
583 Logs all the files this process touched. Ignores directories.
584 """
585 def __init__(self, blacklist, pid, initial_cwd):
586 # Check internal consistency.
587 assert isinstance(pid, int), repr(pid)
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000588 assert_is_renderable(initial_cwd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000589 self.pid = pid
590 # children are Process instances.
591 self.children = []
592 self.initial_cwd = initial_cwd
593 self.cwd = None
maruel@chromium.org538141b2013-06-03 20:57:17 +0000594 self.files = {}
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000595 self.executable = None
596 self.command = None
597 self._blacklist = blacklist
598
599 def to_results_process(self):
600 """Resolves file case sensitivity and or late-bound strings."""
601 # When resolving files, it's normal to get dupe because a file could be
602 # opened multiple times with different case. Resolve the deduplication
603 # here.
604 def fix_path(x):
605 """Returns the native file path case.
606
607 Converts late-bound strings.
608 """
609 if not x:
610 # Do not convert None instance to 'None'.
611 return x
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000612 x = render(x)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000613 if os.path.isabs(x):
614 # If the path is not absolute, which tends to happen occasionally on
615 # Windows, it is not possible to get the native path case so ignore
616 # that trace. It mostly happens for 'executable' value.
maruel@chromium.org561d4b22013-09-26 21:08:08 +0000617 x = file_path.get_native_path_case(x)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000618 return x
619
maruel@chromium.org538141b2013-06-03 20:57:17 +0000620 def fix_and_blacklist_path(x, m):
621 """Receives a tuple (filepath, mode) and processes filepath."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000622 x = fix_path(x)
623 if not x:
624 return
625 # The blacklist needs to be reapplied, since path casing could
626 # influence blacklisting.
627 if self._blacklist(x):
628 return
maruel@chromium.org538141b2013-06-03 20:57:17 +0000629 # Filters out directories. Some may have passed through.
maruel12e30012015-10-09 11:55:35 -0700630 if fs.isdir(x):
maruel@chromium.org538141b2013-06-03 20:57:17 +0000631 return
632 return x, m
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000633
maruel@chromium.org538141b2013-06-03 20:57:17 +0000634 # Renders all the files as strings, as some could be RelativePath
635 # instances. It is important to do it first since there could still be
636 # multiple entries with the same path but different modes.
637 rendered = (
638 fix_and_blacklist_path(f, m) for f, m in self.files.iteritems())
639 files = sorted(
640 (f for f in rendered if f),
641 key=lambda x: (x[0], Results.File.ACCEPTABLE_MODES.index(x[1])))
642 # Then converting into a dict will automatically clean up lesser
643 # important values.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000644 files = [
maruel@chromium.org538141b2013-06-03 20:57:17 +0000645 Results.File(None, f, False, None, m)
646 for f, m in dict(files).iteritems()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000647 ]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000648 return Results.Process(
649 self.pid,
650 files,
651 fix_path(self.executable),
652 self.command,
653 fix_path(self.initial_cwd),
654 [c.to_results_process() for c in self.children])
655
maruel@chromium.org538141b2013-06-03 20:57:17 +0000656 def add_file(self, filepath, mode):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000657 """Adds a file if it passes the blacklist."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000658 if self._blacklist(render(filepath)):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000659 return
maruel@chromium.org538141b2013-06-03 20:57:17 +0000660 logging.debug('add_file(%d, %s, %s)', self.pid, filepath, mode)
661 # Note that filepath and not render(filepath) is added. It is because
662 # filepath could be something else than a string, like a RelativePath
663 # instance for dtrace logs.
664 modes = Results.File.ACCEPTABLE_MODES
665 old_mode = self.files.setdefault(filepath, mode)
666 if old_mode != mode and modes.index(old_mode) < modes.index(mode):
667 # Take the highest value.
668 self.files[filepath] = mode
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000669
670 def __init__(self, blacklist):
671 self.blacklist = blacklist
672 # Initial process.
673 self.root_process = None
674 # dict to accelerate process lookup, to not have to lookup the whole graph
675 # each time.
676 self._process_lookup = {}
677
678 class Tracer(object):
679 """During it's lifetime, the tracing subsystem is enabled."""
680 def __init__(self, logname):
681 self._logname = logname
maruel@chromium.org808e3f22013-05-16 16:42:54 +0000682 self._lock = threading.RLock()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000683 self._traces = []
684 self._initialized = True
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +0000685 self._scripts_to_cleanup = []
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000686
687 def trace(self, cmd, cwd, tracename, output):
688 """Runs the OS-specific trace program on an executable.
689
690 Arguments:
691 - cmd: The command (a list) to run.
692 - cwd: Current directory to start the child process in.
693 - tracename: Name of the trace in the logname file.
694 - output: If False, redirects output to PIPEs.
695
696 Returns a tuple (resultcode, output) and updates the internal trace
697 entries.
698 """
699 # The implementation adds an item to self._traces.
700 raise NotImplementedError(self.__class__.__name__)
701
702 def close(self, _timeout=None):
703 """Saves the meta-data in the logname file.
704
705 For kernel-based tracing, stops the tracing subsystem.
706
707 Must not be used manually when using 'with' construct.
708 """
709 with self._lock:
maruel@chromium.org808e3f22013-05-16 16:42:54 +0000710 if not self._initialized:
711 raise TracingFailure(
712 'Called %s.close() on an unitialized object' %
713 self.__class__.__name__,
714 None, None, None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000715 try:
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +0000716 while self._scripts_to_cleanup:
717 try:
maruel12e30012015-10-09 11:55:35 -0700718 fs.remove(self._scripts_to_cleanup.pop())
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +0000719 except OSError as e:
720 logging.error('Failed to delete a temporary script: %s', e)
Marc-Antoine Ruelde011802013-11-12 15:19:47 -0500721 tools.write_json(self._logname, self._gen_logdata(), False)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000722 finally:
723 self._initialized = False
724
725 def post_process_log(self):
726 """Post-processes the log so it becomes faster to load afterward.
727
728 Must not be used manually when using 'with' construct.
729 """
730 assert not self._initialized, 'Must stop tracing first.'
731
maruel@chromium.org808e3f22013-05-16 16:42:54 +0000732 def _gen_logdata(self):
733 """Returns the data to be saved in the trace file."""
734 return {
735 'traces': self._traces,
736 }
737
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000738 def __enter__(self):
739 """Enables 'with' statement."""
740 return self
741
742 def __exit__(self, exc_type, exc_value, traceback):
743 """Enables 'with' statement."""
744 self.close()
745 # If an exception was thrown, do not process logs.
746 if not exc_type:
747 self.post_process_log()
748
749 def get_tracer(self, logname):
750 """Returns an ApiBase.Tracer instance.
751
752 Initializes the tracing subsystem, which is a requirement for kernel-based
753 tracers. Only one tracer instance should be live at a time!
754
755 logname is the filepath to the json file that will contain the meta-data
756 about the logs.
757 """
758 return self.Tracer(logname)
759
760 @staticmethod
761 def clean_trace(logname):
762 """Deletes an old log."""
763 raise NotImplementedError()
764
765 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +0000766 def parse_log(cls, logname, blacklist, trace_name):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000767 """Processes trace logs and returns the files opened and the files that do
768 not exist.
769
770 It does not track directories.
771
maruel@chromium.orgec74ff82012-10-29 18:14:47 +0000772 Arguments:
773 - logname: must be an absolute path.
774 - blacklist: must be a lambda.
775 - trace_name: optional trace to read, defaults to reading all traces.
maruel@chromium.org8b056ba2012-10-16 14:04:49 +0000776
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000777 Most of the time, files that do not exist are temporary test files that
778 should be put in /tmp instead. See http://crbug.com/116251.
779
780 Returns a list of dict with keys:
781 - results: A Results instance.
782 - trace: The corresponding tracename parameter provided to
783 get_tracer().trace().
784 - output: Output gathered during execution, if get_tracer().trace(...,
785 output=False) was used.
786 """
787 raise NotImplementedError(cls.__class__.__name__)
788
789
790class Strace(ApiBase):
791 """strace implies linux."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000792 @staticmethod
793 def load_filename(filename):
794 """Parses a filename in a log."""
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000795 # TODO(maruel): Be compatible with strace -x.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000796 assert isinstance(filename, str)
797 out = ''
798 i = 0
799 while i < len(filename):
800 c = filename[i]
801 if c == '\\':
802 out += chr(int(filename[i+1:i+4], 8))
803 i += 4
804 else:
805 out += c
806 i += 1
807 # TODO(maruel): That's not necessarily true that the current code page is
808 # utf-8.
809 return out.decode('utf-8')
810
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000811 class Context(ApiBase.Context):
812 """Processes a strace log line and keeps the list of existent and non
813 existent files accessed.
814
815 Ignores directories.
816
817 Uses late-binding to processes the cwd of each process. The problem is that
818 strace generates one log file per process it traced but doesn't give any
maruel@chromium.org2909b7c2013-05-29 18:52:59 +0000819 information about which process was started when and by who. So process the
820 logs out of order and use late binding with RelativePath to be able to
821 deduce the initial directory of each process once all the logs are parsed.
822
823 TODO(maruel): Use the script even in the non-sudo case, so log parsing can
824 be done in two phase: first find the root process, then process the child
825 processes in order. With that, it should be possible to not use RelativePath
826 anymore. This would significantly simplify the code!
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000827 """
828 class Process(ApiBase.Context.Process):
829 """Represents the state of a process.
830
831 Contains all the information retrieved from the pid-specific log.
832 """
833 # Function names are using ([a-z_0-9]+)
834 # This is the most common format. function(args) = result
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000835 RE_HEADER = re.compile(r'^([a-z_0-9]+)\((.*?)\)\s+= (.+)$')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000836 # An interrupted function call, only grab the minimal header.
837 RE_UNFINISHED = re.compile(r'^([^\(]+)(.*) \<unfinished \.\.\.\>$')
838 # A resumed function call.
839 RE_RESUMED = re.compile(r'^<\.\.\. ([^ ]+) resumed> (.+)$')
840 # A process received a signal.
841 RE_SIGNAL = re.compile(r'^--- SIG[A-Z]+ .+ ---')
842 # A process didn't handle a signal. Ignore any junk appearing before,
843 # because the process was forcibly killed so it won't open any new file.
844 RE_KILLED = re.compile(
845 r'^.*\+\+\+ killed by ([A-Z]+)( \(core dumped\))? \+\+\+$')
846 # The process has exited.
847 RE_PROCESS_EXITED = re.compile(r'^\+\+\+ exited with (\d+) \+\+\+')
848 # A call was canceled. Ignore any prefix.
849 RE_UNAVAILABLE = re.compile(r'^.*\)\s*= \? <unavailable>$')
850 # Happens when strace fails to even get the function name.
851 UNNAMED_FUNCTION = '????'
852
853 # Corner-case in python, a class member function decorator must not be
854 # @staticmethod.
855 def parse_args(regexp, expect_zero): # pylint: disable=E0213
856 """Automatically convert the str 'args' into a list of processed
857 arguments.
858
859 Arguments:
860 - regexp is used to parse args.
861 - expect_zero: one of True, False or None.
862 - True: will check for result.startswith('0') first and will ignore
863 the trace line completely otherwise. This is important because for
864 many functions, the regexp will not process if the call failed.
865 - False: will check for not result.startswith(('?', '-1')) for the
866 same reason than with True.
867 - None: ignore result.
868 """
869 def meta_hook(function):
870 assert function.__name__.startswith('handle_')
871 def hook(self, args, result):
872 if expect_zero is True and not result.startswith('0'):
873 return
874 if expect_zero is False and result.startswith(('?', '-1')):
875 return
876 match = re.match(regexp, args)
877 if not match:
878 raise TracingFailure(
879 'Failed to parse %s(%s) = %s' %
880 (function.__name__[len('handle_'):], args, result),
881 None, None, None)
882 return function(self, match.groups(), result)
883 return hook
884 return meta_hook
885
886 class RelativePath(object):
887 """A late-bound relative path."""
888 def __init__(self, parent, value):
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000889 assert_is_renderable(parent)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000890 self.parent = parent
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000891 assert (
892 value is None or
893 (isinstance(value, unicode) and not os.path.isabs(value)))
894 self.value = value
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000895 if self.value:
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000896 # TODO(maruel): On POSIX, '\\' is a valid character so remove this
897 # assert.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000898 assert '\\' not in self.value, value
899 assert '\\' not in self.value, (repr(value), repr(self.value))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000900
901 def render(self):
902 """Returns the current directory this instance is representing.
903
904 This function is used to return the late-bound value.
905 """
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000906 assert self.parent is not None
907 parent = render(self.parent)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000908 if self.value:
909 return os.path.normpath(os.path.join(parent, self.value))
910 return parent
911
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000912 def __init__(self, root, pid):
913 """Keeps enough information to be able to guess the original process
914 root.
915
916 strace doesn't store which process was the initial process. So more
917 information needs to be kept so the graph can be reconstructed from the
918 flat map.
919 """
920 logging.info('%s(%d)' % (self.__class__.__name__, pid))
921 super(Strace.Context.Process, self).__init__(root.blacklist, pid, None)
922 assert isinstance(root, ApiBase.Context)
923 self._root = weakref.ref(root)
924 # The dict key is the function name of the pending call, like 'open'
925 # or 'execve'.
926 self._pending_calls = {}
927 self._line_number = 0
928 # Current directory when the process started.
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000929 if isinstance(self._root(), unicode):
930 self.initial_cwd = self._root()
931 else:
932 self.initial_cwd = self.RelativePath(self._root(), None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000933 self.parentid = None
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +0000934 self._done = False
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000935
936 def get_cwd(self):
937 """Returns the best known value of cwd."""
938 return self.cwd or self.initial_cwd
939
940 def render(self):
941 """Returns the string value of the RelativePath() object.
942
943 Used by RelativePath. Returns the initial directory and not the
944 current one since the current directory 'cwd' validity is time-limited.
945
946 The validity is only guaranteed once all the logs are processed.
947 """
948 return self.initial_cwd.render()
949
950 def on_line(self, line):
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000951 assert isinstance(line, str)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000952 self._line_number += 1
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000953 try:
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000954 if self._done:
955 raise TracingFailure(
956 'Found a trace for a terminated process or corrupted log',
957 None, None, None)
958
959 if self.RE_SIGNAL.match(line):
960 # Ignore signals.
961 return
962
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000963 match = self.RE_KILLED.match(line)
964 if match:
965 # Converts a '+++ killed by Foo +++' trace into an exit_group().
966 self.handle_exit_group(match.group(1), None)
967 return
968
969 match = self.RE_PROCESS_EXITED.match(line)
970 if match:
971 # Converts a '+++ exited with 1 +++' trace into an exit_group()
972 self.handle_exit_group(match.group(1), None)
973 return
974
975 match = self.RE_UNFINISHED.match(line)
976 if match:
977 if match.group(1) in self._pending_calls:
978 raise TracingFailure(
979 'Found two unfinished calls for the same function',
980 None, None, None,
981 self._pending_calls)
982 self._pending_calls[match.group(1)] = (
983 match.group(1) + match.group(2))
984 return
985
986 match = self.RE_UNAVAILABLE.match(line)
987 if match:
988 # This usually means a process was killed and a pending call was
989 # canceled.
990 # TODO(maruel): Look up the last exit_group() trace just above and
991 # make sure any self._pending_calls[anything] is properly flushed.
992 return
993
994 match = self.RE_RESUMED.match(line)
995 if match:
996 if match.group(1) not in self._pending_calls:
997 raise TracingFailure(
998 'Found a resumed call that was not logged as unfinished',
999 None, None, None,
1000 self._pending_calls)
1001 pending = self._pending_calls.pop(match.group(1))
1002 # Reconstruct the line.
1003 line = pending + match.group(2)
1004
1005 match = self.RE_HEADER.match(line)
1006 if not match:
maruel@chromium.orgbcfc9e12013-03-19 18:47:17 +00001007 # The line is corrupted. It happens occasionally when a process is
1008 # killed forcibly with activity going on. Assume the process died.
1009 # No other line can be processed afterward.
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001010 logging.debug('%d is done: %s', self.pid, line)
maruel@chromium.orgbcfc9e12013-03-19 18:47:17 +00001011 self._done = True
1012 return
1013
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001014 if match.group(1) == self.UNNAMED_FUNCTION:
1015 return
1016
1017 # It's a valid line, handle it.
1018 handler = getattr(self, 'handle_%s' % match.group(1), None)
1019 if not handler:
1020 self._handle_unknown(match.group(1), match.group(2), match.group(3))
1021 return handler(match.group(2), match.group(3))
1022 except TracingFailure, e:
1023 # Hack in the values since the handler could be a static function.
1024 e.pid = self.pid
1025 e.line = line
1026 e.line_number = self._line_number
1027 # Re-raise the modified exception.
1028 raise
1029 except (KeyError, NotImplementedError, ValueError), e:
1030 raise TracingFailure(
1031 'Trace generated a %s exception: %s' % (
1032 e.__class__.__name__, str(e)),
1033 self.pid,
1034 self._line_number,
1035 line,
1036 e)
1037
1038 @parse_args(r'^\"(.+?)\", [FKORWX_|]+$', True)
1039 def handle_access(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001040 self._handle_file(args[0], Results.File.TOUCHED)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001041
1042 @parse_args(r'^\"(.+?)\"$', True)
1043 def handle_chdir(self, args, _result):
1044 """Updates cwd."""
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001045 self.cwd = self._mangle(args[0])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001046 logging.debug('handle_chdir(%d, %s)' % (self.pid, self.cwd))
1047
maruel@chromium.org538141b2013-06-03 20:57:17 +00001048 @parse_args(r'^\"(.+?)\", (\d+), (\d+)$', False)
1049 def handle_chown(self, args, _result):
1050 # TODO(maruel): Look at result?
1051 self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org55550722013-05-28 14:43:14 +00001052
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001053 def handle_clone(self, _args, result):
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001054 self._handling_forking('clone', result)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001055
1056 def handle_close(self, _args, _result):
1057 pass
1058
maruel@chromium.org538141b2013-06-03 20:57:17 +00001059 @parse_args(r'^\"(.+?)\", (\d+)$', False)
1060 def handle_chmod(self, args, _result):
1061 self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001062
maruel@chromium.org538141b2013-06-03 20:57:17 +00001063 @parse_args(r'^\"(.+?)\", (\d+)$', False)
1064 def handle_creat(self, args, _result):
1065 self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001066
1067 @parse_args(r'^\"(.+?)\", \[(.+)\], \[\/\* \d+ vars? \*\/\]$', True)
1068 def handle_execve(self, args, _result):
1069 # Even if in practice execve() doesn't returns when it succeeds, strace
1070 # still prints '0' as the result.
1071 filepath = args[0]
maruel@chromium.org538141b2013-06-03 20:57:17 +00001072 self._handle_file(filepath, Results.File.READ)
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001073 self.executable = self._mangle(filepath)
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001074 try:
1075 self.command = strace_process_quoted_arguments(args[1])
1076 except ValueError as e:
1077 raise TracingFailure(
1078 'Failed to process command line argument:\n%s' % e.args[0],
1079 None, None, None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001080
1081 def handle_exit_group(self, _args, _result):
1082 """Removes cwd."""
1083 self.cwd = None
1084
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001085 @parse_args(r'^(\d+|AT_FDCWD), \"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', True)
1086 def handle_faccessat(self, args, _results):
1087 if args[0] == 'AT_FDCWD':
maruel@chromium.org538141b2013-06-03 20:57:17 +00001088 self._handle_file(args[1], Results.File.TOUCHED)
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001089 else:
1090 raise Exception('Relative faccess not implemented.')
1091
maruel@chromium.org0781f322013-05-28 19:45:49 +00001092 def handle_fallocate(self, _args, result):
1093 pass
1094
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001095 def handle_fork(self, args, result):
1096 self._handle_unknown('fork', args, result)
1097
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001098 def handle_futex(self, _args, _result):
1099 pass
1100
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001101 @parse_args(r'^(\".+?\"|0x[a-f0-9]+), (\d+)$', False)
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001102 def handle_getcwd(self, args, _result):
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001103 # TODO(maruel): Resolve file handle.
1104 if not args[0].startswith('0x'):
1105 filepath = args[0][1:-1]
1106 if os.path.isabs(filepath):
1107 logging.debug('handle_chdir(%d, %s)' % (self.pid, self.cwd))
1108 if not isinstance(self.cwd, unicode):
1109 # Take the occasion to reset the path.
1110 self.cwd = self._mangle(filepath)
1111 else:
1112 # It should always match.
1113 assert self.cwd == Strace.load_filename(filepath), (
1114 self.cwd, filepath)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001115
1116 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1117 def handle_link(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001118 self._handle_file(args[0], Results.File.READ)
1119 self._handle_file(args[1], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001120
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001121 @parse_args(r'^(\".+?\"|0x[a-f0-9]+), \{(?:.+?, )?\.\.\.\}$', True)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001122 def handle_lstat(self, args, _result):
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001123 # TODO(maruel): Resolve file handle.
1124 if not args[0].startswith('0x'):
1125 self._handle_file(args[0][1:-1], Results.File.TOUCHED)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001126
1127 def handle_mkdir(self, _args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001128 # We track content, not directories.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001129 pass
1130
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001131 @parse_args(r'^(\".*?\"|0x[a-f0-9]+), ([A-Z\_\|]+)(|, \d+)$', False)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001132 def handle_open(self, args, _result):
1133 if 'O_DIRECTORY' in args[1]:
1134 return
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001135 # TODO(maruel): Resolve file handle.
1136 if not args[0].startswith('0x'):
1137 t = Results.File.READ if 'O_RDONLY' in args[1] else Results.File.WRITE
1138 self._handle_file(args[0][1:-1], t)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001139
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001140 @parse_args(
1141 r'^(\d+|AT_FDCWD), (\".*?\"|0x[a-f0-9]+), ([A-Z\_\|]+)(|, \d+)$',
1142 False)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001143 def handle_openat(self, args, _result):
1144 if 'O_DIRECTORY' in args[2]:
1145 return
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001146 if args[0] == 'AT_FDCWD':
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001147 # TODO(maruel): Resolve file handle.
1148 if not args[1].startswith('0x'):
1149 t = (
1150 Results.File.READ if 'O_RDONLY' in args[2]
maruel@chromium.org538141b2013-06-03 20:57:17 +00001151 else Results.File.WRITE)
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001152 self._handle_file(args[1][1:-1], t)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001153 else:
1154 # TODO(maruel): Implement relative open if necessary instead of the
1155 # AT_FDCWD flag, let's hope not since this means tracking all active
1156 # directory handles.
maruel@chromium.org538141b2013-06-03 20:57:17 +00001157 raise NotImplementedError('Relative open via openat not implemented.')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001158
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001159 @parse_args(
1160 r'^(\".+?\"|0x[a-f0-9]+), (?:\".+?\"(?:\.\.\.)?|0x[a-f0-9]+), '
1161 '\d+$',
1162 False)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001163 def handle_readlink(self, args, _result):
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001164 # TODO(maruel): Resolve file handle.
1165 if not args[0].startswith('0x'):
1166 self._handle_file(args[0][1:-1], Results.File.READ)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001167
1168 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1169 def handle_rename(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001170 self._handle_file(args[0], Results.File.READ)
1171 self._handle_file(args[1], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001172
1173 def handle_rmdir(self, _args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001174 # We track content, not directories.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001175 pass
1176
1177 def handle_setxattr(self, _args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001178 # TODO(maruel): self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001179 pass
1180
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001181 @parse_args(r'^(\".+?\"|0x[a-f0-9]+), \{.+?\}$', True)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001182 def handle_stat(self, args, _result):
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001183 # TODO(maruel): Resolve file handle.
1184 if not args[0].startswith('0x'):
1185 self._handle_file(args[0][1:-1], Results.File.TOUCHED)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001186
maruel@chromium.org538141b2013-06-03 20:57:17 +00001187 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1188 def handle_symlink(self, args, _result):
1189 self._handle_file(args[0], Results.File.TOUCHED)
1190 self._handle_file(args[1], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001191
csharp@chromium.orgfa672922012-11-02 17:43:31 +00001192 @parse_args(r'^\"(.+?)\", \d+', True)
1193 def handle_truncate(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001194 self._handle_file(args[0], Results.File.WRITE)
csharp@chromium.orgfa672922012-11-02 17:43:31 +00001195
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001196 def handle_unlink(self, _args, _result):
1197 # In theory, the file had to be created anyway.
1198 pass
1199
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001200 def handle_unlinkat(self, _args, _result):
1201 # In theory, the file had to be created anyway.
1202 pass
1203
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001204 def handle_statfs(self, _args, _result):
1205 pass
1206
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001207 def handle_utimensat(self, _args, _result):
1208 pass
1209
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001210 def handle_vfork(self, _args, result):
1211 self._handling_forking('vfork', result)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001212
1213 @staticmethod
1214 def _handle_unknown(function, args, result):
1215 raise TracingFailure(
1216 'Unexpected/unimplemented trace %s(%s)= %s' %
1217 (function, args, result),
1218 None, None, None)
1219
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001220 def _handling_forking(self, name, result):
1221 """Transfers cwd."""
1222 if result.startswith(('?', '-1')):
1223 # The call failed.
1224 return
1225 # Update the other process right away.
1226 childpid = int(result)
1227 child = self._root().get_or_set_proc(childpid)
1228 if child.parentid is not None or childpid in self.children:
1229 raise TracingFailure(
1230 'Found internal inconsitency in process lifetime detection '
1231 'during a %s() call' % name,
1232 None, None, None)
1233
1234 # Copy the cwd object.
1235 child.initial_cwd = self.get_cwd()
1236 child.parentid = self.pid
1237 # It is necessary because the logs are processed out of order.
1238 self.children.append(child)
1239
maruel@chromium.org538141b2013-06-03 20:57:17 +00001240 def _handle_file(self, filepath, mode):
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001241 filepath = self._mangle(filepath)
maruel@chromium.org538141b2013-06-03 20:57:17 +00001242 self.add_file(filepath, mode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001243
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001244 def _mangle(self, filepath):
1245 """Decodes a filepath found in the log and convert it to a late-bound
1246 path if necessary.
1247
1248 |filepath| is an strace 'encoded' string and the returned value is
1249 either an unicode string if the path was absolute or a late bound path
1250 otherwise.
1251 """
1252 filepath = Strace.load_filename(filepath)
1253 if os.path.isabs(filepath):
1254 return filepath
1255 else:
1256 if isinstance(self.get_cwd(), unicode):
1257 return os.path.normpath(os.path.join(self.get_cwd(), filepath))
1258 return self.RelativePath(self.get_cwd(), filepath)
1259
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001260 def __init__(self, blacklist, root_pid, initial_cwd):
1261 """|root_pid| may be None when the root process is not known.
1262
1263 In that case, a search is done after reading all the logs to figure out
1264 the root process.
1265 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001266 super(Strace.Context, self).__init__(blacklist)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001267 assert_is_renderable(initial_cwd)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001268 self.root_pid = root_pid
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001269 self.initial_cwd = initial_cwd
1270
1271 def render(self):
1272 """Returns the string value of the initial cwd of the root process.
1273
1274 Used by RelativePath.
1275 """
1276 return self.initial_cwd
1277
1278 def on_line(self, pid, line):
1279 """Transfers control into the Process.on_line() function."""
1280 self.get_or_set_proc(pid).on_line(line.strip())
1281
1282 def to_results(self):
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001283 """If necessary, finds back the root process and verify consistency."""
1284 if not self.root_pid:
1285 # The non-sudo case. The traced process was started by strace itself,
1286 # so the pid of the traced process is not known.
1287 root = [p for p in self._process_lookup.itervalues() if not p.parentid]
1288 if len(root) == 1:
1289 self.root_process = root[0]
1290 # Save it for later.
1291 self.root_pid = self.root_process.pid
1292 else:
1293 # The sudo case. The traced process was started manually so its pid is
1294 # known.
1295 self.root_process = self._process_lookup.get(self.root_pid)
1296 if not self.root_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001297 raise TracingFailure(
1298 'Found internal inconsitency in process lifetime detection '
1299 'while finding the root process',
1300 None,
1301 None,
1302 None,
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001303 self.root_pid,
1304 sorted(self._process_lookup))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001305 process = self.root_process.to_results_process()
1306 if sorted(self._process_lookup) != sorted(p.pid for p in process.all):
1307 raise TracingFailure(
1308 'Found internal inconsitency in process lifetime detection '
1309 'while looking for len(tree) == len(list)',
1310 None,
1311 None,
1312 None,
1313 sorted(self._process_lookup),
1314 sorted(p.pid for p in process.all))
1315 return Results(process)
1316
1317 def get_or_set_proc(self, pid):
1318 """Returns the Context.Process instance for this pid or creates a new one.
1319 """
1320 if not pid or not isinstance(pid, int):
1321 raise TracingFailure(
1322 'Unpexpected value for pid: %r' % pid,
1323 pid,
1324 None,
1325 None,
1326 pid)
1327 if pid not in self._process_lookup:
1328 self._process_lookup[pid] = self.Process(self, pid)
1329 return self._process_lookup[pid]
1330
1331 @classmethod
1332 def traces(cls):
1333 """Returns the list of all handled traces to pass this as an argument to
1334 strace.
1335 """
1336 prefix = 'handle_'
1337 return [i[len(prefix):] for i in dir(cls.Process) if i.startswith(prefix)]
1338
1339 class Tracer(ApiBase.Tracer):
1340 MAX_LEN = 256
1341
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001342 def __init__(self, logname, use_sudo):
1343 super(Strace.Tracer, self).__init__(logname)
1344 self.use_sudo = use_sudo
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001345 if use_sudo:
1346 # TODO(maruel): Use the jump script systematically to make it easy to
1347 # figure out the root process, so RelativePath is not necessary anymore.
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00001348 self._child_script = create_exec_thunk()
1349 self._scripts_to_cleanup.append(self._child_script)
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001350
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001351 def trace(self, cmd, cwd, tracename, output):
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001352 """Runs strace on an executable.
1353
1354 When use_sudo=True, it is a 3-phases process: start the thunk, start
1355 sudo strace with the pid of the thunk and then have the thunk os.execve()
1356 the process to trace.
1357 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001358 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
1359 assert os.path.isabs(cmd[0]), cmd[0]
1360 assert os.path.isabs(cwd), cwd
1361 assert os.path.normpath(cwd) == cwd, cwd
1362 with self._lock:
1363 if not self._initialized:
1364 raise TracingFailure(
1365 'Called Tracer.trace() on an unitialized object',
1366 None, None, None, tracename)
1367 assert tracename not in (i['trace'] for i in self._traces)
1368 stdout = stderr = None
1369 if output:
1370 stdout = subprocess.PIPE
1371 stderr = subprocess.STDOUT
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001372
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001373 # Ensure all file related APIs are hooked.
1374 traces = ','.join(Strace.Context.traces() + ['file'])
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001375 flags = [
maruel@chromium.orga5cfdc32013-05-28 14:46:15 +00001376 # Each child process has its own trace file. It is necessary because
1377 # strace may generate corrupted log file if multiple processes are
1378 # heavily doing syscalls simultaneously.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001379 '-ff',
maruel@chromium.orga5cfdc32013-05-28 14:46:15 +00001380 # Reduce whitespace usage.
1381 '-a1',
1382 # hex encode non-ascii strings.
1383 # TODO(maruel): '-x',
1384 # TODO(maruel): '-ttt',
1385 # Signals are unnecessary noise here. Note the parser can cope with them
1386 # but reduce the unnecessary output.
1387 '-esignal=none',
1388 # Print as much data as wanted.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001389 '-s', '%d' % self.MAX_LEN,
1390 '-e', 'trace=%s' % traces,
1391 '-o', self._logname + '.' + tracename,
1392 ]
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001393
1394 if self.use_sudo:
1395 pipe_r, pipe_w = os.pipe()
1396 # Start the child process paused.
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00001397 target_cmd = [sys.executable, self._child_script, str(pipe_r)] + cmd
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001398 logging.debug(' '.join(target_cmd))
1399 child_proc = subprocess.Popen(
1400 target_cmd,
1401 stdin=subprocess.PIPE,
1402 stdout=stdout,
1403 stderr=stderr,
1404 cwd=cwd)
1405
1406 # TODO(maruel): both processes must use the same UID for it to work
1407 # without sudo. Look why -p is failing at the moment without sudo.
1408 trace_cmd = [
1409 'sudo',
1410 'strace',
1411 '-p', str(child_proc.pid),
1412 ] + flags
1413 logging.debug(' '.join(trace_cmd))
1414 strace_proc = subprocess.Popen(
1415 trace_cmd,
1416 cwd=cwd,
1417 stdin=subprocess.PIPE,
1418 stdout=subprocess.PIPE,
1419 stderr=subprocess.PIPE)
1420
1421 line = strace_proc.stderr.readline()
1422 if not re.match(r'^Process \d+ attached \- interrupt to quit$', line):
1423 # TODO(maruel): Raise an exception.
1424 assert False, line
1425
1426 # Now fire the child process.
1427 os.write(pipe_w, 'x')
1428
1429 out = child_proc.communicate()[0]
1430 strace_out = strace_proc.communicate()[0]
1431
1432 # TODO(maruel): if strace_proc.returncode: Add an exception.
1433 saved_out = strace_out if strace_proc.returncode else out
1434 root_pid = child_proc.pid
1435 else:
1436 # Non-sudo case.
1437 trace_cmd = [
1438 'strace',
1439 ] + flags + cmd
1440 logging.debug(' '.join(trace_cmd))
1441 child_proc = subprocess.Popen(
1442 trace_cmd,
1443 cwd=cwd,
1444 stdin=subprocess.PIPE,
1445 stdout=stdout,
1446 stderr=stderr)
1447 out = child_proc.communicate()[0]
1448 # TODO(maruel): Walk the logs and figure out the root process would
1449 # simplify parsing the logs a *lot*.
1450 saved_out = out
1451 # The trace reader will have to figure out.
1452 root_pid = None
1453
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001454 with self._lock:
1455 assert tracename not in (i['trace'] for i in self._traces)
1456 self._traces.append(
1457 {
1458 'cmd': cmd,
1459 'cwd': cwd,
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001460 'output': saved_out,
1461 'pid': root_pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001462 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001463 })
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001464 return child_proc.returncode, out
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001465
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001466 def __init__(self, use_sudo=None):
1467 super(Strace, self).__init__()
1468 self.use_sudo = use_sudo
1469
1470 def get_tracer(self, logname):
1471 return self.Tracer(logname, self.use_sudo)
1472
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001473 @staticmethod
1474 def clean_trace(logname):
maruel12e30012015-10-09 11:55:35 -07001475 if fs.isfile(logname):
1476 fs.remove(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001477 # Also delete any pid specific file from previous traces.
1478 for i in glob.iglob(logname + '.*'):
1479 if i.rsplit('.', 1)[1].isdigit():
maruel12e30012015-10-09 11:55:35 -07001480 fs.remove(i)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001481
1482 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001483 def parse_log(cls, logname, blacklist, trace_name):
1484 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00001485 assert os.path.isabs(logname)
Marc-Antoine Ruelde011802013-11-12 15:19:47 -05001486 data = tools.read_json(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001487 out = []
1488 for item in data['traces']:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001489 if trace_name and item['trace'] != trace_name:
1490 continue
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001491 result = {
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001492 'output': item['output'],
maruel@chromium.org964ddde2012-10-29 18:13:46 +00001493 'trace': item['trace'],
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001494 }
1495 try:
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001496 context = cls.Context(blacklist, item['pid'], item['cwd'])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001497 for pidfile in glob.iglob('%s.%s.*' % (logname, item['trace'])):
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001498 logging.debug('Reading %s', pidfile)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001499 pid = pidfile.rsplit('.', 1)[1]
1500 if pid.isdigit():
1501 pid = int(pid)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001502 found_line = False
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001503 for line in open(pidfile, 'rb'):
1504 context.on_line(pid, line)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001505 found_line = True
1506 if not found_line:
1507 # Ensures that a completely empty trace still creates the
1508 # corresponding Process instance by logging a dummy line.
1509 context.on_line(pid, '')
1510 else:
1511 logging.warning('Found unexpected file %s', pidfile)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001512 result['results'] = context.to_results()
maruel@chromium.org424f5692012-10-15 15:42:38 +00001513 except TracingFailure:
1514 result['exception'] = sys.exc_info()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001515 out.append(result)
1516 return out
1517
1518
1519class Dtrace(ApiBase):
1520 """Uses DTrace framework through dtrace. Requires root access.
1521
1522 Implies Mac OSX.
1523
1524 dtruss can't be used because it has compatibility issues with python.
1525
1526 Also, the pid->cwd handling needs to be done manually since OSX has no way to
1527 get the absolute path of the 'cwd' dtrace variable from the probe.
1528
1529 Also, OSX doesn't populate curpsinfo->pr_psargs properly, see
1530 https://discussions.apple.com/thread/1980539. So resort to handling execve()
1531 manually.
1532
1533 errno is not printed in the log since this implementation currently only cares
1534 about files that were successfully opened.
1535 """
1536 class Context(ApiBase.Context):
1537 # Format: index pid function(args)
1538 RE_HEADER = re.compile(r'^\d+ (\d+) ([a-zA-Z_\-]+)\((.*?)\)$')
1539
1540 # Arguments parsing.
1541 RE_DTRACE_BEGIN = re.compile(r'^\"(.+?)\"$')
1542 RE_CHDIR = re.compile(r'^\"(.+?)\"$')
1543 RE_EXECVE = re.compile(r'^\"(.+?)\", \[(\d+), (.+)\]$')
1544 RE_OPEN = re.compile(r'^\"(.+?)\", (0x[0-9a-z]+), (0x[0-9a-z]+)$')
1545 RE_PROC_START = re.compile(r'^(\d+), \"(.+?)\", (\d+)$')
1546 RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$')
1547
maruel@chromium.orgac361162013-06-04 15:54:06 +00001548 # O_DIRECTORY is not defined on Windows and dtrace doesn't exist on Windows.
1549 O_DIRECTORY = os.O_DIRECTORY if hasattr(os, 'O_DIRECTORY') else None
maruel@chromium.org538141b2013-06-03 20:57:17 +00001550 O_RDWR = os.O_RDWR
1551 O_WRONLY = os.O_WRONLY
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001552
1553 class Process(ApiBase.Context.Process):
1554 def __init__(self, *args):
1555 super(Dtrace.Context.Process, self).__init__(*args)
1556 self.cwd = self.initial_cwd
1557
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001558 def __init__(self, blacklist, thunk_pid, initial_cwd):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001559 logging.info(
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001560 '%s(%d, %s)' % (self.__class__.__name__, thunk_pid, initial_cwd))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001561 super(Dtrace.Context, self).__init__(blacklist)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001562 assert isinstance(initial_cwd, unicode), initial_cwd
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001563 # Process ID of the temporary script created by create_subprocess_thunk().
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001564 self._thunk_pid = thunk_pid
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001565 self._initial_cwd = initial_cwd
1566 self._line_number = 0
1567
1568 def on_line(self, line):
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001569 assert isinstance(line, unicode), line
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001570 self._line_number += 1
1571 match = self.RE_HEADER.match(line)
1572 if not match:
1573 raise TracingFailure(
1574 'Found malformed line: %s' % line,
1575 None,
1576 self._line_number,
1577 line)
1578 fn = getattr(
1579 self,
1580 'handle_%s' % match.group(2).replace('-', '_'),
1581 self._handle_ignored)
1582 # It is guaranteed to succeed because of the regexp. Or at least I thought
1583 # it would.
1584 pid = int(match.group(1))
1585 try:
1586 return fn(pid, match.group(3))
1587 except TracingFailure, e:
1588 # Hack in the values since the handler could be a static function.
1589 e.pid = pid
1590 e.line = line
1591 e.line_number = self._line_number
1592 # Re-raise the modified exception.
1593 raise
1594 except (KeyError, NotImplementedError, ValueError), e:
1595 raise TracingFailure(
1596 'Trace generated a %s exception: %s' % (
1597 e.__class__.__name__, str(e)),
1598 pid,
1599 self._line_number,
1600 line,
1601 e)
1602
1603 def to_results(self):
1604 process = self.root_process.to_results_process()
1605 # Internal concistency check.
1606 if sorted(self._process_lookup) != sorted(p.pid for p in process.all):
1607 raise TracingFailure(
1608 'Found internal inconsitency in process lifetime detection '
1609 'while looking for len(tree) == len(list)',
1610 None,
1611 None,
1612 None,
1613 sorted(self._process_lookup),
1614 sorted(p.pid for p in process.all))
1615 return Results(process)
1616
1617 def handle_dtrace_BEGIN(self, _pid, args):
1618 if not self.RE_DTRACE_BEGIN.match(args):
1619 raise TracingFailure(
1620 'Found internal inconsitency in dtrace_BEGIN log line',
1621 None, None, None)
1622
1623 def handle_proc_start(self, pid, args):
1624 """Transfers cwd.
1625
1626 The dtrace script already takes care of only tracing the processes that
1627 are child of the traced processes so there is no need to verify the
1628 process hierarchy.
1629 """
1630 if pid in self._process_lookup:
1631 raise TracingFailure(
1632 'Found internal inconsitency in proc_start: %d started two times' %
1633 pid,
1634 None, None, None)
1635 match = self.RE_PROC_START.match(args)
1636 if not match:
1637 raise TracingFailure(
1638 'Failed to parse arguments: %s' % args,
1639 None, None, None)
1640 ppid = int(match.group(1))
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001641 if ppid == self._thunk_pid and not self.root_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001642 proc = self.root_process = self.Process(
1643 self.blacklist, pid, self._initial_cwd)
1644 elif ppid in self._process_lookup:
1645 proc = self.Process(self.blacklist, pid, self._process_lookup[ppid].cwd)
1646 self._process_lookup[ppid].children.append(proc)
1647 else:
1648 # Another process tree, ignore.
1649 return
1650 self._process_lookup[pid] = proc
1651 logging.debug(
1652 'New child: %s -> %d cwd:%s' %
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001653 (ppid, pid, render(proc.initial_cwd)))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001654
1655 def handle_proc_exit(self, pid, _args):
1656 """Removes cwd."""
1657 if pid in self._process_lookup:
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001658 # self._thunk_pid is not traced itself and other traces run neither.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001659 self._process_lookup[pid].cwd = None
1660
1661 def handle_execve(self, pid, args):
1662 """Sets the process' executable.
1663
1664 TODO(maruel): Read command line arguments. See
1665 https://discussions.apple.com/thread/1980539 for an example.
1666 https://gist.github.com/1242279
1667
1668 Will have to put the answer at http://stackoverflow.com/questions/7556249.
1669 :)
1670 """
1671 if not pid in self._process_lookup:
1672 # Another process tree, ignore.
1673 return
1674 match = self.RE_EXECVE.match(args)
1675 if not match:
1676 raise TracingFailure(
1677 'Failed to parse arguments: %r' % args,
1678 None, None, None)
1679 proc = self._process_lookup[pid]
1680 proc.executable = match.group(1)
maruel@chromium.org538141b2013-06-03 20:57:17 +00001681 self._handle_file(pid, proc.executable, Results.File.READ)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001682 proc.command = self.process_escaped_arguments(match.group(3))
1683 if int(match.group(2)) != len(proc.command):
1684 raise TracingFailure(
1685 'Failed to parse execve() arguments: %s' % args,
1686 None, None, None)
1687
1688 def handle_chdir(self, pid, args):
1689 """Updates cwd."""
1690 if pid not in self._process_lookup:
1691 # Another process tree, ignore.
1692 return
1693 cwd = self.RE_CHDIR.match(args).group(1)
1694 if not cwd.startswith('/'):
1695 cwd2 = os.path.join(self._process_lookup[pid].cwd, cwd)
1696 logging.debug('handle_chdir(%d, %s) -> %s' % (pid, cwd, cwd2))
1697 else:
1698 logging.debug('handle_chdir(%d, %s)' % (pid, cwd))
1699 cwd2 = cwd
1700 self._process_lookup[pid].cwd = cwd2
1701
1702 def handle_open_nocancel(self, pid, args):
1703 """Redirects to handle_open()."""
1704 return self.handle_open(pid, args)
1705
1706 def handle_open(self, pid, args):
1707 if pid not in self._process_lookup:
1708 # Another process tree, ignore.
1709 return
1710 match = self.RE_OPEN.match(args)
1711 if not match:
1712 raise TracingFailure(
1713 'Failed to parse arguments: %s' % args,
1714 None, None, None)
1715 flag = int(match.group(2), 16)
1716 if self.O_DIRECTORY & flag == self.O_DIRECTORY:
1717 # Ignore directories.
1718 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00001719 self._handle_file(
1720 pid,
1721 match.group(1),
1722 Results.File.READ if not ((self.O_RDWR | self.O_WRONLY) & flag)
1723 else Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001724
1725 def handle_rename(self, pid, args):
1726 if pid not in self._process_lookup:
1727 # Another process tree, ignore.
1728 return
1729 match = self.RE_RENAME.match(args)
1730 if not match:
1731 raise TracingFailure(
1732 'Failed to parse arguments: %s' % args,
1733 None, None, None)
maruel@chromium.org538141b2013-06-03 20:57:17 +00001734 self._handle_file(pid, match.group(1), Results.File.READ)
1735 self._handle_file(pid, match.group(2), Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001736
maruel@chromium.org538141b2013-06-03 20:57:17 +00001737 def _handle_file(self, pid, filepath, mode):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001738 if not filepath.startswith('/'):
1739 filepath = os.path.join(self._process_lookup[pid].cwd, filepath)
1740 # We can get '..' in the path.
1741 filepath = os.path.normpath(filepath)
1742 # Sadly, still need to filter out directories here;
1743 # saw open_nocancel(".", 0, 0) = 0 lines.
maruel12e30012015-10-09 11:55:35 -07001744 if fs.isdir(filepath):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001745 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00001746 self._process_lookup[pid].add_file(filepath, mode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001747
1748 def handle_ftruncate(self, pid, args):
1749 """Just used as a signal to kill dtrace, ignoring."""
1750 pass
1751
1752 @staticmethod
1753 def _handle_ignored(pid, args):
1754 """Is called for all the event traces that are not handled."""
1755 raise NotImplementedError('Please implement me')
1756
1757 @staticmethod
1758 def process_escaped_arguments(text):
1759 """Extracts escaped arguments on a string and return the arguments as a
1760 list.
1761
1762 Implemented as an automaton.
1763
1764 Example:
1765 With text = '\\001python2.7\\001-c\\001print(\\"hi\\")\\0', the
1766 function will return ['python2.7', '-c', 'print("hi")]
1767 """
1768 if not text.endswith('\\0'):
1769 raise ValueError('String is not null terminated: %r' % text, text)
1770 text = text[:-2]
1771
1772 def unescape(x):
1773 """Replaces '\\' with '\' and '\?' (where ? is anything) with ?."""
1774 out = []
1775 escaped = False
1776 for i in x:
1777 if i == '\\' and not escaped:
1778 escaped = True
1779 continue
1780 escaped = False
1781 out.append(i)
1782 return ''.join(out)
1783
1784 return [unescape(i) for i in text.split('\\001')]
1785
1786 class Tracer(ApiBase.Tracer):
1787 # pylint: disable=C0301
1788 #
1789 # To understand the following code, you'll want to take a look at:
1790 # http://developers.sun.com/solaris/articles/dtrace_quickref/dtrace_quickref.html
1791 # https://wikis.oracle.com/display/DTrace/Variables
1792 # http://docs.oracle.com/cd/E19205-01/820-4221/
1793 #
1794 # 0. Dump all the valid probes into a text file. It is important, you
1795 # want to redirect into a file and you don't want to constantly 'sudo'.
1796 # $ sudo dtrace -l > probes.txt
1797 #
1798 # 1. Count the number of probes:
1799 # $ wc -l probes.txt
1800 # 81823 # On OSX 10.7, including 1 header line.
1801 #
1802 # 2. List providers, intentionally skipping all the 'syspolicy10925' and the
1803 # likes and skipping the header with NR>1:
1804 # $ awk 'NR>1 { print $2 }' probes.txt | sort | uniq | grep -v '[[:digit:]]'
1805 # dtrace
1806 # fbt
1807 # io
1808 # ip
1809 # lockstat
1810 # mach_trap
1811 # proc
1812 # profile
1813 # sched
1814 # syscall
1815 # tcp
1816 # vminfo
1817 #
1818 # 3. List of valid probes:
1819 # $ grep syscall probes.txt | less
1820 # or use dtrace directly:
1821 # $ sudo dtrace -l -P syscall | less
1822 #
1823 # trackedpid is an associative array where its value can be 0, 1 or 2.
1824 # 0 is for untracked processes and is the default value for items not
1825 # in the associative array.
1826 # 1 is for tracked processes.
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001827 # 2 is for the script created by create_subprocess_thunk() only. It is not
1828 # tracked itself but all its decendants are.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001829 #
1830 # The script will kill itself only once waiting_to_die == 1 and
1831 # current_processes == 0, so that both getlogin() was called and that
1832 # all traced processes exited.
1833 #
1834 # TODO(maruel): Use cacheable predicates. See
1835 # https://wikis.oracle.com/display/DTrace/Performance+Considerations
1836 D_CODE = """
1837 dtrace:::BEGIN {
1838 waiting_to_die = 0;
1839 current_processes = 0;
1840 logindex = 0;
1841 printf("%d %d %s_%s(\\"%s\\")\\n",
1842 logindex, PID, probeprov, probename, SCRIPT);
1843 logindex++;
1844 }
1845
1846 proc:::start /trackedpid[ppid]/ {
1847 trackedpid[pid] = 1;
1848 current_processes += 1;
1849 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
1850 logindex, pid, probeprov, probename,
1851 ppid,
1852 execname,
1853 current_processes);
1854 logindex++;
1855 }
1856 /* Should use SCRIPT but there is no access to this variable at that
1857 * point. */
1858 proc:::start /ppid == PID && execname == "Python"/ {
1859 trackedpid[pid] = 2;
1860 current_processes += 1;
1861 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
1862 logindex, pid, probeprov, probename,
1863 ppid,
1864 execname,
1865 current_processes);
1866 logindex++;
1867 }
1868 proc:::exit /trackedpid[pid] &&
1869 current_processes == 1 &&
1870 waiting_to_die == 1/ {
1871 trackedpid[pid] = 0;
1872 current_processes -= 1;
1873 printf("%d %d %s_%s(%d)\\n",
1874 logindex, pid, probeprov, probename,
1875 current_processes);
1876 logindex++;
1877 exit(0);
1878 }
1879 proc:::exit /trackedpid[pid]/ {
1880 trackedpid[pid] = 0;
1881 current_processes -= 1;
1882 printf("%d %d %s_%s(%d)\\n",
1883 logindex, pid, probeprov, probename,
1884 current_processes);
1885 logindex++;
1886 }
1887
1888 /* Use an arcane function to detect when we need to die */
1889 syscall::ftruncate:entry /pid == PID && arg0 == FILE_ID/ {
1890 waiting_to_die = 1;
1891 printf("%d %d %s()\\n", logindex, pid, probefunc);
1892 logindex++;
1893 }
1894 syscall::ftruncate:entry /
1895 pid == PID && arg0 == FILE_ID && current_processes == 0/ {
1896 exit(0);
1897 }
1898
1899 syscall::open*:entry /trackedpid[pid] == 1/ {
1900 self->open_arg0 = arg0;
1901 self->open_arg1 = arg1;
1902 self->open_arg2 = arg2;
1903 }
1904 syscall::open*:return /trackedpid[pid] == 1 && errno == 0/ {
1905 this->open_arg0 = copyinstr(self->open_arg0);
1906 printf("%d %d %s(\\"%s\\", 0x%x, 0x%x)\\n",
1907 logindex, pid, probefunc,
1908 this->open_arg0,
1909 self->open_arg1,
1910 self->open_arg2);
1911 logindex++;
1912 this->open_arg0 = 0;
1913 }
1914 syscall::open*:return /trackedpid[pid] == 1/ {
1915 self->open_arg0 = 0;
1916 self->open_arg1 = 0;
1917 self->open_arg2 = 0;
1918 }
1919
1920 syscall::rename:entry /trackedpid[pid] == 1/ {
1921 self->rename_arg0 = arg0;
1922 self->rename_arg1 = arg1;
1923 }
1924 syscall::rename:return /trackedpid[pid] == 1 && errno == 0/ {
1925 this->rename_arg0 = copyinstr(self->rename_arg0);
1926 this->rename_arg1 = copyinstr(self->rename_arg1);
1927 printf("%d %d %s(\\"%s\\", \\"%s\\")\\n",
1928 logindex, pid, probefunc,
1929 this->rename_arg0,
1930 this->rename_arg1);
1931 logindex++;
1932 this->rename_arg0 = 0;
1933 this->rename_arg1 = 0;
1934 }
1935 syscall::rename:return /trackedpid[pid] == 1/ {
1936 self->rename_arg0 = 0;
1937 self->rename_arg1 = 0;
1938 }
1939
1940 /* Track chdir, it's painful because it is only receiving relative path.
1941 */
1942 syscall::chdir:entry /trackedpid[pid] == 1/ {
1943 self->chdir_arg0 = arg0;
1944 }
1945 syscall::chdir:return /trackedpid[pid] == 1 && errno == 0/ {
1946 this->chdir_arg0 = copyinstr(self->chdir_arg0);
1947 printf("%d %d %s(\\"%s\\")\\n",
1948 logindex, pid, probefunc,
1949 this->chdir_arg0);
1950 logindex++;
1951 this->chdir_arg0 = 0;
1952 }
1953 syscall::chdir:return /trackedpid[pid] == 1/ {
1954 self->chdir_arg0 = 0;
1955 }
1956 """
1957
1958 # execve-specific code, tends to throw a lot of exceptions.
1959 D_CODE_EXECVE = """
1960 /* Finally what we care about! */
1961 syscall::exec*:entry /trackedpid[pid]/ {
1962 self->exec_arg0 = copyinstr(arg0);
1963 /* Incrementally probe for a NULL in the argv parameter of execve() to
1964 * figure out argc. */
1965 /* TODO(maruel): Skip the remaining copyin() when a NULL pointer was
1966 * found. */
1967 self->exec_argc = 0;
1968 /* Probe for argc==1 */
1969 this->exec_argv = (user_addr_t*)copyin(
1970 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
1971 self->exec_argc = this->exec_argv[self->exec_argc] ?
1972 (self->exec_argc + 1) : self->exec_argc;
1973
1974 /* Probe for argc==2 */
1975 this->exec_argv = (user_addr_t*)copyin(
1976 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
1977 self->exec_argc = this->exec_argv[self->exec_argc] ?
1978 (self->exec_argc + 1) : self->exec_argc;
1979
1980 /* Probe for argc==3 */
1981 this->exec_argv = (user_addr_t*)copyin(
1982 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
1983 self->exec_argc = this->exec_argv[self->exec_argc] ?
1984 (self->exec_argc + 1) : self->exec_argc;
1985
1986 /* Probe for argc==4 */
1987 this->exec_argv = (user_addr_t*)copyin(
1988 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
1989 self->exec_argc = this->exec_argv[self->exec_argc] ?
1990 (self->exec_argc + 1) : self->exec_argc;
1991
1992 /* Copy the inputs strings since there is no guarantee they'll be
1993 * present after the call completed. */
1994 self->exec_argv0 = (self->exec_argc > 0) ?
1995 copyinstr(this->exec_argv[0]) : "";
1996 self->exec_argv1 = (self->exec_argc > 1) ?
1997 copyinstr(this->exec_argv[1]) : "";
1998 self->exec_argv2 = (self->exec_argc > 2) ?
1999 copyinstr(this->exec_argv[2]) : "";
2000 self->exec_argv3 = (self->exec_argc > 3) ?
2001 copyinstr(this->exec_argv[3]) : "";
2002 this->exec_argv = 0;
2003 }
2004 syscall::exec*:return /trackedpid[pid] && errno == 0/ {
2005 /* We need to join strings here, as using multiple printf() would
2006 * cause tearing when multiple threads/processes are traced.
2007 * Since it is impossible to escape a string and join it to another one,
2008 * like sprintf("%s%S", previous, more), use hackery.
2009 * Each of the elements are split with a \\1. \\0 cannot be used because
2010 * it is simply ignored. This will conflict with any program putting a
2011 * \\1 in their execve() string but this should be "rare enough" */
2012 this->args = "";
2013 /* Process exec_argv[0] */
2014 this->args = strjoin(
2015 this->args, (self->exec_argc > 0) ? self->exec_argv0 : "");
2016
2017 /* Process exec_argv[1] */
2018 this->args = strjoin(
2019 this->args, (self->exec_argc > 1) ? "\\1" : "");
2020 this->args = strjoin(
2021 this->args, (self->exec_argc > 1) ? self->exec_argv1 : "");
2022
2023 /* Process exec_argv[2] */
2024 this->args = strjoin(
2025 this->args, (self->exec_argc > 2) ? "\\1" : "");
2026 this->args = strjoin(
2027 this->args, (self->exec_argc > 2) ? self->exec_argv2 : "");
2028
2029 /* Process exec_argv[3] */
2030 this->args = strjoin(
2031 this->args, (self->exec_argc > 3) ? "\\1" : "");
2032 this->args = strjoin(
2033 this->args, (self->exec_argc > 3) ? self->exec_argv3 : "");
2034
2035 /* Prints self->exec_argc to permits verifying the internal
2036 * consistency since this code is quite fishy. */
2037 printf("%d %d %s(\\"%s\\", [%d, %S])\\n",
2038 logindex, pid, probefunc,
2039 self->exec_arg0,
2040 self->exec_argc,
2041 this->args);
2042 logindex++;
2043 this->args = 0;
2044 }
2045 syscall::exec*:return /trackedpid[pid]/ {
2046 self->exec_arg0 = 0;
2047 self->exec_argc = 0;
2048 self->exec_argv0 = 0;
2049 self->exec_argv1 = 0;
2050 self->exec_argv2 = 0;
2051 self->exec_argv3 = 0;
2052 }
2053 """
2054
2055 # Code currently not used.
2056 D_EXTRANEOUS = """
2057 /* This is a good learning experience, since it traces a lot of things
2058 * related to the process and child processes.
2059 * Warning: it generates a gigantic log. For example, tracing
2060 * "data/trace_inputs/child1.py --child" generates a 2mb log and takes
2061 * several minutes to execute.
2062 */
2063 /*
2064 mach_trap::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2065 printf("%d %d %s_%s() = %d\\n",
2066 logindex, pid, probeprov, probefunc, errno);
2067 logindex++;
2068 }
2069 proc::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2070 printf("%d %d %s_%s() = %d\\n",
2071 logindex, pid, probeprov, probefunc, errno);
2072 logindex++;
2073 }
2074 sched::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2075 printf("%d %d %s_%s() = %d\\n",
2076 logindex, pid, probeprov, probefunc, errno);
2077 logindex++;
2078 }
2079 syscall::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2080 printf("%d %d %s_%s() = %d\\n",
2081 logindex, pid, probeprov, probefunc, errno);
2082 logindex++;
2083 }
2084 vminfo::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2085 printf("%d %d %s_%s() = %d\\n",
2086 logindex, pid, probeprov, probefunc, errno);
2087 logindex++;
2088 }
2089 */
2090 /* TODO(maruel): *stat* functions and friends
2091 syscall::access:return,
2092 syscall::chdir:return,
2093 syscall::chflags:return,
2094 syscall::chown:return,
2095 syscall::chroot:return,
2096 syscall::getattrlist:return,
2097 syscall::getxattr:return,
2098 syscall::lchown:return,
2099 syscall::lstat64:return,
2100 syscall::lstat:return,
2101 syscall::mkdir:return,
2102 syscall::pathconf:return,
2103 syscall::readlink:return,
2104 syscall::removexattr:return,
2105 syscall::setxattr:return,
2106 syscall::stat64:return,
2107 syscall::stat:return,
2108 syscall::truncate:return,
2109 syscall::unlink:return,
2110 syscall::utimes:return,
2111 */
2112 """
2113
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002114 def __init__(self, logname, use_sudo):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002115 """Starts the log collection with dtrace.
2116
2117 Requires root access or chmod 4555 on dtrace. dtrace is asynchronous so
2118 this needs to wait for dtrace to be "warmed up".
2119 """
2120 super(Dtrace.Tracer, self).__init__(logname)
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002121 # This script is used as a signal to figure out the root process.
2122 self._signal_script = create_subprocess_thunk()
2123 self._scripts_to_cleanup.append(self._signal_script)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002124 # This unique dummy temp file is used to signal the dtrace script that it
2125 # should stop as soon as all the child processes are done. A bit hackish
2126 # but works fine enough.
2127 self._dummy_file_id, self._dummy_file_name = tempfile.mkstemp(
Marc-Antoine Ruel3c979cb2015-03-11 13:43:28 -04002128 prefix=u'trace_signal_file')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002129
maruel@chromium.org9b2112a2013-04-05 18:01:02 +00002130 dtrace_path = '/usr/sbin/dtrace'
maruel12e30012015-10-09 11:55:35 -07002131 if not fs.isfile(dtrace_path):
maruel@chromium.org9b2112a2013-04-05 18:01:02 +00002132 dtrace_path = 'dtrace'
maruel12e30012015-10-09 11:55:35 -07002133 elif use_sudo is None and (fs.stat(dtrace_path).st_mode & stat.S_ISUID):
maruel@chromium.org9b2112a2013-04-05 18:01:02 +00002134 # No need to sudo. For those following at home, don't do that.
2135 use_sudo = False
2136
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002137 # Note: do not use the -p flag. It's useless if the initial process quits
2138 # too fast, resulting in missing traces from the grand-children. The D
2139 # code manages the dtrace lifetime itself.
2140 trace_cmd = [
maruel@chromium.org9b2112a2013-04-05 18:01:02 +00002141 dtrace_path,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002142 # Use a larger buffer if getting 'out of scratch space' errors.
2143 # Ref: https://wikis.oracle.com/display/DTrace/Options+and+Tunables
2144 '-b', '10m',
2145 '-x', 'dynvarsize=10m',
2146 #'-x', 'dtrace_global_maxsize=1m',
2147 '-x', 'evaltime=exec',
2148 '-o', '/dev/stderr',
2149 '-q',
2150 '-n', self._get_dtrace_code(),
2151 ]
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002152 if use_sudo is not False:
2153 trace_cmd.insert(0, 'sudo')
2154
maruel12e30012015-10-09 11:55:35 -07002155 with fs.open(self._logname + '.log', 'wb') as logfile:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002156 self._dtrace = subprocess.Popen(
2157 trace_cmd, stdout=logfile, stderr=subprocess.STDOUT)
2158 logging.debug('Started dtrace pid: %d' % self._dtrace.pid)
2159
2160 # Reads until one line is printed, which signifies dtrace is up and ready.
maruel12e30012015-10-09 11:55:35 -07002161 with fs.open(self._logname + '.log', 'rb') as logfile:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002162 while 'dtrace_BEGIN' not in logfile.readline():
2163 if self._dtrace.poll() is not None:
2164 # Do a busy wait. :/
2165 break
2166 logging.debug('dtrace started')
2167
2168 def _get_dtrace_code(self):
2169 """Setups the D code to implement child process tracking.
2170
2171 Injects the cookie in the script so it knows when to stop.
2172
2173 The script will detect any instance of the script created with
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002174 create_subprocess_thunk() and will start tracing it.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002175 """
maruel@chromium.orgbe75f282012-11-02 19:34:37 +00002176 out = (
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002177 'inline int PID = %d;\n'
2178 'inline string SCRIPT = "%s";\n'
2179 'inline int FILE_ID = %d;\n'
2180 '\n'
2181 '%s') % (
2182 os.getpid(),
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002183 self._signal_script,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002184 self._dummy_file_id,
maruel@chromium.orgbe75f282012-11-02 19:34:37 +00002185 self.D_CODE)
2186 if os.environ.get('TRACE_INPUTS_DTRACE_ENABLE_EXECVE') == '1':
2187 # Do not enable by default since it tends to spew dtrace: error lines
2188 # because the execve() parameters are not in valid memory at the time of
2189 # logging.
2190 # TODO(maruel): Find a way to make this reliable since it's useful but
2191 # only works in limited/trivial uses cases for now.
2192 out += self.D_CODE_EXECVE
2193 return out
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002194
2195 def trace(self, cmd, cwd, tracename, output):
2196 """Runs dtrace on an executable.
2197
2198 This dtruss is broken when it starts the process itself or when tracing
2199 child processes, this code starts a wrapper process
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002200 generated with create_subprocess_thunk() which starts the executable to
2201 trace.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002202 """
2203 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
2204 assert os.path.isabs(cmd[0]), cmd[0]
2205 assert os.path.isabs(cwd), cwd
2206 assert os.path.normpath(cwd) == cwd, cwd
2207 with self._lock:
2208 if not self._initialized:
2209 raise TracingFailure(
2210 'Called Tracer.trace() on an unitialized object',
2211 None, None, None, tracename)
2212 assert tracename not in (i['trace'] for i in self._traces)
2213
2214 # Starts the script wrapper to start the child process. This signals the
2215 # dtrace script that this process is to be traced.
2216 stdout = stderr = None
2217 if output:
2218 stdout = subprocess.PIPE
2219 stderr = subprocess.STDOUT
2220 child_cmd = [
2221 sys.executable,
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002222 self._signal_script,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002223 tracename,
2224 ]
2225 # Call a dummy function so that dtrace knows I'm about to launch a process
2226 # that needs to be traced.
2227 # Yummy.
2228 child = subprocess.Popen(
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00002229 child_cmd + tools.fix_python_path(cmd),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002230 stdin=subprocess.PIPE,
2231 stdout=stdout,
2232 stderr=stderr,
2233 cwd=cwd)
2234 logging.debug('Started child pid: %d' % child.pid)
2235
2236 out = child.communicate()[0]
2237 # This doesn't mean tracing is done, one of the grand-child process may
2238 # still be alive. It will be tracked with the dtrace script.
2239
2240 with self._lock:
2241 assert tracename not in (i['trace'] for i in self._traces)
2242 self._traces.append(
2243 {
2244 'cmd': cmd,
2245 'cwd': cwd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002246 'pid': child.pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002247 'output': out,
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002248 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002249 })
2250 return child.returncode, out
2251
2252 def close(self, timeout=None):
2253 """Terminates dtrace."""
2254 logging.debug('close(%s)' % timeout)
2255 try:
2256 try:
2257 super(Dtrace.Tracer, self).close(timeout)
2258 # Signal dtrace that it should stop now.
maruel@chromium.orgf43e68b2012-10-15 20:23:10 +00002259 # ftruncate doesn't exist on Windows.
2260 os.ftruncate(self._dummy_file_id, 0) # pylint: disable=E1101
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002261 if timeout:
2262 start = time.time()
2263 # Use polling. :/
2264 while (self._dtrace.poll() is None and
2265 (time.time() - start) < timeout):
2266 time.sleep(0.1)
2267 self._dtrace.kill()
2268 self._dtrace.wait()
2269 finally:
2270 # Make sure to kill it in any case.
2271 if self._dtrace.poll() is None:
2272 try:
2273 self._dtrace.kill()
2274 self._dtrace.wait()
2275 except OSError:
2276 pass
2277
2278 if self._dtrace.returncode != 0:
2279 # Warn about any dtrace failure but basically ignore it.
2280 print 'dtrace failure: %s' % self._dtrace.returncode
2281 finally:
2282 os.close(self._dummy_file_id)
maruel12e30012015-10-09 11:55:35 -07002283 fs.remove(self._dummy_file_name)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002284
2285 def post_process_log(self):
2286 """Sorts the log back in order when each call occured.
2287
2288 dtrace doesn't save the buffer in strict order since it keeps one buffer
2289 per CPU.
2290 """
2291 super(Dtrace.Tracer, self).post_process_log()
2292 logname = self._logname + '.log'
maruel12e30012015-10-09 11:55:35 -07002293 with fs.open(logname, 'rb') as logfile:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002294 lines = [l for l in logfile if l.strip()]
2295 errors = [l for l in lines if l.startswith('dtrace:')]
2296 if errors:
2297 raise TracingFailure(
2298 'Found errors in the trace: %s' % '\n'.join(errors),
2299 None, None, None, logname)
2300 try:
2301 lines = sorted(lines, key=lambda l: int(l.split(' ', 1)[0]))
2302 except ValueError:
2303 raise TracingFailure(
2304 'Found errors in the trace: %s' % '\n'.join(
2305 l for l in lines if l.split(' ', 1)[0].isdigit()),
2306 None, None, None, logname)
maruel12e30012015-10-09 11:55:35 -07002307 with fs.open(logname, 'wb') as logfile:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002308 logfile.write(''.join(lines))
2309
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002310 def __init__(self, use_sudo=None):
2311 super(Dtrace, self).__init__()
2312 self.use_sudo = use_sudo
2313
2314 def get_tracer(self, logname):
2315 return self.Tracer(logname, self.use_sudo)
2316
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002317 @staticmethod
2318 def clean_trace(logname):
2319 for ext in ('', '.log'):
maruel12e30012015-10-09 11:55:35 -07002320 if fs.isfile(logname + ext):
2321 fs.remove(logname + ext)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002322
2323 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00002324 def parse_log(cls, logname, blacklist, trace_name):
2325 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00002326 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002327
2328 def blacklist_more(filepath):
2329 # All the HFS metadata is in the form /.vol/...
2330 return blacklist(filepath) or re.match(r'^\/\.vol\/.+$', filepath)
2331
Marc-Antoine Ruelde011802013-11-12 15:19:47 -05002332 data = tools.read_json(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002333 out = []
2334 for item in data['traces']:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00002335 if trace_name and item['trace'] != trace_name:
2336 continue
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002337 result = {
2338 'output': item['output'],
2339 'trace': item['trace'],
2340 }
2341 try:
2342 context = cls.Context(blacklist_more, item['pid'], item['cwd'])
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002343 # It's fine to assume the file as UTF-8: OSX enforces the file names to
2344 # be valid UTF-8 and we control the log output.
2345 for line in codecs.open(logname + '.log', 'rb', encoding='utf-8'):
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002346 context.on_line(line)
2347 result['results'] = context.to_results()
2348 except TracingFailure:
2349 result['exception'] = sys.exc_info()
2350 out.append(result)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002351 return out
2352
2353
2354class LogmanTrace(ApiBase):
2355 """Uses the native Windows ETW based tracing functionality to trace a child
2356 process.
2357
2358 Caveat: this implementations doesn't track cwd or initial_cwd. It is because
2359 the Windows Kernel doesn't have a concept of 'current working directory' at
2360 all. A Win32 process has a map of current directories, one per drive letter
2361 and it is managed by the user mode kernel32.dll. In kernel, a file is always
2362 opened relative to another file_object or as an absolute path. All the current
2363 working directory logic is done in user mode.
2364 """
2365 class Context(ApiBase.Context):
2366 """Processes a ETW log line and keeps the list of existent and non
2367 existent files accessed.
2368
2369 Ignores directories.
2370 """
2371 # These indexes are for the stripped version in json.
2372 EVENT_NAME = 0
2373 TYPE = 1
2374 PID = 2
2375 TID = 3
2376 PROCESSOR_ID = 4
2377 TIMESTAMP = 5
2378 USER_DATA = 6
2379
2380 class Process(ApiBase.Context.Process):
2381 def __init__(self, *args):
2382 super(LogmanTrace.Context.Process, self).__init__(*args)
2383 # Handle file objects that succeeded.
2384 self.file_objects = {}
2385
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002386 def __init__(self, blacklist, thunk_pid, trace_name, thunk_cmd):
2387 logging.info(
2388 '%s(%d, %s, %s)', self.__class__.__name__, thunk_pid, trace_name,
2389 thunk_cmd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002390 super(LogmanTrace.Context, self).__init__(blacklist)
maruel@chromium.org561d4b22013-09-26 21:08:08 +00002391 self._drive_map = file_path.DosDriveMap()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002392 # Threads mapping to the corresponding process id.
2393 self._threads_active = {}
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002394 # Process ID of the tracer, e.g. the temporary script created by
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002395 # create_subprocess_thunk(). This is tricky because the process id may
2396 # have been reused.
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002397 self._thunk_pid = thunk_pid
2398 self._thunk_cmd = thunk_cmd
2399 self._trace_name = trace_name
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002400 self._line_number = 0
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002401 self._thunk_process = None
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002402
2403 def on_line(self, line):
2404 """Processes a json Event line."""
2405 self._line_number += 1
2406 try:
2407 # By Opcode
2408 handler = getattr(
2409 self,
2410 'handle_%s_%s' % (line[self.EVENT_NAME], line[self.TYPE]),
2411 None)
2412 if not handler:
2413 raise TracingFailure(
2414 'Unexpected event %s_%s' % (
2415 line[self.EVENT_NAME], line[self.TYPE]),
2416 None, None, None)
2417 handler(line)
2418 except TracingFailure, e:
2419 # Hack in the values since the handler could be a static function.
2420 e.pid = line[self.PID]
2421 e.line = line
2422 e.line_number = self._line_number
2423 # Re-raise the modified exception.
2424 raise
2425 except (KeyError, NotImplementedError, ValueError), e:
2426 raise TracingFailure(
2427 'Trace generated a %s exception: %s' % (
2428 e.__class__.__name__, str(e)),
2429 line[self.PID],
2430 self._line_number,
2431 line,
2432 e)
2433
2434 def to_results(self):
2435 if not self.root_process:
2436 raise TracingFailure(
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002437 'Failed to detect the initial process %d' % self._thunk_pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002438 None, None, None)
2439 process = self.root_process.to_results_process()
2440 return Results(process)
2441
2442 def _thread_to_process(self, tid):
2443 """Finds the process from the thread id."""
2444 tid = int(tid, 16)
2445 pid = self._threads_active.get(tid)
2446 if not pid or not self._process_lookup.get(pid):
2447 return
2448 return self._process_lookup[pid]
2449
2450 @classmethod
2451 def handle_EventTrace_Header(cls, line):
2452 """Verifies no event was dropped, e.g. no buffer overrun occured."""
2453 BUFFER_SIZE = cls.USER_DATA
2454 #VERSION = cls.USER_DATA + 1
2455 #PROVIDER_VERSION = cls.USER_DATA + 2
2456 #NUMBER_OF_PROCESSORS = cls.USER_DATA + 3
2457 #END_TIME = cls.USER_DATA + 4
2458 #TIMER_RESOLUTION = cls.USER_DATA + 5
2459 #MAX_FILE_SIZE = cls.USER_DATA + 6
2460 #LOG_FILE_MODE = cls.USER_DATA + 7
2461 #BUFFERS_WRITTEN = cls.USER_DATA + 8
2462 #START_BUFFERS = cls.USER_DATA + 9
2463 #POINTER_SIZE = cls.USER_DATA + 10
2464 EVENTS_LOST = cls.USER_DATA + 11
2465 #CPU_SPEED = cls.USER_DATA + 12
2466 #LOGGER_NAME = cls.USER_DATA + 13
2467 #LOG_FILE_NAME = cls.USER_DATA + 14
2468 #BOOT_TIME = cls.USER_DATA + 15
2469 #PERF_FREQ = cls.USER_DATA + 16
2470 #START_TIME = cls.USER_DATA + 17
2471 #RESERVED_FLAGS = cls.USER_DATA + 18
2472 #BUFFERS_LOST = cls.USER_DATA + 19
2473 #SESSION_NAME_STRING = cls.USER_DATA + 20
2474 #LOG_FILE_NAME_STRING = cls.USER_DATA + 21
2475 if line[EVENTS_LOST] != '0':
2476 raise TracingFailure(
2477 ( '%s events were lost during trace, please increase the buffer '
2478 'size from %s') % (line[EVENTS_LOST], line[BUFFER_SIZE]),
2479 None, None, None)
2480
2481 def handle_FileIo_Cleanup(self, line):
2482 """General wisdom: if a file is closed, it's because it was opened.
2483
2484 Note that FileIo_Close is not used since if a file was opened properly but
2485 not closed before the process exits, only Cleanup will be logged.
2486 """
2487 #IRP = self.USER_DATA
2488 TTID = self.USER_DATA + 1 # Thread ID, that's what we want.
2489 FILE_OBJECT = self.USER_DATA + 2
2490 #FILE_KEY = self.USER_DATA + 3
2491 proc = self._thread_to_process(line[TTID])
2492 if not proc:
2493 # Not a process we care about.
2494 return
2495 file_object = line[FILE_OBJECT]
2496 if file_object in proc.file_objects:
maruel@chromium.org538141b2013-06-03 20:57:17 +00002497 filepath, access_type = proc.file_objects.pop(file_object)
2498 proc.add_file(filepath, access_type)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002499
2500 def handle_FileIo_Create(self, line):
2501 """Handles a file open.
2502
2503 All FileIo events are described at
2504 http://msdn.microsoft.com/library/windows/desktop/aa363884.aspx
2505 for some value of 'description'.
2506
2507 " (..) process and thread id values of the IO events (..) are not valid "
2508 http://msdn.microsoft.com/magazine/ee358703.aspx
2509
2510 The FileIo.Create event doesn't return if the CreateFile() call
2511 succeeded, so keep track of the file_object and check that it is
2512 eventually closed with FileIo_Cleanup.
2513 """
2514 #IRP = self.USER_DATA
2515 TTID = self.USER_DATA + 1 # Thread ID, that's what we want.
2516 FILE_OBJECT = self.USER_DATA + 2
2517 #CREATE_OPTIONS = self.USER_DATA + 3
2518 #FILE_ATTRIBUTES = self.USER_DATA + 4
maruel@chromium.org538141b2013-06-03 20:57:17 +00002519 #SHARE_ACCESS = self.USER_DATA + 5
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002520 OPEN_PATH = self.USER_DATA + 6
2521
2522 proc = self._thread_to_process(line[TTID])
2523 if not proc:
2524 # Not a process we care about.
2525 return
2526
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002527 raw_path = line[OPEN_PATH]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002528 # Ignore directories and bare drive right away.
2529 if raw_path.endswith(os.path.sep):
2530 return
2531 filepath = self._drive_map.to_win32(raw_path)
2532 # Ignore bare drive right away. Some may still fall through with format
2533 # like '\\?\X:'
2534 if len(filepath) == 2:
2535 return
2536 file_object = line[FILE_OBJECT]
maruel12e30012015-10-09 11:55:35 -07002537 if fs.isdir(filepath):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002538 # There is no O_DIRECTORY equivalent on Windows. The closed is
2539 # FILE_FLAG_BACKUP_SEMANTICS but it's not exactly right either. So
2540 # simply discard directories are they are found.
2541 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00002542 # Override any stale file object.
2543 # TODO(maruel): Figure out a way to detect if the file was opened for
2544 # reading or writting. Sadly CREATE_OPTIONS doesn't seem to be of any help
2545 # here. For now mark as None to make it clear we have no idea what it is
2546 # about.
2547 proc.file_objects[file_object] = (filepath, None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002548
2549 def handle_FileIo_Rename(self, line):
2550 # TODO(maruel): Handle?
2551 pass
2552
2553 def handle_Process_End(self, line):
2554 pid = line[self.PID]
2555 if self._process_lookup.get(pid):
2556 logging.info('Terminated: %d' % pid)
2557 self._process_lookup[pid] = None
2558 else:
2559 logging.debug('Terminated: %d' % pid)
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002560 if self._thunk_process and self._thunk_process.pid == pid:
2561 self._thunk_process = None
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002562
2563 def handle_Process_Start(self, line):
2564 """Handles a new child process started by PID."""
2565 #UNIQUE_PROCESS_KEY = self.USER_DATA
2566 PROCESS_ID = self.USER_DATA + 1
2567 #PARENT_PID = self.USER_DATA + 2
2568 #SESSION_ID = self.USER_DATA + 3
2569 #EXIT_STATUS = self.USER_DATA + 4
2570 #DIRECTORY_TABLE_BASE = self.USER_DATA + 5
2571 #USER_SID = self.USER_DATA + 6
2572 IMAGE_FILE_NAME = self.USER_DATA + 7
2573 COMMAND_LINE = self.USER_DATA + 8
2574
2575 ppid = line[self.PID]
2576 pid = int(line[PROCESS_ID], 16)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002577 command_line = CommandLineToArgvW(line[COMMAND_LINE])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002578 logging.debug(
2579 'New process %d->%d (%s) %s' %
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002580 (ppid, pid, line[IMAGE_FILE_NAME], command_line))
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002581
2582 if pid == self._thunk_pid:
2583 # Need to ignore processes we don't know about because the log is
2584 # system-wide. self._thunk_pid shall start only one process.
2585 # This is tricky though because Windows *loves* to reuse process id and
2586 # it happens often that the process ID of the thunk script created by
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002587 # create_subprocess_thunk() is reused. So just detecting the pid here is
2588 # not sufficient, we must confirm the command line.
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002589 if command_line[:len(self._thunk_cmd)] != self._thunk_cmd:
2590 logging.info(
2591 'Ignoring duplicate pid %d for %s: %s while searching for %s',
2592 pid, self._trace_name, command_line, self._thunk_cmd)
2593 return
2594
2595 # TODO(maruel): The check is quite weak. Add the thunk path.
2596 if self._thunk_process:
2597 raise TracingFailure(
2598 ( 'Parent process is _thunk_pid(%d) but thunk_process(%d) is '
2599 'already set') % (self._thunk_pid, self._thunk_process.pid),
2600 None, None, None)
2601 proc = self.Process(self.blacklist, pid, None)
2602 self._thunk_process = proc
2603 return
2604 elif ppid == self._thunk_pid and self._thunk_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002605 proc = self.Process(self.blacklist, pid, None)
2606 self.root_process = proc
2607 ppid = None
2608 elif self._process_lookup.get(ppid):
2609 proc = self.Process(self.blacklist, pid, None)
2610 self._process_lookup[ppid].children.append(proc)
2611 else:
2612 # Ignore
2613 return
2614 self._process_lookup[pid] = proc
2615
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002616 proc.command = command_line
2617 proc.executable = line[IMAGE_FILE_NAME]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002618 # proc.command[0] may be the absolute path of 'executable' but it may be
2619 # anything else too. If it happens that command[0] ends with executable,
2620 # use it, otherwise defaults to the base name.
2621 cmd0 = proc.command[0].lower()
2622 if not cmd0.endswith('.exe'):
2623 # TODO(maruel): That's not strictly true either.
2624 cmd0 += '.exe'
maruel12e30012015-10-09 11:55:35 -07002625 if cmd0.endswith(proc.executable) and fs.isfile(cmd0):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002626 # Fix the path.
2627 cmd0 = cmd0.replace('/', os.path.sep)
2628 cmd0 = os.path.normpath(cmd0)
maruel@chromium.org561d4b22013-09-26 21:08:08 +00002629 proc.executable = file_path.get_native_path_case(cmd0)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002630 logging.info(
2631 'New child: %s -> %d %s' % (ppid, pid, proc.executable))
2632
2633 def handle_Thread_End(self, line):
2634 """Has the same parameters as Thread_Start."""
2635 tid = int(line[self.TID], 16)
2636 self._threads_active.pop(tid, None)
2637
2638 def handle_Thread_Start(self, line):
2639 """Handles a new thread created.
2640
2641 Do not use self.PID here since a process' initial thread is created by
2642 the parent process.
2643 """
2644 PROCESS_ID = self.USER_DATA
2645 TTHREAD_ID = self.USER_DATA + 1
2646 #STACK_BASE = self.USER_DATA + 2
2647 #STACK_LIMIT = self.USER_DATA + 3
2648 #USER_STACK_BASE = self.USER_DATA + 4
2649 #USER_STACK_LIMIT = self.USER_DATA + 5
2650 #AFFINITY = self.USER_DATA + 6
2651 #WIN32_START_ADDR = self.USER_DATA + 7
2652 #TEB_BASE = self.USER_DATA + 8
2653 #SUB_PROCESS_TAG = self.USER_DATA + 9
2654 #BASE_PRIORITY = self.USER_DATA + 10
2655 #PAGE_PRIORITY = self.USER_DATA + 11
2656 #IO_PRIORITY = self.USER_DATA + 12
2657 #THREAD_FLAGS = self.USER_DATA + 13
2658 # Do not use self.PID here since a process' initial thread is created by
2659 # the parent process.
2660 pid = int(line[PROCESS_ID], 16)
2661 tid = int(line[TTHREAD_ID], 16)
2662 logging.debug('New thread pid:%d, tid:%d' % (pid, tid))
2663 self._threads_active[tid] = pid
2664
2665 @classmethod
2666 def supported_events(cls):
2667 """Returns all the procesed events."""
2668 out = []
2669 for member in dir(cls):
2670 match = re.match(r'^handle_([A-Za-z]+)_([A-Za-z]+)$', member)
2671 if match:
2672 out.append(match.groups())
2673 return out
2674
2675 class Tracer(ApiBase.Tracer):
2676 # The basic headers.
2677 EXPECTED_HEADER = [
2678 u'Event Name',
2679 u'Type',
2680 u'Event ID',
2681 u'Version',
2682 u'Channel',
2683 u'Level', # 5
2684 u'Opcode',
2685 u'Task',
2686 u'Keyword',
2687 u'PID',
2688 u'TID', # 10
2689 u'Processor Number',
2690 u'Instance ID',
2691 u'Parent Instance ID',
2692 u'Activity ID',
2693 u'Related Activity ID', # 15
2694 u'Clock-Time',
2695 u'Kernel(ms)', # Both have a resolution of ~15ms which makes them
2696 u'User(ms)', # pretty much useless.
2697 u'User Data', # Extra arguments that are event-specific.
2698 ]
2699 # Only the useful headers common to all entries are listed there. Any column
2700 # at 19 or higher is dependent on the specific event.
2701 EVENT_NAME = 0
2702 TYPE = 1
2703 PID = 9
2704 TID = 10
2705 PROCESSOR_ID = 11
2706 TIMESTAMP = 16
2707 NULL_GUID = '{00000000-0000-0000-0000-000000000000}'
2708 USER_DATA = 19
2709
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002710 class CsvReader(object):
2711 """CSV reader that reads files generated by tracerpt.exe.
2712
2713 csv.reader() fails to read them properly, it mangles file names quoted
2714 with "" with a comma in it.
2715 """
2716 # 0. Had a ',' or one of the following ' ' after a comma, next should
2717 # be ' ', '"' or string or ',' for an empty field.
2718 ( HAD_DELIMITER,
2719 # 1. Processing an unquoted field up to ','.
2720 IN_STR,
2721 # 2. Processing a new field starting with '"'.
2722 STARTING_STR_QUOTED,
2723 # 3. Second quote in a row at the start of a field. It could be either
2724 # '""foo""' or '""'. Who the hell thought it was a great idea to use
2725 # the same character for delimiting and escaping?
2726 STARTING_SECOND_QUOTE,
2727 # 4. A quote inside a quoted string where the previous character was
2728 # not a quote, so the string is not empty. Can be either: end of a
2729 # quoted string (a delimiter) or a quote escape. The next char must be
2730 # either '"' or ','.
2731 HAD_QUOTE_IN_QUOTED,
2732 # 5. Second quote inside a quoted string.
2733 HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED,
2734 # 6. Processing a field that started with '"'.
2735 IN_STR_QUOTED) = range(7)
2736
2737 def __init__(self, f):
2738 self.f = f
2739
2740 def __iter__(self):
2741 return self
2742
2743 def next(self):
2744 """Splits the line in fields."""
2745 line = self.f.readline()
2746 if not line:
2747 raise StopIteration()
2748 line = line.strip()
2749 fields = []
2750 state = self.HAD_DELIMITER
2751 for i, c in enumerate(line):
2752 if state == self.HAD_DELIMITER:
2753 if c == ',':
2754 # Empty field.
2755 fields.append('')
2756 elif c == ' ':
2757 # Ignore initial whitespaces
2758 pass
2759 elif c == '"':
2760 state = self.STARTING_STR_QUOTED
2761 fields.append('')
2762 else:
2763 # Start of a new field.
2764 state = self.IN_STR
2765 fields.append(c)
2766
2767 elif state == self.IN_STR:
2768 # Do not accept quote inside unquoted field.
2769 assert c != '"', (i, c, line, fields)
2770 if c == ',':
2771 fields[-1] = fields[-1].strip()
2772 state = self.HAD_DELIMITER
2773 else:
2774 fields[-1] = fields[-1] + c
2775
2776 elif state == self.STARTING_STR_QUOTED:
2777 if c == '"':
2778 # Do not store the character yet.
2779 state = self.STARTING_SECOND_QUOTE
2780 else:
2781 state = self.IN_STR_QUOTED
2782 fields[-1] = fields[-1] + c
2783
2784 elif state == self.STARTING_SECOND_QUOTE:
2785 if c == ',':
2786 # It was an empty field. '""' == ''.
2787 state = self.HAD_DELIMITER
2788 else:
2789 fields[-1] = fields[-1] + '"' + c
2790 state = self.IN_STR_QUOTED
2791
2792 elif state == self.HAD_QUOTE_IN_QUOTED:
2793 if c == ',':
2794 # End of the string.
2795 state = self.HAD_DELIMITER
2796 elif c == '"':
2797 state = self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED
2798 else:
2799 # The previous double-quote was just an unescaped quote.
2800 fields[-1] = fields[-1] + '"' + c
2801 state = self.IN_STR_QUOTED
2802
2803 elif state == self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED:
2804 if c == ',':
2805 # End of the string.
2806 state = self.HAD_DELIMITER
2807 fields[-1] = fields[-1] + '"'
2808 else:
maruel@chromium.org261d1352013-02-25 19:23:20 +00002809 # That's just how the logger rolls. Revert back to appending the
2810 # char and "guess" it was a quote in a double-quoted string.
2811 state = self.IN_STR_QUOTED
2812 fields[-1] = fields[-1] + '"' + c
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002813
2814 elif state == self.IN_STR_QUOTED:
2815 if c == '"':
2816 # Could be a delimiter or an escape.
2817 state = self.HAD_QUOTE_IN_QUOTED
2818 else:
2819 fields[-1] = fields[-1] + c
2820
2821 if state == self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED:
2822 fields[-1] = fields[-1] + '"'
2823 else:
2824 assert state in (
2825 # Terminated with a normal field.
2826 self.IN_STR,
2827 # Terminated with an empty field.
2828 self.STARTING_SECOND_QUOTE,
2829 # Terminated with a normal quoted field.
2830 self.HAD_QUOTE_IN_QUOTED), (
2831 line, state, fields)
2832 return fields
2833
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002834 def __init__(self, logname):
2835 """Starts the log collection.
2836
2837 Requires administrative access. logman.exe is synchronous so no need for a
2838 "warmup" call. 'Windows Kernel Trace' is *localized* so use its GUID
2839 instead. The GUID constant name is SystemTraceControlGuid. Lovely.
2840
2841 One can get the list of potentially interesting providers with:
2842 "logman query providers | findstr /i file"
2843 """
2844 super(LogmanTrace.Tracer, self).__init__(logname)
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002845 self._signal_script = create_subprocess_thunk()
2846 self._scripts_to_cleanup.append(self._signal_script)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002847 cmd_start = [
2848 'logman.exe',
2849 'start',
2850 'NT Kernel Logger',
2851 '-p', '{9e814aad-3204-11d2-9a82-006008a86939}',
2852 # splitio,fileiocompletion,syscall,file,cswitch,img
2853 '(process,fileio,thread)',
2854 '-o', self._logname + '.etl',
2855 '-ets', # Send directly to kernel
2856 # Values extracted out of thin air.
2857 # Event Trace Session buffer size in kb.
2858 '-bs', '10240',
2859 # Number of Event Trace Session buffers.
2860 '-nb', '16', '256',
2861 ]
2862 logging.debug('Running: %s' % cmd_start)
2863 try:
2864 subprocess.check_call(
2865 cmd_start,
2866 stdin=subprocess.PIPE,
2867 stdout=subprocess.PIPE,
2868 stderr=subprocess.STDOUT)
2869 except subprocess.CalledProcessError, e:
2870 if e.returncode == -2147024891:
2871 print >> sys.stderr, 'Please restart with an elevated admin prompt'
2872 elif e.returncode == -2144337737:
2873 print >> sys.stderr, (
2874 'A kernel trace was already running, stop it and try again')
2875 raise
2876
2877 def trace(self, cmd, cwd, tracename, output):
2878 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
2879 assert os.path.isabs(cmd[0]), cmd[0]
2880 assert os.path.isabs(cwd), cwd
2881 assert os.path.normpath(cwd) == cwd, cwd
2882 with self._lock:
2883 if not self._initialized:
2884 raise TracingFailure(
2885 'Called Tracer.trace() on an unitialized object',
2886 None, None, None, tracename)
2887 assert tracename not in (i['trace'] for i in self._traces)
2888
2889 # Use "logman -?" for help.
2890
2891 stdout = stderr = None
2892 if output:
2893 stdout = subprocess.PIPE
2894 stderr = subprocess.STDOUT
2895
2896 # Run the child process.
2897 logging.debug('Running: %s' % cmd)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002898 # Use the temporary script generated with create_subprocess_thunk() so we
2899 # have a clear pid owner. Since trace_inputs.py can be used as a library
2900 # and could trace multiple processes simultaneously, it makes it more
2901 # complex if the executable to be traced is executed directly here. It
2902 # also solves issues related to logman.exe that needs to be executed to
2903 # control the kernel trace.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002904 child_cmd = [
2905 sys.executable,
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002906 self._signal_script,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002907 tracename,
2908 ]
2909 child = subprocess.Popen(
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00002910 child_cmd + tools.fix_python_path(cmd),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002911 cwd=cwd,
2912 stdin=subprocess.PIPE,
2913 stdout=stdout,
2914 stderr=stderr)
2915 logging.debug('Started child pid: %d' % child.pid)
2916 out = child.communicate()[0]
2917 # This doesn't mean all the grand-children are done. Sadly, we don't have
2918 # a good way to determine that.
2919
2920 with self._lock:
2921 assert tracename not in (i['trace'] for i in self._traces)
2922 self._traces.append({
maruel@chromium.orgf43e68b2012-10-15 20:23:10 +00002923 'cmd': cmd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002924 'cwd': cwd,
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002925 'output': out,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002926 'pid': child.pid,
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002927 # Used to figure out the real process when process ids are reused.
2928 'thunk_cmd': child_cmd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002929 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002930 })
2931
2932 return child.returncode, out
2933
2934 def close(self, _timeout=None):
2935 """Stops the kernel log collection and converts the traces to text
2936 representation.
2937 """
2938 with self._lock:
maruel@chromium.org808e3f22013-05-16 16:42:54 +00002939 try:
2940 super(LogmanTrace.Tracer, self).close()
2941 finally:
2942 cmd_stop = [
2943 'logman.exe',
2944 'stop',
2945 'NT Kernel Logger',
2946 '-ets', # Sends the command directly to the kernel.
2947 ]
2948 logging.debug('Running: %s' % cmd_stop)
2949 subprocess.check_call(
2950 cmd_stop,
2951 stdin=subprocess.PIPE,
2952 stdout=subprocess.PIPE,
2953 stderr=subprocess.STDOUT)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002954
2955 def post_process_log(self):
2956 """Converts the .etl file into .csv then into .json."""
2957 super(LogmanTrace.Tracer, self).post_process_log()
2958 logformat = 'csv'
2959 self._convert_log(logformat)
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00002960 self._trim_log(logformat)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002961
maruel@chromium.org808e3f22013-05-16 16:42:54 +00002962 def _gen_logdata(self):
2963 return {
2964 'format': 'csv',
2965 'traces': self._traces,
2966 }
2967
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00002968 def _trim_log(self, logformat):
2969 """Reduces the amount of data in original log by generating a 'reduced'
2970 log.
2971 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002972 if logformat == 'csv_utf16':
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002973 file_handle = codecs.open(
2974 self._logname + '.' + logformat, 'r', encoding='utf-16')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002975
2976 elif logformat == 'csv':
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002977 assert sys.getfilesystemencoding() == 'mbcs'
2978 file_handle = codecs.open(
2979 self._logname + '.' + logformat, 'r',
2980 encoding=get_current_encoding())
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002981
2982 supported_events = LogmanTrace.Context.supported_events()
2983
2984 def trim(generator):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00002985 """Loads items from the generator and returns the interesting data.
2986
2987 It filters out any uninteresting line and reduce the amount of data in
2988 the trace.
2989 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002990 for index, line in enumerate(generator):
2991 if not index:
2992 if line != self.EXPECTED_HEADER:
2993 raise TracingFailure(
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002994 'Found malformed header: %s' % line,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002995 None, None, None)
2996 continue
2997 # As you can see, the CSV is full of useful non-redundant information:
2998 if (line[2] != '0' or # Event ID
2999 line[3] not in ('2', '3') or # Version
3000 line[4] != '0' or # Channel
3001 line[5] != '0' or # Level
3002 line[7] != '0' or # Task
3003 line[8] != '0x0000000000000000' or # Keyword
3004 line[12] != '' or # Instance ID
3005 line[13] != '' or # Parent Instance ID
3006 line[14] != self.NULL_GUID or # Activity ID
3007 line[15] != ''): # Related Activity ID
3008 raise TracingFailure(
3009 'Found unexpected values in line: %s' % ' '.join(line),
3010 None, None, None)
3011
3012 if (line[self.EVENT_NAME], line[self.TYPE]) not in supported_events:
3013 continue
3014
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003015 yield [
3016 line[self.EVENT_NAME],
3017 line[self.TYPE],
3018 line[self.PID],
3019 line[self.TID],
3020 line[self.PROCESSOR_ID],
3021 line[self.TIMESTAMP],
3022 ] + line[self.USER_DATA:]
3023
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003024 # must not convert the trim() call into a list, since it will use too much
3025 # memory for large trace. use a csv file as a workaround since the json
3026 # parser requires a complete in-memory file.
maruel12e30012015-10-09 11:55:35 -07003027 path = os.path.join(
3028 unicode(os.getcwd()), u'%s.preprocessed' % self._logname)
3029 with fs.open(path, 'wb') as f:
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003030 # $ and * can't be used in file name on windows, reducing the likelihood
3031 # of having to escape a string.
3032 out = csv.writer(
3033 f, delimiter='$', quotechar='*', quoting=csv.QUOTE_MINIMAL)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003034 for line in trim(self.CsvReader(file_handle)):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003035 out.writerow([s.encode('utf-8') for s in line])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003036
3037 def _convert_log(self, logformat):
3038 """Converts the ETL trace to text representation.
3039
3040 Normally, 'csv' is sufficient. If complex scripts are used (like eastern
3041 languages), use 'csv_utf16'. If localization gets in the way, use 'xml'.
3042
3043 Arguments:
3044 - logformat: Text format to be generated, csv, csv_utf16 or xml.
3045
3046 Use "tracerpt -?" for help.
3047 """
3048 LOCALE_INVARIANT = 0x7F
3049 windll.kernel32.SetThreadLocale(LOCALE_INVARIANT)
3050 cmd_convert = [
3051 'tracerpt.exe',
3052 '-l', self._logname + '.etl',
3053 '-o', self._logname + '.' + logformat,
3054 '-gmt', # Use UTC
3055 '-y', # No prompt
3056 # Use -of XML to get the header of each items after column 19, e.g. all
3057 # the actual headers of 'User Data'.
3058 ]
3059
3060 if logformat == 'csv':
3061 # tracerpt localizes the 'Type' column, for major brainfuck
3062 # entertainment. I can't imagine any sane reason to do that.
3063 cmd_convert.extend(['-of', 'CSV'])
3064 elif logformat == 'csv_utf16':
3065 # This causes it to use UTF-16, which doubles the log size but ensures
3066 # the log is readable for non-ASCII characters.
3067 cmd_convert.extend(['-of', 'CSV', '-en', 'Unicode'])
3068 elif logformat == 'xml':
3069 cmd_convert.extend(['-of', 'XML'])
3070 else:
3071 raise ValueError('Unexpected log format \'%s\'' % logformat)
3072 logging.debug('Running: %s' % cmd_convert)
3073 # This can takes tens of minutes for large logs.
3074 # Redirects all output to stderr.
3075 subprocess.check_call(
3076 cmd_convert,
3077 stdin=subprocess.PIPE,
3078 stdout=sys.stderr,
3079 stderr=sys.stderr)
3080
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003081 def __init__(self, use_sudo=False): # pylint: disable=W0613
3082 super(LogmanTrace, self).__init__()
3083 # Ignore use_sudo. It's irrelevant on Windows but kept to simplify the API.
3084
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003085 @staticmethod
3086 def clean_trace(logname):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003087 for ext in ('', '.csv', '.etl', '.json', '.xml', '.preprocessed'):
maruel12e30012015-10-09 11:55:35 -07003088 if fs.isfile(logname + ext):
3089 fs.remove(logname + ext)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003090
3091 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003092 def parse_log(cls, logname, blacklist, trace_name):
3093 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00003094 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003095
3096 def blacklist_more(filepath):
3097 # All the NTFS metadata is in the form x:\$EXTEND or stuff like that.
3098 return blacklist(filepath) or re.match(r'[A-Z]\:\\\$EXTEND', filepath)
3099
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003100 # Create a list of (Context, result_dict) tuples. This is necessary because
3101 # the csv file may be larger than the amount of available memory.
3102 contexes = [
3103 (
3104 cls.Context(
3105 blacklist_more, item['pid'], item['trace'], item['thunk_cmd']),
3106 {
3107 'output': item['output'],
3108 'trace': item['trace'],
3109 },
3110 )
Marc-Antoine Ruelde011802013-11-12 15:19:47 -05003111 for item in tools.read_json(logname)['traces']
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003112 if not trace_name or item['trace'] == trace_name
3113 ]
3114
3115 # The log may be too large to fit in memory and it is not efficient to read
3116 # it multiple times, so multiplex the contexes instead, which is slightly
3117 # more awkward.
maruel12e30012015-10-09 11:55:35 -07003118 path = os.path.join(unicode(os.getcwd()), u'%s.preprocessed' % logname)
3119 with fs.open(path, 'rb') as f:
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003120 lines = csv.reader(
3121 f, delimiter='$', quotechar='*', quoting=csv.QUOTE_MINIMAL)
3122 for encoded in lines:
3123 line = [s.decode('utf-8') for s in encoded]
3124 # Convert the PID in-place from hex.
3125 line[cls.Context.PID] = int(line[cls.Context.PID], 16)
3126 for context in contexes:
3127 if 'exception' in context[1]:
3128 continue
3129 try:
3130 context[0].on_line(line)
3131 except TracingFailure:
3132 context[1]['exception'] = sys.exc_info()
3133
3134 for context in contexes:
3135 if 'exception' in context[1]:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003136 continue
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003137 context[1]['results'] = context[0].to_results()
3138
3139 return [context[1] for context in contexes]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003140
3141
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003142def get_api(**kwargs):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003143 """Returns the correct implementation for the current OS."""
3144 if sys.platform == 'cygwin':
3145 raise NotImplementedError(
3146 'Not implemented for cygwin, start the script from Win32 python')
3147 flavors = {
3148 'win32': LogmanTrace,
3149 'darwin': Dtrace,
3150 'sunos5': Dtrace,
3151 'freebsd7': Dtrace,
3152 'freebsd8': Dtrace,
3153 }
3154 # Defaults to strace.
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003155 return flavors.get(sys.platform, Strace)(**kwargs)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003156
3157
3158def extract_directories(root_dir, files, blacklist):
3159 """Detects if all the files in a directory are in |files| and if so, replace
3160 the individual files by a Results.Directory instance.
3161
3162 Takes a list of Results.File instances and returns a shorter list of
3163 Results.File and Results.Directory instances.
3164
3165 Arguments:
3166 - root_dir: Optional base directory that shouldn't be search further.
3167 - files: list of Results.File instances.
maruel@chromium.org3683afe2013-07-27 00:09:27 +00003168 - blacklist: lambda to reject unneeded files, for example r'.+\.pyc'.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003169 """
3170 logging.info(
3171 'extract_directories(%s, %d files, ...)' % (root_dir, len(files)))
3172 assert not (root_dir or '').endswith(os.path.sep), root_dir
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003173 # It is important for root_dir to not be a symlinked path, make sure to call
3174 # os.path.realpath() as needed.
3175 assert not root_dir or (
maruel12e30012015-10-09 11:55:35 -07003176 fs.realpath(file_path.get_native_path_case(root_dir)) == root_dir)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003177 assert not any(isinstance(f, Results.Directory) for f in files)
3178 # Remove non existent files.
3179 files = [f for f in files if f.existent]
3180 if not files:
3181 return files
3182 # All files must share the same root, which can be None.
3183 assert len(set(f.root for f in files)) == 1, set(f.root for f in files)
3184
3185 # Creates a {directory: {filename: File}} mapping, up to root.
3186 buckets = {}
3187 if root_dir:
3188 buckets[root_dir] = {}
3189 for fileobj in files:
3190 path = fileobj.full_path
3191 directory = os.path.dirname(path)
3192 assert directory
3193 # Do not use os.path.basename() so trailing os.path.sep is kept.
3194 basename = path[len(directory)+1:]
3195 files_in_directory = buckets.setdefault(directory, {})
3196 files_in_directory[basename] = fileobj
3197 # Add all the directories recursively up to root.
3198 while True:
3199 old_d = directory
3200 directory = os.path.dirname(directory)
3201 if directory + os.path.sep == root_dir or directory == old_d:
3202 break
3203 buckets.setdefault(directory, {})
3204
3205 root_prefix = len(root_dir) + 1 if root_dir else 0
3206 for directory in sorted(buckets, reverse=True):
maruel12e30012015-10-09 11:55:35 -07003207 if not fs.isdir(directory):
maruel@chromium.orgb1678e62013-03-19 15:55:17 +00003208 logging.debug(
3209 '%s was a directory but doesn\'t exist anymore; ignoring', directory)
3210 continue
maruel12e30012015-10-09 11:55:35 -07003211 actual = set(f for f in fs.listdir(directory) if not blacklist(f))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003212 expected = set(buckets[directory])
3213 if not (actual - expected):
3214 parent = os.path.dirname(directory)
3215 buckets[parent][os.path.basename(directory)] = Results.Directory(
3216 root_dir,
3217 directory[root_prefix:],
3218 False,
3219 sum(f.size for f in buckets[directory].itervalues()),
3220 sum(f.nb_files for f in buckets[directory].itervalues()))
3221 # Remove the whole bucket.
3222 del buckets[directory]
3223
3224 # Reverse the mapping with what remains. The original instances are returned,
3225 # so the cached meta data is kept.
3226 files = sum((x.values() for x in buckets.itervalues()), [])
3227 return sorted(files, key=lambda x: x.path)
3228
3229
3230def trace(logfile, cmd, cwd, api, output):
3231 """Traces an executable. Returns (returncode, output) from api.
3232
3233 Arguments:
3234 - logfile: file to write to.
3235 - cmd: command to run.
3236 - cwd: current directory to start the process in.
3237 - api: a tracing api instance.
3238 - output: if True, returns output, otherwise prints it at the console.
3239 """
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00003240 cmd = tools.fix_python_path(cmd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003241 api.clean_trace(logfile)
3242 with api.get_tracer(logfile) as tracer:
3243 return tracer.trace(cmd, cwd, 'default', output)
3244
3245
maruel@chromium.orge5322512013-08-19 20:17:57 +00003246def CMDclean(parser, args):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003247 """Cleans up traces."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003248 options, args = parser.parse_args(args)
3249 api = get_api()
3250 api.clean_trace(options.log)
3251 return 0
3252
3253
maruel@chromium.orge5322512013-08-19 20:17:57 +00003254def CMDtrace(parser, args):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003255 """Traces an executable."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003256 parser.allow_interspersed_args = False
3257 parser.add_option(
3258 '-q', '--quiet', action='store_true',
3259 help='Redirects traced executable output to /dev/null')
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003260 parser.add_option(
3261 '-s', '--sudo', action='store_true',
3262 help='Use sudo when shelling out the tracer tool (ignored on Windows)')
3263 parser.add_option(
3264 '-n', '--no-sudo', action='store_false',
3265 help='Don\'t use sudo')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003266 options, args = parser.parse_args(args)
3267
3268 if not args:
3269 parser.error('Please provide a command to run')
3270
Marc-Antoine Ruel5906fcd2013-11-08 21:02:24 -05003271 if not can_trace():
3272 parser.error('Please rerun this program with admin privileges')
3273
maruel12e30012015-10-09 11:55:35 -07003274 if not os.path.isabs(args[0]) and fs.access(args[0], os.X_OK):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003275 args[0] = os.path.abspath(args[0])
3276
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003277 # options.sudo default value is None, which is to do whatever tracer defaults
3278 # do.
3279 api = get_api(use_sudo=options.sudo)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003280 return trace(options.log, args, os.getcwd(), api, options.quiet)[0]
3281
3282
maruel@chromium.orge5322512013-08-19 20:17:57 +00003283def CMDread(parser, args):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003284 """Reads the logs and prints the result."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003285 parser.add_option(
3286 '-V', '--variable',
3287 nargs=2,
3288 action='append',
3289 dest='variables',
3290 metavar='VAR_NAME directory',
3291 default=[],
3292 help=('Variables to replace relative directories against. Example: '
3293 '"-v \'$HOME\' \'/home/%s\'" will replace all occurence of your '
3294 'home dir with $HOME') % getpass.getuser())
3295 parser.add_option(
3296 '--root-dir',
3297 help='Root directory to base everything off it. Anything outside of this '
3298 'this directory will not be reported')
3299 parser.add_option(
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003300 '--trace-name',
3301 help='Only reads one of the trace. Defaults to reading all traces')
3302 parser.add_option(
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003303 '-j', '--json', action='store_true',
3304 help='Outputs raw result data as json')
3305 parser.add_option(
maruel@chromium.org3683afe2013-07-27 00:09:27 +00003306 '--trace-blacklist', action='append', default=[],
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003307 help='List of regexp to use as blacklist filter')
3308 options, args = parser.parse_args(args)
3309
3310 if options.root_dir:
maruel@chromium.org561d4b22013-09-26 21:08:08 +00003311 options.root_dir = file_path.get_native_path_case(
maruel@chromium.org7dc17b32013-04-02 22:27:05 +00003312 unicode(os.path.abspath(options.root_dir)))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003313
3314 variables = dict(options.variables)
3315 api = get_api()
Marc-Antoine Ruelac54cb42013-11-18 14:05:35 -05003316 blacklist = tools.gen_blacklist(options.trace_blacklist)
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003317 data = api.parse_log(options.log, blacklist, options.trace_name)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003318 # Process each trace.
3319 output_as_json = []
maruel@chromium.org9e56b592013-05-21 16:56:05 +00003320 try:
3321 for item in data:
3322 if 'exception' in item:
3323 # Do not abort the other traces.
3324 print >> sys.stderr, (
3325 'Trace %s: Got an exception: %s' % (
3326 item['trace'], item['exception'][1]))
3327 continue
3328 results = item['results']
3329 if options.root_dir:
3330 results = results.strip_root(options.root_dir)
3331
3332 if options.json:
3333 output_as_json.append(results.flatten())
3334 else:
3335 simplified = extract_directories(
3336 options.root_dir, results.files, blacklist)
3337 simplified = [f.replace_variables(variables) for f in simplified]
3338 if len(data) > 1:
3339 print('Trace: %s' % item['trace'])
3340 print('Total: %d' % len(results.files))
3341 print('Non existent: %d' % len(results.non_existent))
3342 for f in results.non_existent:
3343 print(' %s' % f.path)
3344 print(
3345 'Interesting: %d reduced to %d' % (
3346 len(results.existent), len(simplified)))
3347 for f in simplified:
3348 print(' %s' % f.path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003349
3350 if options.json:
Marc-Antoine Ruelde011802013-11-12 15:19:47 -05003351 tools.write_json(sys.stdout, output_as_json, False)
maruel@chromium.org9e56b592013-05-21 16:56:05 +00003352 except KeyboardInterrupt:
3353 return 1
3354 except IOError as e:
3355 if e.errno == errno.EPIPE:
3356 # Do not print a stack trace when the output is piped to less and the user
3357 # quits before the whole output was written.
3358 return 1
3359 raise
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003360 return 0
3361
3362
Marc-Antoine Ruelf74cffe2015-07-15 15:21:34 -04003363class OptionParserTraceInputs(logging_utils.OptionParserWithLogging):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003364 """Adds automatic --log handling."""
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00003365
3366 # Disable --log-file options since both --log and --log-file options are
3367 # confusing.
3368 # TODO(vadimsh): Rename --log-file or --log to something else.
3369 enable_log_file = False
3370
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003371 def __init__(self, **kwargs):
Marc-Antoine Ruelf74cffe2015-07-15 15:21:34 -04003372 logging_utils.OptionParserWithLogging.__init__(self, **kwargs)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003373 self.add_option(
3374 '-l', '--log', help='Log file to generate or read, required')
3375
3376 def parse_args(self, *args, **kwargs):
3377 """Makes sure the paths make sense.
3378
3379 On Windows, / and \ are often mixed together in a path.
3380 """
Marc-Antoine Ruelf74cffe2015-07-15 15:21:34 -04003381 options, args = logging_utils.OptionParserWithLogging.parse_args(
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003382 self, *args, **kwargs)
3383 if not options.log:
3384 self.error('Must supply a log file with -l')
maruel12e30012015-10-09 11:55:35 -07003385 options.log = unicode(os.path.abspath(options.log))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003386 return options, args
3387
3388
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003389def main(argv):
maruel@chromium.orge5322512013-08-19 20:17:57 +00003390 dispatcher = subcommand.CommandDispatcher(__name__)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003391 try:
maruel@chromium.org3d671992013-08-20 00:38:27 +00003392 return dispatcher.execute(
3393 OptionParserTraceInputs(version=__version__), argv)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003394 except TracingFailure, e:
3395 sys.stderr.write('\nError: ')
3396 sys.stderr.write(str(e))
3397 sys.stderr.write('\n')
3398 return 1
3399
3400
3401if __name__ == '__main__':
maruel8e4e40c2016-05-30 06:21:07 -07003402 subprocess42.inhibit_os_error_reporting()
maruel@chromium.orge5322512013-08-19 20:17:57 +00003403 fix_encoding.fix_encoding()
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00003404 tools.disable_buffering()
maruel@chromium.orge5322512013-08-19 20:17:57 +00003405 colorama.init()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003406 sys.exit(main(sys.argv[1:]))