blob: 64ffbe26fb9166b6c0acbbd3da95170cf4251ec8 [file] [log] [blame]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001#!/usr/bin/env python
2# coding=utf-8
maruelea586f32016-04-05 11:11:33 -07003# Copyright 2012 The LUCI Authors. All rights reserved.
maruelf1f5e2a2016-05-25 17:10:39 -07004# Use of this source code is governed under the Apache License, Version 2.0
5# that can be found in the LICENSE file.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00006
7"""Traces an executable and its child processes and extract the files accessed
8by them.
9
10The implementation uses OS-specific API. The native Kernel logger and the ETL
11interface is used on Windows. Dtrace is used on OSX. Strace is used otherwise.
12The OS-specific implementation is hidden in an 'API' interface.
13
14The results are embedded in a Results instance. The tracing is done in two
15phases, the first is to do the actual trace and generate an
16implementation-specific log file. Then the log file is parsed to extract the
17information, including the individual child processes and the files accessed
18from the log.
19"""
20
21import codecs
22import csv
maruel@chromium.org9e56b592013-05-21 16:56:05 +000023import errno
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000024import getpass
25import glob
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000026import logging
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000027import os
28import re
maruel@chromium.org9b2112a2013-04-05 18:01:02 +000029import stat
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000030import subprocess
31import sys
32import tempfile
33import threading
34import time
35import weakref
36
maruel@chromium.orge5322512013-08-19 20:17:57 +000037from third_party import colorama
38from third_party.depot_tools import fix_encoding
39from third_party.depot_tools import subcommand
40
maruel@chromium.org561d4b22013-09-26 21:08:08 +000041from utils import file_path
maruel12e30012015-10-09 11:55:35 -070042from utils import fs
Marc-Antoine Ruelf74cffe2015-07-15 15:21:34 -040043from utils import logging_utils
maruel8e4e40c2016-05-30 06:21:07 -070044from utils import subprocess42
vadimsh@chromium.orga4326472013-08-24 02:05:41 +000045from utils import tools
46
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000047## OS-specific imports
48
49if sys.platform == 'win32':
maruel@chromium.org561d4b22013-09-26 21:08:08 +000050 from ctypes.wintypes import byref, c_int, c_wchar_p
51 from ctypes.wintypes import windll # pylint: disable=E0611
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000052
53
maruel12e30012015-10-09 11:55:35 -070054__version__ = '0.2'
maruel@chromium.org3d671992013-08-20 00:38:27 +000055
56
tansella4949442016-06-23 22:34:32 -070057BASE_DIR = os.path.dirname(os.path.abspath(
58 __file__.decode(sys.getfilesystemencoding())))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000059ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR))
60
61
62class TracingFailure(Exception):
63 """An exception occured during tracing."""
64 def __init__(self, description, pid, line_number, line, *args):
65 super(TracingFailure, self).__init__(
66 description, pid, line_number, line, *args)
67 self.description = description
68 self.pid = pid
69 self.line_number = line_number
70 self.line = line
71 self.extra = args
72
73 def __str__(self):
74 out = self.description
75 if self.pid:
76 out += '\npid: %d' % self.pid
77 if self.line_number:
78 out += '\nline: %d' % self.line_number
79 if self.line:
80 out += '\n%s' % self.line
81 if self.extra:
82 out += '\n' + ', '.join(map(str, filter(None, self.extra)))
83 return out
84
85
86## OS-specific functions
87
88if sys.platform == 'win32':
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000089 def get_current_encoding():
90 """Returns the 'ANSI' code page associated to the process."""
91 return 'cp%d' % int(windll.kernel32.GetACP())
92
93
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000094 def CommandLineToArgvW(command_line):
95 """Splits a commandline into argv using CommandLineToArgvW()."""
96 # http://msdn.microsoft.com/library/windows/desktop/bb776391.aspx
97 size = c_int()
maruel@chromium.org306e0e72012-11-02 18:22:03 +000098 assert isinstance(command_line, unicode)
99 ptr = windll.shell32.CommandLineToArgvW(command_line, byref(size))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000100 try:
101 return [arg for arg in (c_wchar_p * size.value).from_address(ptr)]
102 finally:
103 windll.kernel32.LocalFree(ptr)
104
105
Marc-Antoine Ruel5906fcd2013-11-08 21:02:24 -0500106def can_trace():
107 """Returns True if the user is an administrator on Windows.
108
109 It is required for tracing to work.
110 """
111 if sys.platform != 'win32':
112 return True
113 return bool(windll.shell32.IsUserAnAdmin())
114
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000115
maruel@chromium.org2909b7c2013-05-29 18:52:59 +0000116def create_subprocess_thunk():
117 """Creates a small temporary script to start the child process.
118
119 This thunk doesn't block, its unique name is used to identify it as the
120 parent.
121 """
Marc-Antoine Ruel3c979cb2015-03-11 13:43:28 -0400122 handle, name = tempfile.mkstemp(prefix=u'trace_inputs_thunk', suffix='.py')
maruel@chromium.org2909b7c2013-05-29 18:52:59 +0000123 try:
124 os.write(
125 handle,
126 (
127 'import subprocess, sys\n'
128 'sys.exit(subprocess.call(sys.argv[2:]))\n'
129 ))
130 finally:
131 os.close(handle)
132 return name
133
134
135def create_exec_thunk():
136 """Creates a small temporary script to start the child executable.
137
138 Reads from the file handle provided as the fisrt argument to block, then
139 execv() the command to be traced.
140 """
Marc-Antoine Ruel3c979cb2015-03-11 13:43:28 -0400141 handle, name = tempfile.mkstemp(prefix=u'trace_inputs_thunk', suffix='.py')
maruel@chromium.org2909b7c2013-05-29 18:52:59 +0000142 try:
143 os.write(
144 handle,
145 (
146 'import os, sys\n'
147 'fd = int(sys.argv[1])\n'
148 # This will block until the controlling process writes a byte on the
149 # pipe. It will do so once the tracing tool, e.g. strace, is ready to
150 # trace.
151 'os.read(fd, 1)\n'
152 'os.close(fd)\n'
153 'os.execv(sys.argv[2], sys.argv[2:])\n'
154 ))
155 finally:
156 os.close(handle)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000157 return name
158
159
160def strace_process_quoted_arguments(text):
161 """Extracts quoted arguments on a string and return the arguments as a list.
162
163 Implemented as an automaton. Supports incomplete strings in the form
164 '"foo"...'.
165
166 Example:
167 With text = '"foo", "bar"', the function will return ['foo', 'bar']
168
169 TODO(maruel): Implement escaping.
170 """
171 # All the possible states of the DFA.
172 ( NEED_QUOTE, # Begining of a new arguments.
173 INSIDE_STRING, # Inside an argument.
174 ESCAPED, # Found a '\' inside a quote. Treat the next char as-is.
175 NEED_COMMA_OR_DOT, # Right after the closing quote of an argument. Could be
176 # a serie of 3 dots or a comma.
177 NEED_SPACE, # Right after a comma
178 NEED_DOT_2, # Found a dot, need a second one.
179 NEED_DOT_3, # Found second dot, need a third one.
180 NEED_COMMA, # Found third dot, need a comma.
181 ) = range(8)
182
183 state = NEED_QUOTE
184 out = []
185 for index, char in enumerate(text):
186 if char == '"':
187 if state == NEED_QUOTE:
188 state = INSIDE_STRING
189 # A new argument was found.
190 out.append('')
191 elif state == INSIDE_STRING:
192 # The argument is now closed.
193 state = NEED_COMMA_OR_DOT
194 elif state == ESCAPED:
195 out[-1] += char
196 state = INSIDE_STRING
197 else:
198 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000199 'Can\'t process char \'%s\' at column %d for: %r' % (
200 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000201 index,
202 text)
203 elif char == ',':
204 if state in (NEED_COMMA_OR_DOT, NEED_COMMA):
205 state = NEED_SPACE
206 elif state == INSIDE_STRING:
207 out[-1] += char
208 elif state == ESCAPED:
209 out[-1] += char
210 state = INSIDE_STRING
211 else:
212 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000213 'Can\'t process char \'%s\' at column %d for: %r' % (
214 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000215 index,
216 text)
217 elif char == ' ':
218 if state == NEED_SPACE:
219 state = NEED_QUOTE
220 elif state == INSIDE_STRING:
221 out[-1] += char
222 elif state == ESCAPED:
223 out[-1] += char
224 state = INSIDE_STRING
225 else:
226 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000227 'Can\'t process char \'%s\' at column %d for: %r' % (
228 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000229 index,
230 text)
231 elif char == '.':
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000232 if state in (NEED_QUOTE, NEED_COMMA_OR_DOT):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000233 # The string is incomplete, this mean the strace -s flag should be
234 # increased.
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000235 # For NEED_QUOTE, the input string would look like '"foo", ...'.
236 # For NEED_COMMA_OR_DOT, the input string would look like '"foo"...'
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000237 state = NEED_DOT_2
238 elif state == NEED_DOT_2:
239 state = NEED_DOT_3
240 elif state == NEED_DOT_3:
241 state = NEED_COMMA
242 elif state == INSIDE_STRING:
243 out[-1] += char
244 elif state == ESCAPED:
245 out[-1] += char
246 state = INSIDE_STRING
247 else:
248 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000249 'Can\'t process char \'%s\' at column %d for: %r' % (
250 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000251 index,
252 text)
253 elif char == '\\':
254 if state == ESCAPED:
255 out[-1] += char
256 state = INSIDE_STRING
257 elif state == INSIDE_STRING:
258 state = ESCAPED
259 else:
260 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000261 'Can\'t process char \'%s\' at column %d for: %r' % (
262 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000263 index,
264 text)
265 else:
266 if state == INSIDE_STRING:
267 out[-1] += char
268 else:
269 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000270 'Can\'t process char \'%s\' at column %d for: %r' % (
271 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000272 index,
273 text)
274 if state not in (NEED_COMMA, NEED_COMMA_OR_DOT):
275 raise ValueError(
276 'String is incorrectly terminated: %r' % text,
277 text)
278 return out
279
280
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000281def assert_is_renderable(pseudo_string):
282 """Asserts the input is a valid object to be processed by render()."""
283 assert (
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000284 pseudo_string is None or
285 isinstance(pseudo_string, unicode) or
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000286 hasattr(pseudo_string, 'render')), repr(pseudo_string)
287
288
289def render(pseudo_string):
290 """Converts the pseudo-string to an unicode string."""
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000291 if pseudo_string is None or isinstance(pseudo_string, unicode):
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000292 return pseudo_string
293 return pseudo_string.render()
294
295
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000296class Results(object):
297 """Results of a trace session."""
298
299 class _TouchedObject(object):
300 """Something, a file or a directory, that was accessed."""
301 def __init__(self, root, path, tainted, size, nb_files):
302 logging.debug(
303 '%s(%s, %s, %s, %s, %s)' %
304 (self.__class__.__name__, root, path, tainted, size, nb_files))
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000305 assert_is_renderable(root)
306 assert_is_renderable(path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000307 self.root = root
308 self.path = path
309 self.tainted = tainted
310 self.nb_files = nb_files
maruel@chromium.org538141b2013-06-03 20:57:17 +0000311 # Can be used as a cache or a default value, depending on context. In
312 # particular, once self.tainted is True, because the path was replaced
313 # with a variable, it is not possible to look up the file size.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000314 self._size = size
315 # These are cache only.
316 self._real_path = None
317
318 # Check internal consistency.
319 assert path, path
maruel@chromium.org561d4b22013-09-26 21:08:08 +0000320 assert tainted or bool(root) != bool(file_path.isabs(path)), (root, path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000321 assert tainted or (
maruel12e30012015-10-09 11:55:35 -0700322 not fs.exists(self.full_path) or
maruel@chromium.org561d4b22013-09-26 21:08:08 +0000323 (self.full_path == file_path.get_native_path_case(self.full_path))), (
324 tainted,
325 self.full_path,
326 file_path.get_native_path_case(self.full_path))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000327
328 @property
329 def existent(self):
330 return self.size != -1
331
332 @property
333 def full_path(self):
334 if self.root:
335 return os.path.join(self.root, self.path)
336 return self.path
337
338 @property
339 def real_path(self):
340 """Returns the path with symlinks resolved."""
341 if not self._real_path:
maruel5ab2e6c2016-08-03 12:00:37 -0700342 self._real_path = os.path.realpath(self.full_path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000343 return self._real_path
344
345 @property
346 def size(self):
maruel@chromium.org538141b2013-06-03 20:57:17 +0000347 """File's size. -1 is not existent.
348
349 Once tainted, it is not possible the retrieve the file size anymore since
350 the path is composed of variables.
351 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000352 if self._size is None and not self.tainted:
353 try:
maruel12e30012015-10-09 11:55:35 -0700354 self._size = fs.stat(self.full_path).st_size
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000355 except OSError:
356 self._size = -1
357 return self._size
358
359 def flatten(self):
360 """Returns a dict representing this object.
361
362 A 'size' of 0 means the file was only touched and not read.
363 """
364 return {
365 'path': self.path,
366 'size': self.size,
367 }
368
369 def replace_variables(self, variables):
370 """Replaces the root of this File with one of the variables if it matches.
371
372 If a variable replacement occurs, the cloned object becomes tainted.
373 """
374 for variable, root_path in variables.iteritems():
375 if self.path.startswith(root_path):
376 return self._clone(
377 self.root, variable + self.path[len(root_path):], True)
378 # No need to clone, returns ourself.
379 return self
380
381 def strip_root(self, root):
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000382 """Returns a clone of itself with 'root' stripped off.
383
384 Note that the file is kept if it is either accessible from a symlinked
385 path that was used to access the file or through the real path.
386 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000387 # Check internal consistency.
maruel@chromium.org561d4b22013-09-26 21:08:08 +0000388 assert (
389 self.tainted or
390 (file_path.isabs(root) and root.endswith(os.path.sep))), root
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000391 if not self.full_path.startswith(root):
392 # Now try to resolve the symlinks to see if it can be reached this way.
393 # Only try *after* trying without resolving symlink.
394 if not self.real_path.startswith(root):
395 return None
396 path = self.real_path
397 else:
398 path = self.full_path
399 return self._clone(root, path[len(root):], self.tainted)
400
401 def _clone(self, new_root, new_path, tainted):
402 raise NotImplementedError(self.__class__.__name__)
403
404 class File(_TouchedObject):
405 """A file that was accessed. May not be present anymore.
406
407 If tainted is true, it means it is not a real path anymore as a variable
408 replacement occured.
409
maruel@chromium.org538141b2013-06-03 20:57:17 +0000410 |mode| can be one of None, TOUCHED, READ or WRITE.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000411 """
maruel@chromium.org538141b2013-06-03 20:57:17 +0000412 # Was probed for existence, and it is existent, but was never _opened_.
413 TOUCHED = 't'
414 # Opened for read only and guaranteed to not have been written to.
415 READ = 'r'
416 # Opened for write.
417 WRITE = 'w'
418
419 # They are listed in order of priority. E.g. if a file is traced as TOUCHED
420 # then as WRITE, only keep WRITE. None means no idea, which is a problem on
421 # Windows.
422 ACCEPTABLE_MODES = (None, TOUCHED, READ, WRITE)
423
424 def __init__(self, root, path, tainted, size, mode):
425 assert mode in self.ACCEPTABLE_MODES
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000426 super(Results.File, self).__init__(root, path, tainted, size, 1)
maruel@chromium.org538141b2013-06-03 20:57:17 +0000427 self.mode = mode
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000428
429 def _clone(self, new_root, new_path, tainted):
430 """Clones itself keeping meta-data."""
431 # Keep the self.size and self._real_path caches for performance reason. It
432 # is also important when the file becomes tainted (with a variable instead
433 # of the real path) since self.path is not an on-disk path anymore so
434 # out._size cannot be updated.
maruel@chromium.org538141b2013-06-03 20:57:17 +0000435 out = self.__class__(new_root, new_path, tainted, self.size, self.mode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000436 out._real_path = self._real_path
437 return out
438
maruel@chromium.org538141b2013-06-03 20:57:17 +0000439 def flatten(self):
440 out = super(Results.File, self).flatten()
441 out['mode'] = self.mode
442 return out
443
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000444 class Directory(_TouchedObject):
maruel@chromium.org538141b2013-06-03 20:57:17 +0000445 """A directory of files. Must exist.
446
447 For a Directory instance, self.size is not a cache, it's an actual value
448 that is never modified and represents the total size of the files contained
449 in this directory. It is possible that the directory is empty so that
450 size==0; this happens if there's only an invalid symlink in it.
451 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000452 def __init__(self, root, path, tainted, size, nb_files):
453 """path='.' is a valid value and must be handled appropriately."""
454 assert not path.endswith(os.path.sep), path
455 super(Results.Directory, self).__init__(
456 root, path + os.path.sep, tainted, size, nb_files)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000457
458 def flatten(self):
459 out = super(Results.Directory, self).flatten()
460 out['nb_files'] = self.nb_files
461 return out
462
463 def _clone(self, new_root, new_path, tainted):
464 """Clones itself keeping meta-data."""
465 out = self.__class__(
466 new_root,
467 new_path.rstrip(os.path.sep),
468 tainted,
469 self.size,
470 self.nb_files)
471 out._real_path = self._real_path
472 return out
473
474 class Process(object):
475 """A process that was traced.
476
477 Contains references to the files accessed by this process and its children.
478 """
479 def __init__(self, pid, files, executable, command, initial_cwd, children):
480 logging.debug('Process(%s, %d, ...)' % (pid, len(files)))
481 self.pid = pid
482 self.files = sorted(files, key=lambda x: x.path)
483 self.children = children
484 self.executable = executable
485 self.command = command
486 self.initial_cwd = initial_cwd
487
488 # Check internal consistency.
489 assert len(set(f.path for f in self.files)) == len(self.files), sorted(
490 f.path for f in self.files)
491 assert isinstance(self.children, list)
492 assert isinstance(self.files, list)
493
494 @property
495 def all(self):
496 for child in self.children:
497 for i in child.all:
498 yield i
499 yield self
500
501 def flatten(self):
502 return {
503 'children': [c.flatten() for c in self.children],
504 'command': self.command,
505 'executable': self.executable,
506 'files': [f.flatten() for f in self.files],
507 'initial_cwd': self.initial_cwd,
508 'pid': self.pid,
509 }
510
511 def strip_root(self, root):
maruel@chromium.org561d4b22013-09-26 21:08:08 +0000512 assert file_path.isabs(root) and root.endswith(os.path.sep), root
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000513 # Loads the files after since they are constructed as objects.
514 out = self.__class__(
515 self.pid,
516 filter(None, (f.strip_root(root) for f in self.files)),
517 self.executable,
518 self.command,
519 self.initial_cwd,
520 [c.strip_root(root) for c in self.children])
521 logging.debug(
522 'strip_root(%s) %d -> %d' % (root, len(self.files), len(out.files)))
523 return out
524
525 def __init__(self, process):
526 self.process = process
527 # Cache.
528 self._files = None
529
530 def flatten(self):
531 return {
532 'root': self.process.flatten(),
533 }
534
535 @property
536 def files(self):
537 if self._files is None:
538 self._files = sorted(
539 sum((p.files for p in self.process.all), []),
540 key=lambda x: x.path)
541 return self._files
542
543 @property
544 def existent(self):
545 return [f for f in self.files if f.existent]
546
547 @property
548 def non_existent(self):
549 return [f for f in self.files if not f.existent]
550
551 def strip_root(self, root):
552 """Returns a clone with all the files outside the directory |root| removed
553 and converts all the path to be relative paths.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000554
555 It keeps files accessible through the |root| directory or that have been
556 accessed through any symlink which points to the same directory.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000557 """
558 # Resolve any symlink
maruel5ab2e6c2016-08-03 12:00:37 -0700559 root = os.path.realpath(root)
maruel@chromium.org561d4b22013-09-26 21:08:08 +0000560 root = (
561 file_path.get_native_path_case(root).rstrip(os.path.sep) + os.path.sep)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000562 logging.debug('strip_root(%s)' % root)
563 return Results(self.process.strip_root(root))
564
565
566class ApiBase(object):
567 """OS-agnostic API to trace a process and its children."""
568 class Context(object):
569 """Processes one log line at a time and keeps the list of traced processes.
570
571 The parsing is complicated by the fact that logs are traced out of order for
572 strace but in-order for dtrace and logman. In addition, on Windows it is
573 very frequent that processids are reused so a flat list cannot be used. But
574 at the same time, it is impossible to faithfully construct a graph when the
575 logs are processed out of order. So both a tree and a flat mapping are used,
576 the tree is the real process tree, while the flat mapping stores the last
577 valid process for the corresponding processid. For the strace case, the
578 tree's head is guessed at the last moment.
579 """
580 class Process(object):
581 """Keeps context for one traced child process.
582
583 Logs all the files this process touched. Ignores directories.
584 """
585 def __init__(self, blacklist, pid, initial_cwd):
586 # Check internal consistency.
587 assert isinstance(pid, int), repr(pid)
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000588 assert_is_renderable(initial_cwd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000589 self.pid = pid
590 # children are Process instances.
591 self.children = []
592 self.initial_cwd = initial_cwd
593 self.cwd = None
maruel@chromium.org538141b2013-06-03 20:57:17 +0000594 self.files = {}
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000595 self.executable = None
596 self.command = None
597 self._blacklist = blacklist
598
599 def to_results_process(self):
600 """Resolves file case sensitivity and or late-bound strings."""
601 # When resolving files, it's normal to get dupe because a file could be
602 # opened multiple times with different case. Resolve the deduplication
603 # here.
604 def fix_path(x):
605 """Returns the native file path case.
606
607 Converts late-bound strings.
608 """
609 if not x:
610 # Do not convert None instance to 'None'.
611 return x
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000612 x = render(x)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000613 if os.path.isabs(x):
614 # If the path is not absolute, which tends to happen occasionally on
615 # Windows, it is not possible to get the native path case so ignore
616 # that trace. It mostly happens for 'executable' value.
maruel@chromium.org561d4b22013-09-26 21:08:08 +0000617 x = file_path.get_native_path_case(x)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000618 return x
619
maruel@chromium.org538141b2013-06-03 20:57:17 +0000620 def fix_and_blacklist_path(x, m):
621 """Receives a tuple (filepath, mode) and processes filepath."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000622 x = fix_path(x)
623 if not x:
624 return
625 # The blacklist needs to be reapplied, since path casing could
626 # influence blacklisting.
627 if self._blacklist(x):
628 return
maruel@chromium.org538141b2013-06-03 20:57:17 +0000629 # Filters out directories. Some may have passed through.
maruel12e30012015-10-09 11:55:35 -0700630 if fs.isdir(x):
maruel@chromium.org538141b2013-06-03 20:57:17 +0000631 return
632 return x, m
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000633
maruel@chromium.org538141b2013-06-03 20:57:17 +0000634 # Renders all the files as strings, as some could be RelativePath
635 # instances. It is important to do it first since there could still be
636 # multiple entries with the same path but different modes.
637 rendered = (
638 fix_and_blacklist_path(f, m) for f, m in self.files.iteritems())
639 files = sorted(
640 (f for f in rendered if f),
641 key=lambda x: (x[0], Results.File.ACCEPTABLE_MODES.index(x[1])))
642 # Then converting into a dict will automatically clean up lesser
643 # important values.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000644 files = [
maruel@chromium.org538141b2013-06-03 20:57:17 +0000645 Results.File(None, f, False, None, m)
646 for f, m in dict(files).iteritems()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000647 ]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000648 return Results.Process(
649 self.pid,
650 files,
651 fix_path(self.executable),
652 self.command,
653 fix_path(self.initial_cwd),
654 [c.to_results_process() for c in self.children])
655
maruel@chromium.org538141b2013-06-03 20:57:17 +0000656 def add_file(self, filepath, mode):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000657 """Adds a file if it passes the blacklist."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000658 if self._blacklist(render(filepath)):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000659 return
maruel@chromium.org538141b2013-06-03 20:57:17 +0000660 logging.debug('add_file(%d, %s, %s)', self.pid, filepath, mode)
661 # Note that filepath and not render(filepath) is added. It is because
662 # filepath could be something else than a string, like a RelativePath
663 # instance for dtrace logs.
664 modes = Results.File.ACCEPTABLE_MODES
665 old_mode = self.files.setdefault(filepath, mode)
666 if old_mode != mode and modes.index(old_mode) < modes.index(mode):
667 # Take the highest value.
668 self.files[filepath] = mode
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000669
670 def __init__(self, blacklist):
671 self.blacklist = blacklist
672 # Initial process.
673 self.root_process = None
674 # dict to accelerate process lookup, to not have to lookup the whole graph
675 # each time.
676 self._process_lookup = {}
677
678 class Tracer(object):
679 """During it's lifetime, the tracing subsystem is enabled."""
680 def __init__(self, logname):
681 self._logname = logname
maruel@chromium.org808e3f22013-05-16 16:42:54 +0000682 self._lock = threading.RLock()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000683 self._traces = []
684 self._initialized = True
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +0000685 self._scripts_to_cleanup = []
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000686
687 def trace(self, cmd, cwd, tracename, output):
688 """Runs the OS-specific trace program on an executable.
689
690 Arguments:
691 - cmd: The command (a list) to run.
692 - cwd: Current directory to start the child process in.
693 - tracename: Name of the trace in the logname file.
694 - output: If False, redirects output to PIPEs.
695
696 Returns a tuple (resultcode, output) and updates the internal trace
697 entries.
698 """
699 # The implementation adds an item to self._traces.
700 raise NotImplementedError(self.__class__.__name__)
701
702 def close(self, _timeout=None):
703 """Saves the meta-data in the logname file.
704
705 For kernel-based tracing, stops the tracing subsystem.
706
707 Must not be used manually when using 'with' construct.
708 """
709 with self._lock:
maruel@chromium.org808e3f22013-05-16 16:42:54 +0000710 if not self._initialized:
711 raise TracingFailure(
712 'Called %s.close() on an unitialized object' %
713 self.__class__.__name__,
714 None, None, None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000715 try:
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +0000716 while self._scripts_to_cleanup:
717 try:
maruel12e30012015-10-09 11:55:35 -0700718 fs.remove(self._scripts_to_cleanup.pop())
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +0000719 except OSError as e:
720 logging.error('Failed to delete a temporary script: %s', e)
Marc-Antoine Ruelde011802013-11-12 15:19:47 -0500721 tools.write_json(self._logname, self._gen_logdata(), False)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000722 finally:
723 self._initialized = False
724
725 def post_process_log(self):
726 """Post-processes the log so it becomes faster to load afterward.
727
728 Must not be used manually when using 'with' construct.
729 """
730 assert not self._initialized, 'Must stop tracing first.'
731
maruel@chromium.org808e3f22013-05-16 16:42:54 +0000732 def _gen_logdata(self):
733 """Returns the data to be saved in the trace file."""
734 return {
735 'traces': self._traces,
736 }
737
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000738 def __enter__(self):
739 """Enables 'with' statement."""
740 return self
741
742 def __exit__(self, exc_type, exc_value, traceback):
743 """Enables 'with' statement."""
744 self.close()
745 # If an exception was thrown, do not process logs.
746 if not exc_type:
747 self.post_process_log()
748
749 def get_tracer(self, logname):
750 """Returns an ApiBase.Tracer instance.
751
752 Initializes the tracing subsystem, which is a requirement for kernel-based
753 tracers. Only one tracer instance should be live at a time!
754
755 logname is the filepath to the json file that will contain the meta-data
756 about the logs.
757 """
758 return self.Tracer(logname)
759
760 @staticmethod
761 def clean_trace(logname):
762 """Deletes an old log."""
763 raise NotImplementedError()
764
765 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +0000766 def parse_log(cls, logname, blacklist, trace_name):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000767 """Processes trace logs and returns the files opened and the files that do
768 not exist.
769
770 It does not track directories.
771
maruel@chromium.orgec74ff82012-10-29 18:14:47 +0000772 Arguments:
773 - logname: must be an absolute path.
774 - blacklist: must be a lambda.
775 - trace_name: optional trace to read, defaults to reading all traces.
maruel@chromium.org8b056ba2012-10-16 14:04:49 +0000776
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000777 Most of the time, files that do not exist are temporary test files that
778 should be put in /tmp instead. See http://crbug.com/116251.
779
780 Returns a list of dict with keys:
781 - results: A Results instance.
782 - trace: The corresponding tracename parameter provided to
783 get_tracer().trace().
784 - output: Output gathered during execution, if get_tracer().trace(...,
785 output=False) was used.
786 """
787 raise NotImplementedError(cls.__class__.__name__)
788
789
790class Strace(ApiBase):
791 """strace implies linux."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000792 @staticmethod
793 def load_filename(filename):
794 """Parses a filename in a log."""
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000795 # TODO(maruel): Be compatible with strace -x.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000796 assert isinstance(filename, str)
797 out = ''
798 i = 0
799 while i < len(filename):
800 c = filename[i]
801 if c == '\\':
802 out += chr(int(filename[i+1:i+4], 8))
803 i += 4
804 else:
805 out += c
806 i += 1
807 # TODO(maruel): That's not necessarily true that the current code page is
808 # utf-8.
809 return out.decode('utf-8')
810
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000811 class Context(ApiBase.Context):
812 """Processes a strace log line and keeps the list of existent and non
813 existent files accessed.
814
815 Ignores directories.
816
817 Uses late-binding to processes the cwd of each process. The problem is that
818 strace generates one log file per process it traced but doesn't give any
maruel@chromium.org2909b7c2013-05-29 18:52:59 +0000819 information about which process was started when and by who. So process the
820 logs out of order and use late binding with RelativePath to be able to
821 deduce the initial directory of each process once all the logs are parsed.
822
823 TODO(maruel): Use the script even in the non-sudo case, so log parsing can
824 be done in two phase: first find the root process, then process the child
825 processes in order. With that, it should be possible to not use RelativePath
826 anymore. This would significantly simplify the code!
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000827 """
828 class Process(ApiBase.Context.Process):
829 """Represents the state of a process.
830
831 Contains all the information retrieved from the pid-specific log.
832 """
833 # Function names are using ([a-z_0-9]+)
834 # This is the most common format. function(args) = result
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000835 RE_HEADER = re.compile(r'^([a-z_0-9]+)\((.*?)\)\s+= (.+)$')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000836 # An interrupted function call, only grab the minimal header.
837 RE_UNFINISHED = re.compile(r'^([^\(]+)(.*) \<unfinished \.\.\.\>$')
838 # A resumed function call.
839 RE_RESUMED = re.compile(r'^<\.\.\. ([^ ]+) resumed> (.+)$')
840 # A process received a signal.
841 RE_SIGNAL = re.compile(r'^--- SIG[A-Z]+ .+ ---')
842 # A process didn't handle a signal. Ignore any junk appearing before,
843 # because the process was forcibly killed so it won't open any new file.
844 RE_KILLED = re.compile(
845 r'^.*\+\+\+ killed by ([A-Z]+)( \(core dumped\))? \+\+\+$')
846 # The process has exited.
847 RE_PROCESS_EXITED = re.compile(r'^\+\+\+ exited with (\d+) \+\+\+')
848 # A call was canceled. Ignore any prefix.
849 RE_UNAVAILABLE = re.compile(r'^.*\)\s*= \? <unavailable>$')
maruel5ab2e6c2016-08-03 12:00:37 -0700850 # The process has exited due to the ptrace sandbox. RE_PROCESS_EXITED will
851 # follow on next line.
852 RE_PTRACE = re.compile(r'^.*<ptrace\(SYSCALL\):No such process>$')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000853 # Happens when strace fails to even get the function name.
854 UNNAMED_FUNCTION = '????'
855
856 # Corner-case in python, a class member function decorator must not be
857 # @staticmethod.
858 def parse_args(regexp, expect_zero): # pylint: disable=E0213
859 """Automatically convert the str 'args' into a list of processed
860 arguments.
861
862 Arguments:
863 - regexp is used to parse args.
864 - expect_zero: one of True, False or None.
865 - True: will check for result.startswith('0') first and will ignore
866 the trace line completely otherwise. This is important because for
867 many functions, the regexp will not process if the call failed.
868 - False: will check for not result.startswith(('?', '-1')) for the
869 same reason than with True.
870 - None: ignore result.
871 """
872 def meta_hook(function):
873 assert function.__name__.startswith('handle_')
874 def hook(self, args, result):
875 if expect_zero is True and not result.startswith('0'):
876 return
877 if expect_zero is False and result.startswith(('?', '-1')):
878 return
879 match = re.match(regexp, args)
880 if not match:
881 raise TracingFailure(
882 'Failed to parse %s(%s) = %s' %
883 (function.__name__[len('handle_'):], args, result),
884 None, None, None)
885 return function(self, match.groups(), result)
886 return hook
887 return meta_hook
888
889 class RelativePath(object):
890 """A late-bound relative path."""
891 def __init__(self, parent, value):
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000892 assert_is_renderable(parent)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000893 self.parent = parent
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000894 assert (
895 value is None or
896 (isinstance(value, unicode) and not os.path.isabs(value)))
897 self.value = value
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000898 if self.value:
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000899 # TODO(maruel): On POSIX, '\\' is a valid character so remove this
900 # assert.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000901 assert '\\' not in self.value, value
902 assert '\\' not in self.value, (repr(value), repr(self.value))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000903
904 def render(self):
905 """Returns the current directory this instance is representing.
906
907 This function is used to return the late-bound value.
908 """
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000909 assert self.parent is not None
910 parent = render(self.parent)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000911 if self.value:
912 return os.path.normpath(os.path.join(parent, self.value))
913 return parent
914
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000915 def __init__(self, root, pid):
916 """Keeps enough information to be able to guess the original process
917 root.
918
919 strace doesn't store which process was the initial process. So more
920 information needs to be kept so the graph can be reconstructed from the
921 flat map.
922 """
923 logging.info('%s(%d)' % (self.__class__.__name__, pid))
924 super(Strace.Context.Process, self).__init__(root.blacklist, pid, None)
925 assert isinstance(root, ApiBase.Context)
926 self._root = weakref.ref(root)
927 # The dict key is the function name of the pending call, like 'open'
928 # or 'execve'.
929 self._pending_calls = {}
930 self._line_number = 0
931 # Current directory when the process started.
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000932 if isinstance(self._root(), unicode):
933 self.initial_cwd = self._root()
934 else:
935 self.initial_cwd = self.RelativePath(self._root(), None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000936 self.parentid = None
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +0000937 self._done = False
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000938
939 def get_cwd(self):
940 """Returns the best known value of cwd."""
941 return self.cwd or self.initial_cwd
942
943 def render(self):
944 """Returns the string value of the RelativePath() object.
945
946 Used by RelativePath. Returns the initial directory and not the
947 current one since the current directory 'cwd' validity is time-limited.
948
949 The validity is only guaranteed once all the logs are processed.
950 """
951 return self.initial_cwd.render()
952
953 def on_line(self, line):
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000954 assert isinstance(line, str)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000955 self._line_number += 1
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000956 try:
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000957 if self._done:
958 raise TracingFailure(
959 'Found a trace for a terminated process or corrupted log',
960 None, None, None)
961
962 if self.RE_SIGNAL.match(line):
963 # Ignore signals.
964 return
965
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000966 match = self.RE_KILLED.match(line)
967 if match:
968 # Converts a '+++ killed by Foo +++' trace into an exit_group().
969 self.handle_exit_group(match.group(1), None)
970 return
971
972 match = self.RE_PROCESS_EXITED.match(line)
973 if match:
974 # Converts a '+++ exited with 1 +++' trace into an exit_group()
975 self.handle_exit_group(match.group(1), None)
976 return
977
978 match = self.RE_UNFINISHED.match(line)
979 if match:
980 if match.group(1) in self._pending_calls:
981 raise TracingFailure(
982 'Found two unfinished calls for the same function',
983 None, None, None,
984 self._pending_calls)
985 self._pending_calls[match.group(1)] = (
986 match.group(1) + match.group(2))
987 return
988
989 match = self.RE_UNAVAILABLE.match(line)
990 if match:
991 # This usually means a process was killed and a pending call was
992 # canceled.
993 # TODO(maruel): Look up the last exit_group() trace just above and
994 # make sure any self._pending_calls[anything] is properly flushed.
995 return
996
maruel5ab2e6c2016-08-03 12:00:37 -0700997 match = self.RE_PTRACE.match(line)
998 if match:
999 # Not sure what this means. Anyhow, the process died.
1000 # TODO(maruel): Add note that only RE_PROCESS_EXITED is valid
1001 # afterward.
1002 return
1003
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001004 match = self.RE_RESUMED.match(line)
1005 if match:
1006 if match.group(1) not in self._pending_calls:
1007 raise TracingFailure(
1008 'Found a resumed call that was not logged as unfinished',
1009 None, None, None,
1010 self._pending_calls)
1011 pending = self._pending_calls.pop(match.group(1))
1012 # Reconstruct the line.
1013 line = pending + match.group(2)
1014
1015 match = self.RE_HEADER.match(line)
1016 if not match:
maruel@chromium.orgbcfc9e12013-03-19 18:47:17 +00001017 # The line is corrupted. It happens occasionally when a process is
1018 # killed forcibly with activity going on. Assume the process died.
1019 # No other line can be processed afterward.
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001020 logging.debug('%d is done: %s', self.pid, line)
maruel@chromium.orgbcfc9e12013-03-19 18:47:17 +00001021 self._done = True
1022 return
1023
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001024 if match.group(1) == self.UNNAMED_FUNCTION:
1025 return
1026
1027 # It's a valid line, handle it.
1028 handler = getattr(self, 'handle_%s' % match.group(1), None)
1029 if not handler:
1030 self._handle_unknown(match.group(1), match.group(2), match.group(3))
1031 return handler(match.group(2), match.group(3))
1032 except TracingFailure, e:
1033 # Hack in the values since the handler could be a static function.
1034 e.pid = self.pid
1035 e.line = line
1036 e.line_number = self._line_number
1037 # Re-raise the modified exception.
1038 raise
1039 except (KeyError, NotImplementedError, ValueError), e:
1040 raise TracingFailure(
1041 'Trace generated a %s exception: %s' % (
1042 e.__class__.__name__, str(e)),
1043 self.pid,
1044 self._line_number,
1045 line,
1046 e)
1047
1048 @parse_args(r'^\"(.+?)\", [FKORWX_|]+$', True)
1049 def handle_access(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001050 self._handle_file(args[0], Results.File.TOUCHED)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001051
1052 @parse_args(r'^\"(.+?)\"$', True)
1053 def handle_chdir(self, args, _result):
1054 """Updates cwd."""
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001055 self.cwd = self._mangle(args[0])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001056 logging.debug('handle_chdir(%d, %s)' % (self.pid, self.cwd))
1057
maruel@chromium.org538141b2013-06-03 20:57:17 +00001058 @parse_args(r'^\"(.+?)\", (\d+), (\d+)$', False)
1059 def handle_chown(self, args, _result):
1060 # TODO(maruel): Look at result?
1061 self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org55550722013-05-28 14:43:14 +00001062
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001063 def handle_clone(self, _args, result):
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001064 self._handling_forking('clone', result)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001065
1066 def handle_close(self, _args, _result):
1067 pass
1068
maruel@chromium.org538141b2013-06-03 20:57:17 +00001069 @parse_args(r'^\"(.+?)\", (\d+)$', False)
1070 def handle_chmod(self, args, _result):
1071 self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001072
maruel5ab2e6c2016-08-03 12:00:37 -07001073 @parse_args(r'^\"(.+?)\"$', False)
1074 def handle_chroot(self, _args, _result):
1075 # This is used by Chromium's sandbox. See
1076 # https://chromium.googlesource.com/chromium/src/+/master/sandbox/linux/suid/sandbox.c
1077 pass
1078
maruel@chromium.org538141b2013-06-03 20:57:17 +00001079 @parse_args(r'^\"(.+?)\", (\d+)$', False)
1080 def handle_creat(self, args, _result):
1081 self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001082
1083 @parse_args(r'^\"(.+?)\", \[(.+)\], \[\/\* \d+ vars? \*\/\]$', True)
1084 def handle_execve(self, args, _result):
1085 # Even if in practice execve() doesn't returns when it succeeds, strace
1086 # still prints '0' as the result.
1087 filepath = args[0]
maruel@chromium.org538141b2013-06-03 20:57:17 +00001088 self._handle_file(filepath, Results.File.READ)
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001089 self.executable = self._mangle(filepath)
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001090 try:
1091 self.command = strace_process_quoted_arguments(args[1])
1092 except ValueError as e:
1093 raise TracingFailure(
1094 'Failed to process command line argument:\n%s' % e.args[0],
1095 None, None, None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001096
1097 def handle_exit_group(self, _args, _result):
1098 """Removes cwd."""
1099 self.cwd = None
1100
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001101 @parse_args(r'^(\d+|AT_FDCWD), \"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', True)
1102 def handle_faccessat(self, args, _results):
1103 if args[0] == 'AT_FDCWD':
maruel@chromium.org538141b2013-06-03 20:57:17 +00001104 self._handle_file(args[1], Results.File.TOUCHED)
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001105 else:
1106 raise Exception('Relative faccess not implemented.')
1107
maruel@chromium.org0781f322013-05-28 19:45:49 +00001108 def handle_fallocate(self, _args, result):
1109 pass
1110
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001111 def handle_fork(self, args, result):
1112 self._handle_unknown('fork', args, result)
1113
maruel5ab2e6c2016-08-03 12:00:37 -07001114 @parse_args(r'^(\d+), \"(.*?)\", ({.+}), (\d+)$', True)
1115 def handle_fstatat64(self, _args, _result):
1116 # TODO(maruel): Handle.
1117 pass
1118
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001119 def handle_futex(self, _args, _result):
1120 pass
1121
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001122 @parse_args(r'^(\".+?\"|0x[a-f0-9]+), (\d+)$', False)
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001123 def handle_getcwd(self, args, _result):
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001124 # TODO(maruel): Resolve file handle.
1125 if not args[0].startswith('0x'):
1126 filepath = args[0][1:-1]
1127 if os.path.isabs(filepath):
1128 logging.debug('handle_chdir(%d, %s)' % (self.pid, self.cwd))
1129 if not isinstance(self.cwd, unicode):
1130 # Take the occasion to reset the path.
1131 self.cwd = self._mangle(filepath)
1132 else:
1133 # It should always match.
1134 assert self.cwd == Strace.load_filename(filepath), (
1135 self.cwd, filepath)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001136
1137 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1138 def handle_link(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001139 self._handle_file(args[0], Results.File.READ)
1140 self._handle_file(args[1], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001141
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001142 @parse_args(r'^(\".+?\"|0x[a-f0-9]+), \{(?:.+?, )?\.\.\.\}$', True)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001143 def handle_lstat(self, args, _result):
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001144 # TODO(maruel): Resolve file handle.
1145 if not args[0].startswith('0x'):
1146 self._handle_file(args[0][1:-1], Results.File.TOUCHED)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001147
1148 def handle_mkdir(self, _args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001149 # We track content, not directories.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001150 pass
1151
maruel5ab2e6c2016-08-03 12:00:37 -07001152 @parse_args(r'^(\d+|AT_FDCWD), \".*?\", ({.+}), (\d+)$', True)
1153 def handle_newfstatat(self, _args, _result):
1154 # TODO(maruel): Handle
1155 pass
1156
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001157 @parse_args(r'^(\".*?\"|0x[a-f0-9]+), ([A-Z\_\|]+)(|, \d+)$', False)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001158 def handle_open(self, args, _result):
1159 if 'O_DIRECTORY' in args[1]:
1160 return
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001161 # TODO(maruel): Resolve file handle.
1162 if not args[0].startswith('0x'):
1163 t = Results.File.READ if 'O_RDONLY' in args[1] else Results.File.WRITE
1164 self._handle_file(args[0][1:-1], t)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001165
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001166 @parse_args(
1167 r'^(\d+|AT_FDCWD), (\".*?\"|0x[a-f0-9]+), ([A-Z\_\|]+)(|, \d+)$',
1168 False)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001169 def handle_openat(self, args, _result):
1170 if 'O_DIRECTORY' in args[2]:
1171 return
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001172 if args[0] == 'AT_FDCWD':
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001173 # TODO(maruel): Resolve file handle.
1174 if not args[1].startswith('0x'):
1175 t = (
1176 Results.File.READ if 'O_RDONLY' in args[2]
maruel@chromium.org538141b2013-06-03 20:57:17 +00001177 else Results.File.WRITE)
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001178 self._handle_file(args[1][1:-1], t)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001179 else:
1180 # TODO(maruel): Implement relative open if necessary instead of the
1181 # AT_FDCWD flag, let's hope not since this means tracking all active
1182 # directory handles.
maruel@chromium.org538141b2013-06-03 20:57:17 +00001183 raise NotImplementedError('Relative open via openat not implemented.')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001184
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001185 @parse_args(
1186 r'^(\".+?\"|0x[a-f0-9]+), (?:\".+?\"(?:\.\.\.)?|0x[a-f0-9]+), '
1187 '\d+$',
1188 False)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001189 def handle_readlink(self, args, _result):
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001190 # TODO(maruel): Resolve file handle.
1191 if not args[0].startswith('0x'):
1192 self._handle_file(args[0][1:-1], Results.File.READ)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001193
1194 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1195 def handle_rename(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001196 self._handle_file(args[0], Results.File.READ)
1197 self._handle_file(args[1], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001198
1199 def handle_rmdir(self, _args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001200 # We track content, not directories.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001201 pass
1202
1203 def handle_setxattr(self, _args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001204 # TODO(maruel): self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001205 pass
1206
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001207 @parse_args(r'^(\".+?\"|0x[a-f0-9]+), \{.+?\}$', True)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001208 def handle_stat(self, args, _result):
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001209 # TODO(maruel): Resolve file handle.
1210 if not args[0].startswith('0x'):
1211 self._handle_file(args[0][1:-1], Results.File.TOUCHED)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001212
maruel5ab2e6c2016-08-03 12:00:37 -07001213 @parse_args(r'^(\".+?\"|0x[a-f0-9]+), \{.+?\}$', True)
1214 def handle_stat64(self, args, _result):
1215 if not args[0].startswith('0x'):
1216 self._handle_file(args[0][1:-1], Results.File.TOUCHED)
1217
maruel@chromium.org538141b2013-06-03 20:57:17 +00001218 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1219 def handle_symlink(self, args, _result):
1220 self._handle_file(args[0], Results.File.TOUCHED)
1221 self._handle_file(args[1], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001222
maruel5ab2e6c2016-08-03 12:00:37 -07001223 @parse_args(
1224 r'^([0-9xa-f]+), ([0-9xa-f]+), ([0-9xa-f]+), ([0-9xa-f]+), '
1225 r'([0-9xa-f]+), ([0-9xa-f]+)$',
1226 False)
1227 def handle_syscall_317(self, _args, _result):
1228 # move_pages()
1229 pass
1230
csharp@chromium.orgfa672922012-11-02 17:43:31 +00001231 @parse_args(r'^\"(.+?)\", \d+', True)
1232 def handle_truncate(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001233 self._handle_file(args[0], Results.File.WRITE)
csharp@chromium.orgfa672922012-11-02 17:43:31 +00001234
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001235 def handle_unlink(self, _args, _result):
1236 # In theory, the file had to be created anyway.
1237 pass
1238
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001239 def handle_unlinkat(self, _args, _result):
1240 # In theory, the file had to be created anyway.
1241 pass
1242
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001243 def handle_statfs(self, _args, _result):
1244 pass
1245
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001246 def handle_utimensat(self, _args, _result):
1247 pass
1248
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001249 def handle_vfork(self, _args, result):
1250 self._handling_forking('vfork', result)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001251
1252 @staticmethod
1253 def _handle_unknown(function, args, result):
1254 raise TracingFailure(
1255 'Unexpected/unimplemented trace %s(%s)= %s' %
1256 (function, args, result),
1257 None, None, None)
1258
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001259 def _handling_forking(self, name, result):
1260 """Transfers cwd."""
1261 if result.startswith(('?', '-1')):
1262 # The call failed.
1263 return
1264 # Update the other process right away.
1265 childpid = int(result)
maruel5ab2e6c2016-08-03 12:00:37 -07001266 if childpid < 20:
1267 # Ignore, it's probably the Chromium sandbox.
1268 return
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001269 child = self._root().get_or_set_proc(childpid)
1270 if child.parentid is not None or childpid in self.children:
1271 raise TracingFailure(
1272 'Found internal inconsitency in process lifetime detection '
maruel5ab2e6c2016-08-03 12:00:37 -07001273 'during a %s()=%s call:\n%s' % (
1274 name,
1275 result,
1276 sorted(self._root()._process_lookup)),
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001277 None, None, None)
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001278 # Copy the cwd object.
1279 child.initial_cwd = self.get_cwd()
1280 child.parentid = self.pid
1281 # It is necessary because the logs are processed out of order.
1282 self.children.append(child)
1283
maruel@chromium.org538141b2013-06-03 20:57:17 +00001284 def _handle_file(self, filepath, mode):
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001285 filepath = self._mangle(filepath)
maruel@chromium.org538141b2013-06-03 20:57:17 +00001286 self.add_file(filepath, mode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001287
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001288 def _mangle(self, filepath):
1289 """Decodes a filepath found in the log and convert it to a late-bound
1290 path if necessary.
1291
1292 |filepath| is an strace 'encoded' string and the returned value is
1293 either an unicode string if the path was absolute or a late bound path
1294 otherwise.
1295 """
1296 filepath = Strace.load_filename(filepath)
1297 if os.path.isabs(filepath):
1298 return filepath
1299 else:
1300 if isinstance(self.get_cwd(), unicode):
1301 return os.path.normpath(os.path.join(self.get_cwd(), filepath))
1302 return self.RelativePath(self.get_cwd(), filepath)
1303
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001304 def __init__(self, blacklist, root_pid, initial_cwd):
1305 """|root_pid| may be None when the root process is not known.
1306
1307 In that case, a search is done after reading all the logs to figure out
1308 the root process.
1309 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001310 super(Strace.Context, self).__init__(blacklist)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001311 assert_is_renderable(initial_cwd)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001312 self.root_pid = root_pid
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001313 self.initial_cwd = initial_cwd
1314
1315 def render(self):
1316 """Returns the string value of the initial cwd of the root process.
1317
1318 Used by RelativePath.
1319 """
1320 return self.initial_cwd
1321
1322 def on_line(self, pid, line):
1323 """Transfers control into the Process.on_line() function."""
1324 self.get_or_set_proc(pid).on_line(line.strip())
1325
1326 def to_results(self):
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001327 """If necessary, finds back the root process and verify consistency."""
1328 if not self.root_pid:
1329 # The non-sudo case. The traced process was started by strace itself,
1330 # so the pid of the traced process is not known.
1331 root = [p for p in self._process_lookup.itervalues() if not p.parentid]
1332 if len(root) == 1:
1333 self.root_process = root[0]
1334 # Save it for later.
1335 self.root_pid = self.root_process.pid
maruel5ab2e6c2016-08-03 12:00:37 -07001336 else:
1337 raise TracingFailure(
1338 'Found internal inconsitency in process lifetime detection '
1339 'while finding the root process',
1340 None,
1341 None,
1342 None,
1343 self.root_pid,
1344 sorted(self._process_lookup))
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001345 else:
1346 # The sudo case. The traced process was started manually so its pid is
1347 # known.
1348 self.root_process = self._process_lookup.get(self.root_pid)
1349 if not self.root_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001350 raise TracingFailure(
1351 'Found internal inconsitency in process lifetime detection '
1352 'while finding the root process',
1353 None,
1354 None,
1355 None,
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001356 self.root_pid,
1357 sorted(self._process_lookup))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001358 process = self.root_process.to_results_process()
1359 if sorted(self._process_lookup) != sorted(p.pid for p in process.all):
1360 raise TracingFailure(
1361 'Found internal inconsitency in process lifetime detection '
1362 'while looking for len(tree) == len(list)',
1363 None,
1364 None,
1365 None,
1366 sorted(self._process_lookup),
1367 sorted(p.pid for p in process.all))
1368 return Results(process)
1369
1370 def get_or_set_proc(self, pid):
1371 """Returns the Context.Process instance for this pid or creates a new one.
1372 """
1373 if not pid or not isinstance(pid, int):
1374 raise TracingFailure(
1375 'Unpexpected value for pid: %r' % pid,
1376 pid,
1377 None,
1378 None,
1379 pid)
1380 if pid not in self._process_lookup:
1381 self._process_lookup[pid] = self.Process(self, pid)
1382 return self._process_lookup[pid]
1383
1384 @classmethod
1385 def traces(cls):
1386 """Returns the list of all handled traces to pass this as an argument to
1387 strace.
1388 """
1389 prefix = 'handle_'
1390 return [i[len(prefix):] for i in dir(cls.Process) if i.startswith(prefix)]
1391
1392 class Tracer(ApiBase.Tracer):
1393 MAX_LEN = 256
1394
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001395 def __init__(self, logname, use_sudo):
1396 super(Strace.Tracer, self).__init__(logname)
1397 self.use_sudo = use_sudo
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001398 if use_sudo:
1399 # TODO(maruel): Use the jump script systematically to make it easy to
1400 # figure out the root process, so RelativePath is not necessary anymore.
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00001401 self._child_script = create_exec_thunk()
1402 self._scripts_to_cleanup.append(self._child_script)
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001403
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001404 def trace(self, cmd, cwd, tracename, output):
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001405 """Runs strace on an executable.
1406
1407 When use_sudo=True, it is a 3-phases process: start the thunk, start
1408 sudo strace with the pid of the thunk and then have the thunk os.execve()
1409 the process to trace.
1410 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001411 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
1412 assert os.path.isabs(cmd[0]), cmd[0]
1413 assert os.path.isabs(cwd), cwd
1414 assert os.path.normpath(cwd) == cwd, cwd
1415 with self._lock:
1416 if not self._initialized:
1417 raise TracingFailure(
1418 'Called Tracer.trace() on an unitialized object',
1419 None, None, None, tracename)
1420 assert tracename not in (i['trace'] for i in self._traces)
1421 stdout = stderr = None
1422 if output:
1423 stdout = subprocess.PIPE
1424 stderr = subprocess.STDOUT
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001425
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001426 # Ensure all file related APIs are hooked.
maruel5ab2e6c2016-08-03 12:00:37 -07001427 traces = ','.join(
1428 [i for i in Strace.Context.traces()
1429 if not i.startswith('syscall_')] +
1430 ['file'])
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001431 flags = [
maruel@chromium.orga5cfdc32013-05-28 14:46:15 +00001432 # Each child process has its own trace file. It is necessary because
1433 # strace may generate corrupted log file if multiple processes are
1434 # heavily doing syscalls simultaneously.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001435 '-ff',
maruel@chromium.orga5cfdc32013-05-28 14:46:15 +00001436 # Reduce whitespace usage.
1437 '-a1',
1438 # hex encode non-ascii strings.
1439 # TODO(maruel): '-x',
1440 # TODO(maruel): '-ttt',
1441 # Signals are unnecessary noise here. Note the parser can cope with them
1442 # but reduce the unnecessary output.
1443 '-esignal=none',
1444 # Print as much data as wanted.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001445 '-s', '%d' % self.MAX_LEN,
1446 '-e', 'trace=%s' % traces,
1447 '-o', self._logname + '.' + tracename,
1448 ]
maruel5ab2e6c2016-08-03 12:00:37 -07001449 logging.info('%s', flags)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001450
1451 if self.use_sudo:
1452 pipe_r, pipe_w = os.pipe()
1453 # Start the child process paused.
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00001454 target_cmd = [sys.executable, self._child_script, str(pipe_r)] + cmd
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001455 logging.debug(' '.join(target_cmd))
1456 child_proc = subprocess.Popen(
1457 target_cmd,
1458 stdin=subprocess.PIPE,
1459 stdout=stdout,
1460 stderr=stderr,
1461 cwd=cwd)
1462
1463 # TODO(maruel): both processes must use the same UID for it to work
1464 # without sudo. Look why -p is failing at the moment without sudo.
1465 trace_cmd = [
1466 'sudo',
1467 'strace',
1468 '-p', str(child_proc.pid),
1469 ] + flags
1470 logging.debug(' '.join(trace_cmd))
1471 strace_proc = subprocess.Popen(
1472 trace_cmd,
1473 cwd=cwd,
1474 stdin=subprocess.PIPE,
1475 stdout=subprocess.PIPE,
1476 stderr=subprocess.PIPE)
1477
1478 line = strace_proc.stderr.readline()
1479 if not re.match(r'^Process \d+ attached \- interrupt to quit$', line):
1480 # TODO(maruel): Raise an exception.
1481 assert False, line
1482
1483 # Now fire the child process.
1484 os.write(pipe_w, 'x')
1485
1486 out = child_proc.communicate()[0]
1487 strace_out = strace_proc.communicate()[0]
1488
1489 # TODO(maruel): if strace_proc.returncode: Add an exception.
1490 saved_out = strace_out if strace_proc.returncode else out
1491 root_pid = child_proc.pid
1492 else:
1493 # Non-sudo case.
1494 trace_cmd = [
1495 'strace',
1496 ] + flags + cmd
1497 logging.debug(' '.join(trace_cmd))
1498 child_proc = subprocess.Popen(
1499 trace_cmd,
1500 cwd=cwd,
1501 stdin=subprocess.PIPE,
1502 stdout=stdout,
1503 stderr=stderr)
1504 out = child_proc.communicate()[0]
1505 # TODO(maruel): Walk the logs and figure out the root process would
1506 # simplify parsing the logs a *lot*.
1507 saved_out = out
1508 # The trace reader will have to figure out.
1509 root_pid = None
1510
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001511 with self._lock:
1512 assert tracename not in (i['trace'] for i in self._traces)
1513 self._traces.append(
1514 {
1515 'cmd': cmd,
1516 'cwd': cwd,
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001517 'output': saved_out,
1518 'pid': root_pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001519 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001520 })
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001521 return child_proc.returncode, out
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001522
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001523 def __init__(self, use_sudo=None):
1524 super(Strace, self).__init__()
1525 self.use_sudo = use_sudo
1526
1527 def get_tracer(self, logname):
1528 return self.Tracer(logname, self.use_sudo)
1529
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001530 @staticmethod
1531 def clean_trace(logname):
maruel12e30012015-10-09 11:55:35 -07001532 if fs.isfile(logname):
1533 fs.remove(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001534 # Also delete any pid specific file from previous traces.
1535 for i in glob.iglob(logname + '.*'):
1536 if i.rsplit('.', 1)[1].isdigit():
maruel12e30012015-10-09 11:55:35 -07001537 fs.remove(i)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001538
1539 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001540 def parse_log(cls, logname, blacklist, trace_name):
1541 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00001542 assert os.path.isabs(logname)
Marc-Antoine Ruelde011802013-11-12 15:19:47 -05001543 data = tools.read_json(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001544 out = []
1545 for item in data['traces']:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001546 if trace_name and item['trace'] != trace_name:
1547 continue
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001548 result = {
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001549 'output': item['output'],
maruel@chromium.org964ddde2012-10-29 18:13:46 +00001550 'trace': item['trace'],
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001551 }
1552 try:
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001553 context = cls.Context(blacklist, item['pid'], item['cwd'])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001554 for pidfile in glob.iglob('%s.%s.*' % (logname, item['trace'])):
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001555 logging.debug('Reading %s', pidfile)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001556 pid = pidfile.rsplit('.', 1)[1]
1557 if pid.isdigit():
1558 pid = int(pid)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001559 found_line = False
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001560 for line in open(pidfile, 'rb'):
1561 context.on_line(pid, line)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001562 found_line = True
1563 if not found_line:
1564 # Ensures that a completely empty trace still creates the
1565 # corresponding Process instance by logging a dummy line.
1566 context.on_line(pid, '')
1567 else:
1568 logging.warning('Found unexpected file %s', pidfile)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001569 result['results'] = context.to_results()
maruel@chromium.org424f5692012-10-15 15:42:38 +00001570 except TracingFailure:
1571 result['exception'] = sys.exc_info()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001572 out.append(result)
1573 return out
1574
1575
1576class Dtrace(ApiBase):
1577 """Uses DTrace framework through dtrace. Requires root access.
1578
1579 Implies Mac OSX.
1580
1581 dtruss can't be used because it has compatibility issues with python.
1582
1583 Also, the pid->cwd handling needs to be done manually since OSX has no way to
1584 get the absolute path of the 'cwd' dtrace variable from the probe.
1585
1586 Also, OSX doesn't populate curpsinfo->pr_psargs properly, see
1587 https://discussions.apple.com/thread/1980539. So resort to handling execve()
1588 manually.
1589
1590 errno is not printed in the log since this implementation currently only cares
1591 about files that were successfully opened.
1592 """
1593 class Context(ApiBase.Context):
1594 # Format: index pid function(args)
1595 RE_HEADER = re.compile(r'^\d+ (\d+) ([a-zA-Z_\-]+)\((.*?)\)$')
1596
1597 # Arguments parsing.
1598 RE_DTRACE_BEGIN = re.compile(r'^\"(.+?)\"$')
1599 RE_CHDIR = re.compile(r'^\"(.+?)\"$')
1600 RE_EXECVE = re.compile(r'^\"(.+?)\", \[(\d+), (.+)\]$')
1601 RE_OPEN = re.compile(r'^\"(.+?)\", (0x[0-9a-z]+), (0x[0-9a-z]+)$')
1602 RE_PROC_START = re.compile(r'^(\d+), \"(.+?)\", (\d+)$')
1603 RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$')
1604
maruel@chromium.orgac361162013-06-04 15:54:06 +00001605 # O_DIRECTORY is not defined on Windows and dtrace doesn't exist on Windows.
1606 O_DIRECTORY = os.O_DIRECTORY if hasattr(os, 'O_DIRECTORY') else None
maruel@chromium.org538141b2013-06-03 20:57:17 +00001607 O_RDWR = os.O_RDWR
1608 O_WRONLY = os.O_WRONLY
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001609
1610 class Process(ApiBase.Context.Process):
1611 def __init__(self, *args):
1612 super(Dtrace.Context.Process, self).__init__(*args)
1613 self.cwd = self.initial_cwd
1614
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001615 def __init__(self, blacklist, thunk_pid, initial_cwd):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001616 logging.info(
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001617 '%s(%d, %s)' % (self.__class__.__name__, thunk_pid, initial_cwd))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001618 super(Dtrace.Context, self).__init__(blacklist)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001619 assert isinstance(initial_cwd, unicode), initial_cwd
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001620 # Process ID of the temporary script created by create_subprocess_thunk().
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001621 self._thunk_pid = thunk_pid
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001622 self._initial_cwd = initial_cwd
1623 self._line_number = 0
1624
1625 def on_line(self, line):
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001626 assert isinstance(line, unicode), line
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001627 self._line_number += 1
1628 match = self.RE_HEADER.match(line)
1629 if not match:
1630 raise TracingFailure(
1631 'Found malformed line: %s' % line,
1632 None,
1633 self._line_number,
1634 line)
1635 fn = getattr(
1636 self,
1637 'handle_%s' % match.group(2).replace('-', '_'),
1638 self._handle_ignored)
1639 # It is guaranteed to succeed because of the regexp. Or at least I thought
1640 # it would.
1641 pid = int(match.group(1))
1642 try:
1643 return fn(pid, match.group(3))
1644 except TracingFailure, e:
1645 # Hack in the values since the handler could be a static function.
1646 e.pid = pid
1647 e.line = line
1648 e.line_number = self._line_number
1649 # Re-raise the modified exception.
1650 raise
1651 except (KeyError, NotImplementedError, ValueError), e:
1652 raise TracingFailure(
1653 'Trace generated a %s exception: %s' % (
1654 e.__class__.__name__, str(e)),
1655 pid,
1656 self._line_number,
1657 line,
1658 e)
1659
1660 def to_results(self):
1661 process = self.root_process.to_results_process()
1662 # Internal concistency check.
1663 if sorted(self._process_lookup) != sorted(p.pid for p in process.all):
1664 raise TracingFailure(
1665 'Found internal inconsitency in process lifetime detection '
1666 'while looking for len(tree) == len(list)',
1667 None,
1668 None,
1669 None,
1670 sorted(self._process_lookup),
1671 sorted(p.pid for p in process.all))
1672 return Results(process)
1673
1674 def handle_dtrace_BEGIN(self, _pid, args):
1675 if not self.RE_DTRACE_BEGIN.match(args):
1676 raise TracingFailure(
1677 'Found internal inconsitency in dtrace_BEGIN log line',
1678 None, None, None)
1679
1680 def handle_proc_start(self, pid, args):
1681 """Transfers cwd.
1682
1683 The dtrace script already takes care of only tracing the processes that
1684 are child of the traced processes so there is no need to verify the
1685 process hierarchy.
1686 """
1687 if pid in self._process_lookup:
1688 raise TracingFailure(
1689 'Found internal inconsitency in proc_start: %d started two times' %
1690 pid,
1691 None, None, None)
1692 match = self.RE_PROC_START.match(args)
1693 if not match:
1694 raise TracingFailure(
1695 'Failed to parse arguments: %s' % args,
1696 None, None, None)
1697 ppid = int(match.group(1))
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001698 if ppid == self._thunk_pid and not self.root_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001699 proc = self.root_process = self.Process(
1700 self.blacklist, pid, self._initial_cwd)
1701 elif ppid in self._process_lookup:
1702 proc = self.Process(self.blacklist, pid, self._process_lookup[ppid].cwd)
1703 self._process_lookup[ppid].children.append(proc)
1704 else:
1705 # Another process tree, ignore.
1706 return
1707 self._process_lookup[pid] = proc
1708 logging.debug(
1709 'New child: %s -> %d cwd:%s' %
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001710 (ppid, pid, render(proc.initial_cwd)))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001711
1712 def handle_proc_exit(self, pid, _args):
1713 """Removes cwd."""
1714 if pid in self._process_lookup:
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001715 # self._thunk_pid is not traced itself and other traces run neither.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001716 self._process_lookup[pid].cwd = None
1717
1718 def handle_execve(self, pid, args):
1719 """Sets the process' executable.
1720
1721 TODO(maruel): Read command line arguments. See
1722 https://discussions.apple.com/thread/1980539 for an example.
1723 https://gist.github.com/1242279
1724
1725 Will have to put the answer at http://stackoverflow.com/questions/7556249.
1726 :)
1727 """
1728 if not pid in self._process_lookup:
1729 # Another process tree, ignore.
1730 return
1731 match = self.RE_EXECVE.match(args)
1732 if not match:
1733 raise TracingFailure(
1734 'Failed to parse arguments: %r' % args,
1735 None, None, None)
1736 proc = self._process_lookup[pid]
1737 proc.executable = match.group(1)
maruel@chromium.org538141b2013-06-03 20:57:17 +00001738 self._handle_file(pid, proc.executable, Results.File.READ)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001739 proc.command = self.process_escaped_arguments(match.group(3))
1740 if int(match.group(2)) != len(proc.command):
1741 raise TracingFailure(
1742 'Failed to parse execve() arguments: %s' % args,
1743 None, None, None)
1744
1745 def handle_chdir(self, pid, args):
1746 """Updates cwd."""
1747 if pid not in self._process_lookup:
1748 # Another process tree, ignore.
1749 return
1750 cwd = self.RE_CHDIR.match(args).group(1)
1751 if not cwd.startswith('/'):
1752 cwd2 = os.path.join(self._process_lookup[pid].cwd, cwd)
1753 logging.debug('handle_chdir(%d, %s) -> %s' % (pid, cwd, cwd2))
1754 else:
1755 logging.debug('handle_chdir(%d, %s)' % (pid, cwd))
1756 cwd2 = cwd
1757 self._process_lookup[pid].cwd = cwd2
1758
1759 def handle_open_nocancel(self, pid, args):
1760 """Redirects to handle_open()."""
1761 return self.handle_open(pid, args)
1762
1763 def handle_open(self, pid, args):
1764 if pid not in self._process_lookup:
1765 # Another process tree, ignore.
1766 return
1767 match = self.RE_OPEN.match(args)
1768 if not match:
1769 raise TracingFailure(
1770 'Failed to parse arguments: %s' % args,
1771 None, None, None)
1772 flag = int(match.group(2), 16)
1773 if self.O_DIRECTORY & flag == self.O_DIRECTORY:
1774 # Ignore directories.
1775 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00001776 self._handle_file(
1777 pid,
1778 match.group(1),
1779 Results.File.READ if not ((self.O_RDWR | self.O_WRONLY) & flag)
1780 else Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001781
1782 def handle_rename(self, pid, args):
1783 if pid not in self._process_lookup:
1784 # Another process tree, ignore.
1785 return
1786 match = self.RE_RENAME.match(args)
1787 if not match:
1788 raise TracingFailure(
1789 'Failed to parse arguments: %s' % args,
1790 None, None, None)
maruel@chromium.org538141b2013-06-03 20:57:17 +00001791 self._handle_file(pid, match.group(1), Results.File.READ)
1792 self._handle_file(pid, match.group(2), Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001793
maruel@chromium.org538141b2013-06-03 20:57:17 +00001794 def _handle_file(self, pid, filepath, mode):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001795 if not filepath.startswith('/'):
1796 filepath = os.path.join(self._process_lookup[pid].cwd, filepath)
1797 # We can get '..' in the path.
1798 filepath = os.path.normpath(filepath)
1799 # Sadly, still need to filter out directories here;
1800 # saw open_nocancel(".", 0, 0) = 0 lines.
maruel12e30012015-10-09 11:55:35 -07001801 if fs.isdir(filepath):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001802 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00001803 self._process_lookup[pid].add_file(filepath, mode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001804
1805 def handle_ftruncate(self, pid, args):
1806 """Just used as a signal to kill dtrace, ignoring."""
1807 pass
1808
1809 @staticmethod
1810 def _handle_ignored(pid, args):
1811 """Is called for all the event traces that are not handled."""
1812 raise NotImplementedError('Please implement me')
1813
1814 @staticmethod
1815 def process_escaped_arguments(text):
1816 """Extracts escaped arguments on a string and return the arguments as a
1817 list.
1818
1819 Implemented as an automaton.
1820
1821 Example:
1822 With text = '\\001python2.7\\001-c\\001print(\\"hi\\")\\0', the
1823 function will return ['python2.7', '-c', 'print("hi")]
1824 """
1825 if not text.endswith('\\0'):
1826 raise ValueError('String is not null terminated: %r' % text, text)
1827 text = text[:-2]
1828
1829 def unescape(x):
1830 """Replaces '\\' with '\' and '\?' (where ? is anything) with ?."""
1831 out = []
1832 escaped = False
1833 for i in x:
1834 if i == '\\' and not escaped:
1835 escaped = True
1836 continue
1837 escaped = False
1838 out.append(i)
1839 return ''.join(out)
1840
1841 return [unescape(i) for i in text.split('\\001')]
1842
1843 class Tracer(ApiBase.Tracer):
1844 # pylint: disable=C0301
1845 #
1846 # To understand the following code, you'll want to take a look at:
1847 # http://developers.sun.com/solaris/articles/dtrace_quickref/dtrace_quickref.html
1848 # https://wikis.oracle.com/display/DTrace/Variables
1849 # http://docs.oracle.com/cd/E19205-01/820-4221/
1850 #
1851 # 0. Dump all the valid probes into a text file. It is important, you
1852 # want to redirect into a file and you don't want to constantly 'sudo'.
1853 # $ sudo dtrace -l > probes.txt
1854 #
1855 # 1. Count the number of probes:
1856 # $ wc -l probes.txt
1857 # 81823 # On OSX 10.7, including 1 header line.
1858 #
1859 # 2. List providers, intentionally skipping all the 'syspolicy10925' and the
1860 # likes and skipping the header with NR>1:
1861 # $ awk 'NR>1 { print $2 }' probes.txt | sort | uniq | grep -v '[[:digit:]]'
1862 # dtrace
1863 # fbt
1864 # io
1865 # ip
1866 # lockstat
1867 # mach_trap
1868 # proc
1869 # profile
1870 # sched
1871 # syscall
1872 # tcp
1873 # vminfo
1874 #
1875 # 3. List of valid probes:
1876 # $ grep syscall probes.txt | less
1877 # or use dtrace directly:
1878 # $ sudo dtrace -l -P syscall | less
1879 #
1880 # trackedpid is an associative array where its value can be 0, 1 or 2.
1881 # 0 is for untracked processes and is the default value for items not
1882 # in the associative array.
1883 # 1 is for tracked processes.
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001884 # 2 is for the script created by create_subprocess_thunk() only. It is not
1885 # tracked itself but all its decendants are.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001886 #
1887 # The script will kill itself only once waiting_to_die == 1 and
1888 # current_processes == 0, so that both getlogin() was called and that
1889 # all traced processes exited.
1890 #
1891 # TODO(maruel): Use cacheable predicates. See
1892 # https://wikis.oracle.com/display/DTrace/Performance+Considerations
1893 D_CODE = """
1894 dtrace:::BEGIN {
1895 waiting_to_die = 0;
1896 current_processes = 0;
1897 logindex = 0;
1898 printf("%d %d %s_%s(\\"%s\\")\\n",
1899 logindex, PID, probeprov, probename, SCRIPT);
1900 logindex++;
1901 }
1902
1903 proc:::start /trackedpid[ppid]/ {
1904 trackedpid[pid] = 1;
1905 current_processes += 1;
1906 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
1907 logindex, pid, probeprov, probename,
1908 ppid,
1909 execname,
1910 current_processes);
1911 logindex++;
1912 }
1913 /* Should use SCRIPT but there is no access to this variable at that
1914 * point. */
1915 proc:::start /ppid == PID && execname == "Python"/ {
1916 trackedpid[pid] = 2;
1917 current_processes += 1;
1918 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
1919 logindex, pid, probeprov, probename,
1920 ppid,
1921 execname,
1922 current_processes);
1923 logindex++;
1924 }
1925 proc:::exit /trackedpid[pid] &&
1926 current_processes == 1 &&
1927 waiting_to_die == 1/ {
1928 trackedpid[pid] = 0;
1929 current_processes -= 1;
1930 printf("%d %d %s_%s(%d)\\n",
1931 logindex, pid, probeprov, probename,
1932 current_processes);
1933 logindex++;
1934 exit(0);
1935 }
1936 proc:::exit /trackedpid[pid]/ {
1937 trackedpid[pid] = 0;
1938 current_processes -= 1;
1939 printf("%d %d %s_%s(%d)\\n",
1940 logindex, pid, probeprov, probename,
1941 current_processes);
1942 logindex++;
1943 }
1944
1945 /* Use an arcane function to detect when we need to die */
1946 syscall::ftruncate:entry /pid == PID && arg0 == FILE_ID/ {
1947 waiting_to_die = 1;
1948 printf("%d %d %s()\\n", logindex, pid, probefunc);
1949 logindex++;
1950 }
1951 syscall::ftruncate:entry /
1952 pid == PID && arg0 == FILE_ID && current_processes == 0/ {
1953 exit(0);
1954 }
1955
1956 syscall::open*:entry /trackedpid[pid] == 1/ {
1957 self->open_arg0 = arg0;
1958 self->open_arg1 = arg1;
1959 self->open_arg2 = arg2;
1960 }
1961 syscall::open*:return /trackedpid[pid] == 1 && errno == 0/ {
1962 this->open_arg0 = copyinstr(self->open_arg0);
1963 printf("%d %d %s(\\"%s\\", 0x%x, 0x%x)\\n",
1964 logindex, pid, probefunc,
1965 this->open_arg0,
1966 self->open_arg1,
1967 self->open_arg2);
1968 logindex++;
1969 this->open_arg0 = 0;
1970 }
1971 syscall::open*:return /trackedpid[pid] == 1/ {
1972 self->open_arg0 = 0;
1973 self->open_arg1 = 0;
1974 self->open_arg2 = 0;
1975 }
1976
1977 syscall::rename:entry /trackedpid[pid] == 1/ {
1978 self->rename_arg0 = arg0;
1979 self->rename_arg1 = arg1;
1980 }
1981 syscall::rename:return /trackedpid[pid] == 1 && errno == 0/ {
1982 this->rename_arg0 = copyinstr(self->rename_arg0);
1983 this->rename_arg1 = copyinstr(self->rename_arg1);
1984 printf("%d %d %s(\\"%s\\", \\"%s\\")\\n",
1985 logindex, pid, probefunc,
1986 this->rename_arg0,
1987 this->rename_arg1);
1988 logindex++;
1989 this->rename_arg0 = 0;
1990 this->rename_arg1 = 0;
1991 }
1992 syscall::rename:return /trackedpid[pid] == 1/ {
1993 self->rename_arg0 = 0;
1994 self->rename_arg1 = 0;
1995 }
1996
1997 /* Track chdir, it's painful because it is only receiving relative path.
1998 */
1999 syscall::chdir:entry /trackedpid[pid] == 1/ {
2000 self->chdir_arg0 = arg0;
2001 }
2002 syscall::chdir:return /trackedpid[pid] == 1 && errno == 0/ {
2003 this->chdir_arg0 = copyinstr(self->chdir_arg0);
2004 printf("%d %d %s(\\"%s\\")\\n",
2005 logindex, pid, probefunc,
2006 this->chdir_arg0);
2007 logindex++;
2008 this->chdir_arg0 = 0;
2009 }
2010 syscall::chdir:return /trackedpid[pid] == 1/ {
2011 self->chdir_arg0 = 0;
2012 }
2013 """
2014
2015 # execve-specific code, tends to throw a lot of exceptions.
2016 D_CODE_EXECVE = """
2017 /* Finally what we care about! */
2018 syscall::exec*:entry /trackedpid[pid]/ {
2019 self->exec_arg0 = copyinstr(arg0);
2020 /* Incrementally probe for a NULL in the argv parameter of execve() to
2021 * figure out argc. */
2022 /* TODO(maruel): Skip the remaining copyin() when a NULL pointer was
2023 * found. */
2024 self->exec_argc = 0;
2025 /* Probe for argc==1 */
2026 this->exec_argv = (user_addr_t*)copyin(
2027 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2028 self->exec_argc = this->exec_argv[self->exec_argc] ?
2029 (self->exec_argc + 1) : self->exec_argc;
2030
2031 /* Probe for argc==2 */
2032 this->exec_argv = (user_addr_t*)copyin(
2033 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2034 self->exec_argc = this->exec_argv[self->exec_argc] ?
2035 (self->exec_argc + 1) : self->exec_argc;
2036
2037 /* Probe for argc==3 */
2038 this->exec_argv = (user_addr_t*)copyin(
2039 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2040 self->exec_argc = this->exec_argv[self->exec_argc] ?
2041 (self->exec_argc + 1) : self->exec_argc;
2042
2043 /* Probe for argc==4 */
2044 this->exec_argv = (user_addr_t*)copyin(
2045 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
2046 self->exec_argc = this->exec_argv[self->exec_argc] ?
2047 (self->exec_argc + 1) : self->exec_argc;
2048
2049 /* Copy the inputs strings since there is no guarantee they'll be
2050 * present after the call completed. */
2051 self->exec_argv0 = (self->exec_argc > 0) ?
2052 copyinstr(this->exec_argv[0]) : "";
2053 self->exec_argv1 = (self->exec_argc > 1) ?
2054 copyinstr(this->exec_argv[1]) : "";
2055 self->exec_argv2 = (self->exec_argc > 2) ?
2056 copyinstr(this->exec_argv[2]) : "";
2057 self->exec_argv3 = (self->exec_argc > 3) ?
2058 copyinstr(this->exec_argv[3]) : "";
2059 this->exec_argv = 0;
2060 }
2061 syscall::exec*:return /trackedpid[pid] && errno == 0/ {
2062 /* We need to join strings here, as using multiple printf() would
2063 * cause tearing when multiple threads/processes are traced.
2064 * Since it is impossible to escape a string and join it to another one,
2065 * like sprintf("%s%S", previous, more), use hackery.
2066 * Each of the elements are split with a \\1. \\0 cannot be used because
2067 * it is simply ignored. This will conflict with any program putting a
2068 * \\1 in their execve() string but this should be "rare enough" */
2069 this->args = "";
2070 /* Process exec_argv[0] */
2071 this->args = strjoin(
2072 this->args, (self->exec_argc > 0) ? self->exec_argv0 : "");
2073
2074 /* Process exec_argv[1] */
2075 this->args = strjoin(
2076 this->args, (self->exec_argc > 1) ? "\\1" : "");
2077 this->args = strjoin(
2078 this->args, (self->exec_argc > 1) ? self->exec_argv1 : "");
2079
2080 /* Process exec_argv[2] */
2081 this->args = strjoin(
2082 this->args, (self->exec_argc > 2) ? "\\1" : "");
2083 this->args = strjoin(
2084 this->args, (self->exec_argc > 2) ? self->exec_argv2 : "");
2085
2086 /* Process exec_argv[3] */
2087 this->args = strjoin(
2088 this->args, (self->exec_argc > 3) ? "\\1" : "");
2089 this->args = strjoin(
2090 this->args, (self->exec_argc > 3) ? self->exec_argv3 : "");
2091
2092 /* Prints self->exec_argc to permits verifying the internal
2093 * consistency since this code is quite fishy. */
2094 printf("%d %d %s(\\"%s\\", [%d, %S])\\n",
2095 logindex, pid, probefunc,
2096 self->exec_arg0,
2097 self->exec_argc,
2098 this->args);
2099 logindex++;
2100 this->args = 0;
2101 }
2102 syscall::exec*:return /trackedpid[pid]/ {
2103 self->exec_arg0 = 0;
2104 self->exec_argc = 0;
2105 self->exec_argv0 = 0;
2106 self->exec_argv1 = 0;
2107 self->exec_argv2 = 0;
2108 self->exec_argv3 = 0;
2109 }
2110 """
2111
2112 # Code currently not used.
2113 D_EXTRANEOUS = """
2114 /* This is a good learning experience, since it traces a lot of things
2115 * related to the process and child processes.
2116 * Warning: it generates a gigantic log. For example, tracing
2117 * "data/trace_inputs/child1.py --child" generates a 2mb log and takes
2118 * several minutes to execute.
2119 */
2120 /*
2121 mach_trap::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2122 printf("%d %d %s_%s() = %d\\n",
2123 logindex, pid, probeprov, probefunc, errno);
2124 logindex++;
2125 }
2126 proc::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2127 printf("%d %d %s_%s() = %d\\n",
2128 logindex, pid, probeprov, probefunc, errno);
2129 logindex++;
2130 }
2131 sched::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2132 printf("%d %d %s_%s() = %d\\n",
2133 logindex, pid, probeprov, probefunc, errno);
2134 logindex++;
2135 }
2136 syscall::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2137 printf("%d %d %s_%s() = %d\\n",
2138 logindex, pid, probeprov, probefunc, errno);
2139 logindex++;
2140 }
2141 vminfo::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2142 printf("%d %d %s_%s() = %d\\n",
2143 logindex, pid, probeprov, probefunc, errno);
2144 logindex++;
2145 }
2146 */
2147 /* TODO(maruel): *stat* functions and friends
2148 syscall::access:return,
2149 syscall::chdir:return,
2150 syscall::chflags:return,
2151 syscall::chown:return,
2152 syscall::chroot:return,
2153 syscall::getattrlist:return,
2154 syscall::getxattr:return,
2155 syscall::lchown:return,
2156 syscall::lstat64:return,
2157 syscall::lstat:return,
2158 syscall::mkdir:return,
2159 syscall::pathconf:return,
2160 syscall::readlink:return,
2161 syscall::removexattr:return,
2162 syscall::setxattr:return,
2163 syscall::stat64:return,
2164 syscall::stat:return,
2165 syscall::truncate:return,
2166 syscall::unlink:return,
2167 syscall::utimes:return,
2168 */
2169 """
2170
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002171 def __init__(self, logname, use_sudo):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002172 """Starts the log collection with dtrace.
2173
2174 Requires root access or chmod 4555 on dtrace. dtrace is asynchronous so
2175 this needs to wait for dtrace to be "warmed up".
2176 """
2177 super(Dtrace.Tracer, self).__init__(logname)
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002178 # This script is used as a signal to figure out the root process.
2179 self._signal_script = create_subprocess_thunk()
2180 self._scripts_to_cleanup.append(self._signal_script)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002181 # This unique dummy temp file is used to signal the dtrace script that it
2182 # should stop as soon as all the child processes are done. A bit hackish
2183 # but works fine enough.
2184 self._dummy_file_id, self._dummy_file_name = tempfile.mkstemp(
Marc-Antoine Ruel3c979cb2015-03-11 13:43:28 -04002185 prefix=u'trace_signal_file')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002186
maruel@chromium.org9b2112a2013-04-05 18:01:02 +00002187 dtrace_path = '/usr/sbin/dtrace'
maruel12e30012015-10-09 11:55:35 -07002188 if not fs.isfile(dtrace_path):
maruel@chromium.org9b2112a2013-04-05 18:01:02 +00002189 dtrace_path = 'dtrace'
maruel12e30012015-10-09 11:55:35 -07002190 elif use_sudo is None and (fs.stat(dtrace_path).st_mode & stat.S_ISUID):
maruel@chromium.org9b2112a2013-04-05 18:01:02 +00002191 # No need to sudo. For those following at home, don't do that.
2192 use_sudo = False
2193
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002194 # Note: do not use the -p flag. It's useless if the initial process quits
2195 # too fast, resulting in missing traces from the grand-children. The D
2196 # code manages the dtrace lifetime itself.
2197 trace_cmd = [
maruel@chromium.org9b2112a2013-04-05 18:01:02 +00002198 dtrace_path,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002199 # Use a larger buffer if getting 'out of scratch space' errors.
2200 # Ref: https://wikis.oracle.com/display/DTrace/Options+and+Tunables
2201 '-b', '10m',
2202 '-x', 'dynvarsize=10m',
2203 #'-x', 'dtrace_global_maxsize=1m',
2204 '-x', 'evaltime=exec',
2205 '-o', '/dev/stderr',
2206 '-q',
2207 '-n', self._get_dtrace_code(),
2208 ]
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002209 if use_sudo is not False:
2210 trace_cmd.insert(0, 'sudo')
2211
maruel12e30012015-10-09 11:55:35 -07002212 with fs.open(self._logname + '.log', 'wb') as logfile:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002213 self._dtrace = subprocess.Popen(
2214 trace_cmd, stdout=logfile, stderr=subprocess.STDOUT)
2215 logging.debug('Started dtrace pid: %d' % self._dtrace.pid)
2216
2217 # Reads until one line is printed, which signifies dtrace is up and ready.
maruel12e30012015-10-09 11:55:35 -07002218 with fs.open(self._logname + '.log', 'rb') as logfile:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002219 while 'dtrace_BEGIN' not in logfile.readline():
2220 if self._dtrace.poll() is not None:
2221 # Do a busy wait. :/
2222 break
2223 logging.debug('dtrace started')
2224
2225 def _get_dtrace_code(self):
2226 """Setups the D code to implement child process tracking.
2227
2228 Injects the cookie in the script so it knows when to stop.
2229
2230 The script will detect any instance of the script created with
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002231 create_subprocess_thunk() and will start tracing it.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002232 """
maruel@chromium.orgbe75f282012-11-02 19:34:37 +00002233 out = (
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002234 'inline int PID = %d;\n'
2235 'inline string SCRIPT = "%s";\n'
2236 'inline int FILE_ID = %d;\n'
2237 '\n'
2238 '%s') % (
2239 os.getpid(),
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002240 self._signal_script,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002241 self._dummy_file_id,
maruel@chromium.orgbe75f282012-11-02 19:34:37 +00002242 self.D_CODE)
2243 if os.environ.get('TRACE_INPUTS_DTRACE_ENABLE_EXECVE') == '1':
2244 # Do not enable by default since it tends to spew dtrace: error lines
2245 # because the execve() parameters are not in valid memory at the time of
2246 # logging.
2247 # TODO(maruel): Find a way to make this reliable since it's useful but
2248 # only works in limited/trivial uses cases for now.
2249 out += self.D_CODE_EXECVE
2250 return out
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002251
2252 def trace(self, cmd, cwd, tracename, output):
2253 """Runs dtrace on an executable.
2254
2255 This dtruss is broken when it starts the process itself or when tracing
2256 child processes, this code starts a wrapper process
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002257 generated with create_subprocess_thunk() which starts the executable to
2258 trace.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002259 """
2260 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
2261 assert os.path.isabs(cmd[0]), cmd[0]
2262 assert os.path.isabs(cwd), cwd
2263 assert os.path.normpath(cwd) == cwd, cwd
2264 with self._lock:
2265 if not self._initialized:
2266 raise TracingFailure(
2267 'Called Tracer.trace() on an unitialized object',
2268 None, None, None, tracename)
2269 assert tracename not in (i['trace'] for i in self._traces)
2270
2271 # Starts the script wrapper to start the child process. This signals the
2272 # dtrace script that this process is to be traced.
2273 stdout = stderr = None
2274 if output:
2275 stdout = subprocess.PIPE
2276 stderr = subprocess.STDOUT
2277 child_cmd = [
2278 sys.executable,
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002279 self._signal_script,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002280 tracename,
2281 ]
2282 # Call a dummy function so that dtrace knows I'm about to launch a process
2283 # that needs to be traced.
2284 # Yummy.
2285 child = subprocess.Popen(
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00002286 child_cmd + tools.fix_python_path(cmd),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002287 stdin=subprocess.PIPE,
2288 stdout=stdout,
2289 stderr=stderr,
2290 cwd=cwd)
2291 logging.debug('Started child pid: %d' % child.pid)
2292
2293 out = child.communicate()[0]
2294 # This doesn't mean tracing is done, one of the grand-child process may
2295 # still be alive. It will be tracked with the dtrace script.
2296
2297 with self._lock:
2298 assert tracename not in (i['trace'] for i in self._traces)
2299 self._traces.append(
2300 {
2301 'cmd': cmd,
2302 'cwd': cwd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002303 'pid': child.pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002304 'output': out,
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002305 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002306 })
2307 return child.returncode, out
2308
2309 def close(self, timeout=None):
2310 """Terminates dtrace."""
2311 logging.debug('close(%s)' % timeout)
2312 try:
2313 try:
2314 super(Dtrace.Tracer, self).close(timeout)
2315 # Signal dtrace that it should stop now.
maruel@chromium.orgf43e68b2012-10-15 20:23:10 +00002316 # ftruncate doesn't exist on Windows.
2317 os.ftruncate(self._dummy_file_id, 0) # pylint: disable=E1101
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002318 if timeout:
2319 start = time.time()
2320 # Use polling. :/
2321 while (self._dtrace.poll() is None and
2322 (time.time() - start) < timeout):
2323 time.sleep(0.1)
2324 self._dtrace.kill()
2325 self._dtrace.wait()
2326 finally:
2327 # Make sure to kill it in any case.
2328 if self._dtrace.poll() is None:
2329 try:
2330 self._dtrace.kill()
2331 self._dtrace.wait()
2332 except OSError:
2333 pass
2334
2335 if self._dtrace.returncode != 0:
2336 # Warn about any dtrace failure but basically ignore it.
2337 print 'dtrace failure: %s' % self._dtrace.returncode
2338 finally:
2339 os.close(self._dummy_file_id)
maruel12e30012015-10-09 11:55:35 -07002340 fs.remove(self._dummy_file_name)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002341
2342 def post_process_log(self):
2343 """Sorts the log back in order when each call occured.
2344
2345 dtrace doesn't save the buffer in strict order since it keeps one buffer
2346 per CPU.
2347 """
2348 super(Dtrace.Tracer, self).post_process_log()
2349 logname = self._logname + '.log'
maruel12e30012015-10-09 11:55:35 -07002350 with fs.open(logname, 'rb') as logfile:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002351 lines = [l for l in logfile if l.strip()]
2352 errors = [l for l in lines if l.startswith('dtrace:')]
2353 if errors:
2354 raise TracingFailure(
2355 'Found errors in the trace: %s' % '\n'.join(errors),
2356 None, None, None, logname)
2357 try:
2358 lines = sorted(lines, key=lambda l: int(l.split(' ', 1)[0]))
2359 except ValueError:
2360 raise TracingFailure(
2361 'Found errors in the trace: %s' % '\n'.join(
2362 l for l in lines if l.split(' ', 1)[0].isdigit()),
2363 None, None, None, logname)
maruel12e30012015-10-09 11:55:35 -07002364 with fs.open(logname, 'wb') as logfile:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002365 logfile.write(''.join(lines))
2366
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002367 def __init__(self, use_sudo=None):
2368 super(Dtrace, self).__init__()
2369 self.use_sudo = use_sudo
2370
2371 def get_tracer(self, logname):
2372 return self.Tracer(logname, self.use_sudo)
2373
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002374 @staticmethod
2375 def clean_trace(logname):
2376 for ext in ('', '.log'):
maruel12e30012015-10-09 11:55:35 -07002377 if fs.isfile(logname + ext):
2378 fs.remove(logname + ext)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002379
2380 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00002381 def parse_log(cls, logname, blacklist, trace_name):
2382 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00002383 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002384
2385 def blacklist_more(filepath):
2386 # All the HFS metadata is in the form /.vol/...
2387 return blacklist(filepath) or re.match(r'^\/\.vol\/.+$', filepath)
2388
Marc-Antoine Ruelde011802013-11-12 15:19:47 -05002389 data = tools.read_json(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002390 out = []
2391 for item in data['traces']:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00002392 if trace_name and item['trace'] != trace_name:
2393 continue
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002394 result = {
2395 'output': item['output'],
2396 'trace': item['trace'],
2397 }
2398 try:
2399 context = cls.Context(blacklist_more, item['pid'], item['cwd'])
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002400 # It's fine to assume the file as UTF-8: OSX enforces the file names to
2401 # be valid UTF-8 and we control the log output.
2402 for line in codecs.open(logname + '.log', 'rb', encoding='utf-8'):
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002403 context.on_line(line)
2404 result['results'] = context.to_results()
2405 except TracingFailure:
2406 result['exception'] = sys.exc_info()
2407 out.append(result)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002408 return out
2409
2410
2411class LogmanTrace(ApiBase):
2412 """Uses the native Windows ETW based tracing functionality to trace a child
2413 process.
2414
2415 Caveat: this implementations doesn't track cwd or initial_cwd. It is because
2416 the Windows Kernel doesn't have a concept of 'current working directory' at
2417 all. A Win32 process has a map of current directories, one per drive letter
2418 and it is managed by the user mode kernel32.dll. In kernel, a file is always
2419 opened relative to another file_object or as an absolute path. All the current
2420 working directory logic is done in user mode.
2421 """
2422 class Context(ApiBase.Context):
2423 """Processes a ETW log line and keeps the list of existent and non
2424 existent files accessed.
2425
2426 Ignores directories.
2427 """
2428 # These indexes are for the stripped version in json.
2429 EVENT_NAME = 0
2430 TYPE = 1
2431 PID = 2
2432 TID = 3
2433 PROCESSOR_ID = 4
2434 TIMESTAMP = 5
2435 USER_DATA = 6
2436
2437 class Process(ApiBase.Context.Process):
2438 def __init__(self, *args):
2439 super(LogmanTrace.Context.Process, self).__init__(*args)
2440 # Handle file objects that succeeded.
2441 self.file_objects = {}
2442
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002443 def __init__(self, blacklist, thunk_pid, trace_name, thunk_cmd):
2444 logging.info(
2445 '%s(%d, %s, %s)', self.__class__.__name__, thunk_pid, trace_name,
2446 thunk_cmd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002447 super(LogmanTrace.Context, self).__init__(blacklist)
maruel@chromium.org561d4b22013-09-26 21:08:08 +00002448 self._drive_map = file_path.DosDriveMap()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002449 # Threads mapping to the corresponding process id.
2450 self._threads_active = {}
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002451 # Process ID of the tracer, e.g. the temporary script created by
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002452 # create_subprocess_thunk(). This is tricky because the process id may
2453 # have been reused.
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002454 self._thunk_pid = thunk_pid
2455 self._thunk_cmd = thunk_cmd
2456 self._trace_name = trace_name
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002457 self._line_number = 0
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002458 self._thunk_process = None
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002459
2460 def on_line(self, line):
2461 """Processes a json Event line."""
2462 self._line_number += 1
2463 try:
2464 # By Opcode
2465 handler = getattr(
2466 self,
2467 'handle_%s_%s' % (line[self.EVENT_NAME], line[self.TYPE]),
2468 None)
2469 if not handler:
2470 raise TracingFailure(
2471 'Unexpected event %s_%s' % (
2472 line[self.EVENT_NAME], line[self.TYPE]),
2473 None, None, None)
2474 handler(line)
2475 except TracingFailure, e:
2476 # Hack in the values since the handler could be a static function.
2477 e.pid = line[self.PID]
2478 e.line = line
2479 e.line_number = self._line_number
2480 # Re-raise the modified exception.
2481 raise
2482 except (KeyError, NotImplementedError, ValueError), e:
2483 raise TracingFailure(
2484 'Trace generated a %s exception: %s' % (
2485 e.__class__.__name__, str(e)),
2486 line[self.PID],
2487 self._line_number,
2488 line,
2489 e)
2490
2491 def to_results(self):
2492 if not self.root_process:
2493 raise TracingFailure(
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002494 'Failed to detect the initial process %d' % self._thunk_pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002495 None, None, None)
2496 process = self.root_process.to_results_process()
2497 return Results(process)
2498
2499 def _thread_to_process(self, tid):
2500 """Finds the process from the thread id."""
2501 tid = int(tid, 16)
2502 pid = self._threads_active.get(tid)
2503 if not pid or not self._process_lookup.get(pid):
2504 return
2505 return self._process_lookup[pid]
2506
2507 @classmethod
2508 def handle_EventTrace_Header(cls, line):
2509 """Verifies no event was dropped, e.g. no buffer overrun occured."""
2510 BUFFER_SIZE = cls.USER_DATA
2511 #VERSION = cls.USER_DATA + 1
2512 #PROVIDER_VERSION = cls.USER_DATA + 2
2513 #NUMBER_OF_PROCESSORS = cls.USER_DATA + 3
2514 #END_TIME = cls.USER_DATA + 4
2515 #TIMER_RESOLUTION = cls.USER_DATA + 5
2516 #MAX_FILE_SIZE = cls.USER_DATA + 6
2517 #LOG_FILE_MODE = cls.USER_DATA + 7
2518 #BUFFERS_WRITTEN = cls.USER_DATA + 8
2519 #START_BUFFERS = cls.USER_DATA + 9
2520 #POINTER_SIZE = cls.USER_DATA + 10
2521 EVENTS_LOST = cls.USER_DATA + 11
2522 #CPU_SPEED = cls.USER_DATA + 12
2523 #LOGGER_NAME = cls.USER_DATA + 13
2524 #LOG_FILE_NAME = cls.USER_DATA + 14
2525 #BOOT_TIME = cls.USER_DATA + 15
2526 #PERF_FREQ = cls.USER_DATA + 16
2527 #START_TIME = cls.USER_DATA + 17
2528 #RESERVED_FLAGS = cls.USER_DATA + 18
2529 #BUFFERS_LOST = cls.USER_DATA + 19
2530 #SESSION_NAME_STRING = cls.USER_DATA + 20
2531 #LOG_FILE_NAME_STRING = cls.USER_DATA + 21
2532 if line[EVENTS_LOST] != '0':
2533 raise TracingFailure(
2534 ( '%s events were lost during trace, please increase the buffer '
2535 'size from %s') % (line[EVENTS_LOST], line[BUFFER_SIZE]),
2536 None, None, None)
2537
2538 def handle_FileIo_Cleanup(self, line):
2539 """General wisdom: if a file is closed, it's because it was opened.
2540
2541 Note that FileIo_Close is not used since if a file was opened properly but
2542 not closed before the process exits, only Cleanup will be logged.
2543 """
2544 #IRP = self.USER_DATA
2545 TTID = self.USER_DATA + 1 # Thread ID, that's what we want.
2546 FILE_OBJECT = self.USER_DATA + 2
2547 #FILE_KEY = self.USER_DATA + 3
2548 proc = self._thread_to_process(line[TTID])
2549 if not proc:
2550 # Not a process we care about.
2551 return
2552 file_object = line[FILE_OBJECT]
2553 if file_object in proc.file_objects:
maruel@chromium.org538141b2013-06-03 20:57:17 +00002554 filepath, access_type = proc.file_objects.pop(file_object)
2555 proc.add_file(filepath, access_type)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002556
2557 def handle_FileIo_Create(self, line):
2558 """Handles a file open.
2559
2560 All FileIo events are described at
2561 http://msdn.microsoft.com/library/windows/desktop/aa363884.aspx
2562 for some value of 'description'.
2563
2564 " (..) process and thread id values of the IO events (..) are not valid "
2565 http://msdn.microsoft.com/magazine/ee358703.aspx
2566
2567 The FileIo.Create event doesn't return if the CreateFile() call
2568 succeeded, so keep track of the file_object and check that it is
2569 eventually closed with FileIo_Cleanup.
2570 """
2571 #IRP = self.USER_DATA
2572 TTID = self.USER_DATA + 1 # Thread ID, that's what we want.
2573 FILE_OBJECT = self.USER_DATA + 2
2574 #CREATE_OPTIONS = self.USER_DATA + 3
2575 #FILE_ATTRIBUTES = self.USER_DATA + 4
maruel@chromium.org538141b2013-06-03 20:57:17 +00002576 #SHARE_ACCESS = self.USER_DATA + 5
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002577 OPEN_PATH = self.USER_DATA + 6
2578
2579 proc = self._thread_to_process(line[TTID])
2580 if not proc:
2581 # Not a process we care about.
2582 return
2583
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002584 raw_path = line[OPEN_PATH]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002585 # Ignore directories and bare drive right away.
2586 if raw_path.endswith(os.path.sep):
2587 return
2588 filepath = self._drive_map.to_win32(raw_path)
2589 # Ignore bare drive right away. Some may still fall through with format
2590 # like '\\?\X:'
2591 if len(filepath) == 2:
2592 return
2593 file_object = line[FILE_OBJECT]
maruel12e30012015-10-09 11:55:35 -07002594 if fs.isdir(filepath):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002595 # There is no O_DIRECTORY equivalent on Windows. The closed is
2596 # FILE_FLAG_BACKUP_SEMANTICS but it's not exactly right either. So
2597 # simply discard directories are they are found.
2598 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00002599 # Override any stale file object.
2600 # TODO(maruel): Figure out a way to detect if the file was opened for
2601 # reading or writting. Sadly CREATE_OPTIONS doesn't seem to be of any help
2602 # here. For now mark as None to make it clear we have no idea what it is
2603 # about.
2604 proc.file_objects[file_object] = (filepath, None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002605
2606 def handle_FileIo_Rename(self, line):
2607 # TODO(maruel): Handle?
2608 pass
2609
2610 def handle_Process_End(self, line):
2611 pid = line[self.PID]
2612 if self._process_lookup.get(pid):
2613 logging.info('Terminated: %d' % pid)
2614 self._process_lookup[pid] = None
2615 else:
2616 logging.debug('Terminated: %d' % pid)
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002617 if self._thunk_process and self._thunk_process.pid == pid:
2618 self._thunk_process = None
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002619
2620 def handle_Process_Start(self, line):
2621 """Handles a new child process started by PID."""
2622 #UNIQUE_PROCESS_KEY = self.USER_DATA
2623 PROCESS_ID = self.USER_DATA + 1
2624 #PARENT_PID = self.USER_DATA + 2
2625 #SESSION_ID = self.USER_DATA + 3
2626 #EXIT_STATUS = self.USER_DATA + 4
2627 #DIRECTORY_TABLE_BASE = self.USER_DATA + 5
2628 #USER_SID = self.USER_DATA + 6
2629 IMAGE_FILE_NAME = self.USER_DATA + 7
2630 COMMAND_LINE = self.USER_DATA + 8
2631
2632 ppid = line[self.PID]
2633 pid = int(line[PROCESS_ID], 16)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002634 command_line = CommandLineToArgvW(line[COMMAND_LINE])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002635 logging.debug(
2636 'New process %d->%d (%s) %s' %
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002637 (ppid, pid, line[IMAGE_FILE_NAME], command_line))
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002638
2639 if pid == self._thunk_pid:
2640 # Need to ignore processes we don't know about because the log is
2641 # system-wide. self._thunk_pid shall start only one process.
2642 # This is tricky though because Windows *loves* to reuse process id and
2643 # it happens often that the process ID of the thunk script created by
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002644 # create_subprocess_thunk() is reused. So just detecting the pid here is
2645 # not sufficient, we must confirm the command line.
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002646 if command_line[:len(self._thunk_cmd)] != self._thunk_cmd:
2647 logging.info(
2648 'Ignoring duplicate pid %d for %s: %s while searching for %s',
2649 pid, self._trace_name, command_line, self._thunk_cmd)
2650 return
2651
2652 # TODO(maruel): The check is quite weak. Add the thunk path.
2653 if self._thunk_process:
2654 raise TracingFailure(
2655 ( 'Parent process is _thunk_pid(%d) but thunk_process(%d) is '
2656 'already set') % (self._thunk_pid, self._thunk_process.pid),
2657 None, None, None)
2658 proc = self.Process(self.blacklist, pid, None)
2659 self._thunk_process = proc
2660 return
2661 elif ppid == self._thunk_pid and self._thunk_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002662 proc = self.Process(self.blacklist, pid, None)
2663 self.root_process = proc
2664 ppid = None
2665 elif self._process_lookup.get(ppid):
2666 proc = self.Process(self.blacklist, pid, None)
2667 self._process_lookup[ppid].children.append(proc)
2668 else:
2669 # Ignore
2670 return
2671 self._process_lookup[pid] = proc
2672
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002673 proc.command = command_line
2674 proc.executable = line[IMAGE_FILE_NAME]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002675 # proc.command[0] may be the absolute path of 'executable' but it may be
2676 # anything else too. If it happens that command[0] ends with executable,
2677 # use it, otherwise defaults to the base name.
2678 cmd0 = proc.command[0].lower()
2679 if not cmd0.endswith('.exe'):
2680 # TODO(maruel): That's not strictly true either.
2681 cmd0 += '.exe'
maruel12e30012015-10-09 11:55:35 -07002682 if cmd0.endswith(proc.executable) and fs.isfile(cmd0):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002683 # Fix the path.
2684 cmd0 = cmd0.replace('/', os.path.sep)
2685 cmd0 = os.path.normpath(cmd0)
maruel@chromium.org561d4b22013-09-26 21:08:08 +00002686 proc.executable = file_path.get_native_path_case(cmd0)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002687 logging.info(
2688 'New child: %s -> %d %s' % (ppid, pid, proc.executable))
2689
2690 def handle_Thread_End(self, line):
2691 """Has the same parameters as Thread_Start."""
2692 tid = int(line[self.TID], 16)
2693 self._threads_active.pop(tid, None)
2694
2695 def handle_Thread_Start(self, line):
2696 """Handles a new thread created.
2697
2698 Do not use self.PID here since a process' initial thread is created by
2699 the parent process.
2700 """
2701 PROCESS_ID = self.USER_DATA
2702 TTHREAD_ID = self.USER_DATA + 1
2703 #STACK_BASE = self.USER_DATA + 2
2704 #STACK_LIMIT = self.USER_DATA + 3
2705 #USER_STACK_BASE = self.USER_DATA + 4
2706 #USER_STACK_LIMIT = self.USER_DATA + 5
2707 #AFFINITY = self.USER_DATA + 6
2708 #WIN32_START_ADDR = self.USER_DATA + 7
2709 #TEB_BASE = self.USER_DATA + 8
2710 #SUB_PROCESS_TAG = self.USER_DATA + 9
2711 #BASE_PRIORITY = self.USER_DATA + 10
2712 #PAGE_PRIORITY = self.USER_DATA + 11
2713 #IO_PRIORITY = self.USER_DATA + 12
2714 #THREAD_FLAGS = self.USER_DATA + 13
2715 # Do not use self.PID here since a process' initial thread is created by
2716 # the parent process.
2717 pid = int(line[PROCESS_ID], 16)
2718 tid = int(line[TTHREAD_ID], 16)
2719 logging.debug('New thread pid:%d, tid:%d' % (pid, tid))
2720 self._threads_active[tid] = pid
2721
2722 @classmethod
2723 def supported_events(cls):
2724 """Returns all the procesed events."""
2725 out = []
2726 for member in dir(cls):
2727 match = re.match(r'^handle_([A-Za-z]+)_([A-Za-z]+)$', member)
2728 if match:
2729 out.append(match.groups())
2730 return out
2731
2732 class Tracer(ApiBase.Tracer):
2733 # The basic headers.
2734 EXPECTED_HEADER = [
2735 u'Event Name',
2736 u'Type',
2737 u'Event ID',
2738 u'Version',
2739 u'Channel',
2740 u'Level', # 5
2741 u'Opcode',
2742 u'Task',
2743 u'Keyword',
2744 u'PID',
2745 u'TID', # 10
2746 u'Processor Number',
2747 u'Instance ID',
2748 u'Parent Instance ID',
2749 u'Activity ID',
2750 u'Related Activity ID', # 15
2751 u'Clock-Time',
2752 u'Kernel(ms)', # Both have a resolution of ~15ms which makes them
2753 u'User(ms)', # pretty much useless.
2754 u'User Data', # Extra arguments that are event-specific.
2755 ]
2756 # Only the useful headers common to all entries are listed there. Any column
2757 # at 19 or higher is dependent on the specific event.
2758 EVENT_NAME = 0
2759 TYPE = 1
2760 PID = 9
2761 TID = 10
2762 PROCESSOR_ID = 11
2763 TIMESTAMP = 16
2764 NULL_GUID = '{00000000-0000-0000-0000-000000000000}'
2765 USER_DATA = 19
2766
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002767 class CsvReader(object):
2768 """CSV reader that reads files generated by tracerpt.exe.
2769
2770 csv.reader() fails to read them properly, it mangles file names quoted
2771 with "" with a comma in it.
2772 """
2773 # 0. Had a ',' or one of the following ' ' after a comma, next should
2774 # be ' ', '"' or string or ',' for an empty field.
2775 ( HAD_DELIMITER,
2776 # 1. Processing an unquoted field up to ','.
2777 IN_STR,
2778 # 2. Processing a new field starting with '"'.
2779 STARTING_STR_QUOTED,
2780 # 3. Second quote in a row at the start of a field. It could be either
2781 # '""foo""' or '""'. Who the hell thought it was a great idea to use
2782 # the same character for delimiting and escaping?
2783 STARTING_SECOND_QUOTE,
2784 # 4. A quote inside a quoted string where the previous character was
2785 # not a quote, so the string is not empty. Can be either: end of a
2786 # quoted string (a delimiter) or a quote escape. The next char must be
2787 # either '"' or ','.
2788 HAD_QUOTE_IN_QUOTED,
2789 # 5. Second quote inside a quoted string.
2790 HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED,
2791 # 6. Processing a field that started with '"'.
2792 IN_STR_QUOTED) = range(7)
2793
2794 def __init__(self, f):
2795 self.f = f
2796
2797 def __iter__(self):
2798 return self
2799
2800 def next(self):
2801 """Splits the line in fields."""
2802 line = self.f.readline()
2803 if not line:
2804 raise StopIteration()
2805 line = line.strip()
2806 fields = []
2807 state = self.HAD_DELIMITER
2808 for i, c in enumerate(line):
2809 if state == self.HAD_DELIMITER:
2810 if c == ',':
2811 # Empty field.
2812 fields.append('')
2813 elif c == ' ':
2814 # Ignore initial whitespaces
2815 pass
2816 elif c == '"':
2817 state = self.STARTING_STR_QUOTED
2818 fields.append('')
2819 else:
2820 # Start of a new field.
2821 state = self.IN_STR
2822 fields.append(c)
2823
2824 elif state == self.IN_STR:
2825 # Do not accept quote inside unquoted field.
2826 assert c != '"', (i, c, line, fields)
2827 if c == ',':
2828 fields[-1] = fields[-1].strip()
2829 state = self.HAD_DELIMITER
2830 else:
2831 fields[-1] = fields[-1] + c
2832
2833 elif state == self.STARTING_STR_QUOTED:
2834 if c == '"':
2835 # Do not store the character yet.
2836 state = self.STARTING_SECOND_QUOTE
2837 else:
2838 state = self.IN_STR_QUOTED
2839 fields[-1] = fields[-1] + c
2840
2841 elif state == self.STARTING_SECOND_QUOTE:
2842 if c == ',':
2843 # It was an empty field. '""' == ''.
2844 state = self.HAD_DELIMITER
2845 else:
2846 fields[-1] = fields[-1] + '"' + c
2847 state = self.IN_STR_QUOTED
2848
2849 elif state == self.HAD_QUOTE_IN_QUOTED:
2850 if c == ',':
2851 # End of the string.
2852 state = self.HAD_DELIMITER
2853 elif c == '"':
2854 state = self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED
2855 else:
2856 # The previous double-quote was just an unescaped quote.
2857 fields[-1] = fields[-1] + '"' + c
2858 state = self.IN_STR_QUOTED
2859
2860 elif state == self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED:
2861 if c == ',':
2862 # End of the string.
2863 state = self.HAD_DELIMITER
2864 fields[-1] = fields[-1] + '"'
2865 else:
maruel@chromium.org261d1352013-02-25 19:23:20 +00002866 # That's just how the logger rolls. Revert back to appending the
2867 # char and "guess" it was a quote in a double-quoted string.
2868 state = self.IN_STR_QUOTED
2869 fields[-1] = fields[-1] + '"' + c
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002870
2871 elif state == self.IN_STR_QUOTED:
2872 if c == '"':
2873 # Could be a delimiter or an escape.
2874 state = self.HAD_QUOTE_IN_QUOTED
2875 else:
2876 fields[-1] = fields[-1] + c
2877
2878 if state == self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED:
2879 fields[-1] = fields[-1] + '"'
2880 else:
2881 assert state in (
2882 # Terminated with a normal field.
2883 self.IN_STR,
2884 # Terminated with an empty field.
2885 self.STARTING_SECOND_QUOTE,
2886 # Terminated with a normal quoted field.
2887 self.HAD_QUOTE_IN_QUOTED), (
2888 line, state, fields)
2889 return fields
2890
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002891 def __init__(self, logname):
2892 """Starts the log collection.
2893
2894 Requires administrative access. logman.exe is synchronous so no need for a
2895 "warmup" call. 'Windows Kernel Trace' is *localized* so use its GUID
2896 instead. The GUID constant name is SystemTraceControlGuid. Lovely.
2897
2898 One can get the list of potentially interesting providers with:
2899 "logman query providers | findstr /i file"
2900 """
2901 super(LogmanTrace.Tracer, self).__init__(logname)
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002902 self._signal_script = create_subprocess_thunk()
2903 self._scripts_to_cleanup.append(self._signal_script)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002904 cmd_start = [
2905 'logman.exe',
2906 'start',
2907 'NT Kernel Logger',
2908 '-p', '{9e814aad-3204-11d2-9a82-006008a86939}',
2909 # splitio,fileiocompletion,syscall,file,cswitch,img
2910 '(process,fileio,thread)',
2911 '-o', self._logname + '.etl',
2912 '-ets', # Send directly to kernel
2913 # Values extracted out of thin air.
2914 # Event Trace Session buffer size in kb.
2915 '-bs', '10240',
2916 # Number of Event Trace Session buffers.
2917 '-nb', '16', '256',
2918 ]
2919 logging.debug('Running: %s' % cmd_start)
2920 try:
2921 subprocess.check_call(
2922 cmd_start,
2923 stdin=subprocess.PIPE,
2924 stdout=subprocess.PIPE,
2925 stderr=subprocess.STDOUT)
2926 except subprocess.CalledProcessError, e:
2927 if e.returncode == -2147024891:
2928 print >> sys.stderr, 'Please restart with an elevated admin prompt'
2929 elif e.returncode == -2144337737:
2930 print >> sys.stderr, (
2931 'A kernel trace was already running, stop it and try again')
2932 raise
2933
2934 def trace(self, cmd, cwd, tracename, output):
2935 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
2936 assert os.path.isabs(cmd[0]), cmd[0]
2937 assert os.path.isabs(cwd), cwd
2938 assert os.path.normpath(cwd) == cwd, cwd
2939 with self._lock:
2940 if not self._initialized:
2941 raise TracingFailure(
2942 'Called Tracer.trace() on an unitialized object',
2943 None, None, None, tracename)
2944 assert tracename not in (i['trace'] for i in self._traces)
2945
2946 # Use "logman -?" for help.
2947
2948 stdout = stderr = None
2949 if output:
2950 stdout = subprocess.PIPE
2951 stderr = subprocess.STDOUT
2952
2953 # Run the child process.
2954 logging.debug('Running: %s' % cmd)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002955 # Use the temporary script generated with create_subprocess_thunk() so we
2956 # have a clear pid owner. Since trace_inputs.py can be used as a library
2957 # and could trace multiple processes simultaneously, it makes it more
2958 # complex if the executable to be traced is executed directly here. It
2959 # also solves issues related to logman.exe that needs to be executed to
2960 # control the kernel trace.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002961 child_cmd = [
2962 sys.executable,
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002963 self._signal_script,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002964 tracename,
2965 ]
2966 child = subprocess.Popen(
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00002967 child_cmd + tools.fix_python_path(cmd),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002968 cwd=cwd,
2969 stdin=subprocess.PIPE,
2970 stdout=stdout,
2971 stderr=stderr)
2972 logging.debug('Started child pid: %d' % child.pid)
2973 out = child.communicate()[0]
2974 # This doesn't mean all the grand-children are done. Sadly, we don't have
2975 # a good way to determine that.
2976
2977 with self._lock:
2978 assert tracename not in (i['trace'] for i in self._traces)
2979 self._traces.append({
maruel@chromium.orgf43e68b2012-10-15 20:23:10 +00002980 'cmd': cmd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002981 'cwd': cwd,
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002982 'output': out,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002983 'pid': child.pid,
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002984 # Used to figure out the real process when process ids are reused.
2985 'thunk_cmd': child_cmd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002986 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002987 })
2988
2989 return child.returncode, out
2990
2991 def close(self, _timeout=None):
2992 """Stops the kernel log collection and converts the traces to text
2993 representation.
2994 """
2995 with self._lock:
maruel@chromium.org808e3f22013-05-16 16:42:54 +00002996 try:
2997 super(LogmanTrace.Tracer, self).close()
2998 finally:
2999 cmd_stop = [
3000 'logman.exe',
3001 'stop',
3002 'NT Kernel Logger',
3003 '-ets', # Sends the command directly to the kernel.
3004 ]
3005 logging.debug('Running: %s' % cmd_stop)
3006 subprocess.check_call(
3007 cmd_stop,
3008 stdin=subprocess.PIPE,
3009 stdout=subprocess.PIPE,
3010 stderr=subprocess.STDOUT)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003011
3012 def post_process_log(self):
3013 """Converts the .etl file into .csv then into .json."""
3014 super(LogmanTrace.Tracer, self).post_process_log()
3015 logformat = 'csv'
3016 self._convert_log(logformat)
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003017 self._trim_log(logformat)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003018
maruel@chromium.org808e3f22013-05-16 16:42:54 +00003019 def _gen_logdata(self):
3020 return {
3021 'format': 'csv',
3022 'traces': self._traces,
3023 }
3024
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003025 def _trim_log(self, logformat):
3026 """Reduces the amount of data in original log by generating a 'reduced'
3027 log.
3028 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003029 if logformat == 'csv_utf16':
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003030 file_handle = codecs.open(
3031 self._logname + '.' + logformat, 'r', encoding='utf-16')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003032
3033 elif logformat == 'csv':
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003034 assert sys.getfilesystemencoding() == 'mbcs'
3035 file_handle = codecs.open(
3036 self._logname + '.' + logformat, 'r',
3037 encoding=get_current_encoding())
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003038
3039 supported_events = LogmanTrace.Context.supported_events()
3040
3041 def trim(generator):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003042 """Loads items from the generator and returns the interesting data.
3043
3044 It filters out any uninteresting line and reduce the amount of data in
3045 the trace.
3046 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003047 for index, line in enumerate(generator):
3048 if not index:
3049 if line != self.EXPECTED_HEADER:
3050 raise TracingFailure(
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003051 'Found malformed header: %s' % line,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003052 None, None, None)
3053 continue
3054 # As you can see, the CSV is full of useful non-redundant information:
3055 if (line[2] != '0' or # Event ID
3056 line[3] not in ('2', '3') or # Version
3057 line[4] != '0' or # Channel
3058 line[5] != '0' or # Level
3059 line[7] != '0' or # Task
3060 line[8] != '0x0000000000000000' or # Keyword
3061 line[12] != '' or # Instance ID
3062 line[13] != '' or # Parent Instance ID
3063 line[14] != self.NULL_GUID or # Activity ID
3064 line[15] != ''): # Related Activity ID
3065 raise TracingFailure(
3066 'Found unexpected values in line: %s' % ' '.join(line),
3067 None, None, None)
3068
3069 if (line[self.EVENT_NAME], line[self.TYPE]) not in supported_events:
3070 continue
3071
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003072 yield [
3073 line[self.EVENT_NAME],
3074 line[self.TYPE],
3075 line[self.PID],
3076 line[self.TID],
3077 line[self.PROCESSOR_ID],
3078 line[self.TIMESTAMP],
3079 ] + line[self.USER_DATA:]
3080
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003081 # must not convert the trim() call into a list, since it will use too much
3082 # memory for large trace. use a csv file as a workaround since the json
3083 # parser requires a complete in-memory file.
maruel12e30012015-10-09 11:55:35 -07003084 path = os.path.join(
3085 unicode(os.getcwd()), u'%s.preprocessed' % self._logname)
3086 with fs.open(path, 'wb') as f:
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003087 # $ and * can't be used in file name on windows, reducing the likelihood
3088 # of having to escape a string.
3089 out = csv.writer(
3090 f, delimiter='$', quotechar='*', quoting=csv.QUOTE_MINIMAL)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003091 for line in trim(self.CsvReader(file_handle)):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003092 out.writerow([s.encode('utf-8') for s in line])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003093
3094 def _convert_log(self, logformat):
3095 """Converts the ETL trace to text representation.
3096
3097 Normally, 'csv' is sufficient. If complex scripts are used (like eastern
3098 languages), use 'csv_utf16'. If localization gets in the way, use 'xml'.
3099
3100 Arguments:
3101 - logformat: Text format to be generated, csv, csv_utf16 or xml.
3102
3103 Use "tracerpt -?" for help.
3104 """
3105 LOCALE_INVARIANT = 0x7F
3106 windll.kernel32.SetThreadLocale(LOCALE_INVARIANT)
3107 cmd_convert = [
3108 'tracerpt.exe',
3109 '-l', self._logname + '.etl',
3110 '-o', self._logname + '.' + logformat,
3111 '-gmt', # Use UTC
3112 '-y', # No prompt
3113 # Use -of XML to get the header of each items after column 19, e.g. all
3114 # the actual headers of 'User Data'.
3115 ]
3116
3117 if logformat == 'csv':
3118 # tracerpt localizes the 'Type' column, for major brainfuck
3119 # entertainment. I can't imagine any sane reason to do that.
3120 cmd_convert.extend(['-of', 'CSV'])
3121 elif logformat == 'csv_utf16':
3122 # This causes it to use UTF-16, which doubles the log size but ensures
3123 # the log is readable for non-ASCII characters.
3124 cmd_convert.extend(['-of', 'CSV', '-en', 'Unicode'])
3125 elif logformat == 'xml':
3126 cmd_convert.extend(['-of', 'XML'])
3127 else:
3128 raise ValueError('Unexpected log format \'%s\'' % logformat)
3129 logging.debug('Running: %s' % cmd_convert)
3130 # This can takes tens of minutes for large logs.
3131 # Redirects all output to stderr.
3132 subprocess.check_call(
3133 cmd_convert,
3134 stdin=subprocess.PIPE,
3135 stdout=sys.stderr,
3136 stderr=sys.stderr)
3137
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003138 def __init__(self, use_sudo=False): # pylint: disable=W0613
3139 super(LogmanTrace, self).__init__()
3140 # Ignore use_sudo. It's irrelevant on Windows but kept to simplify the API.
3141
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003142 @staticmethod
3143 def clean_trace(logname):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003144 for ext in ('', '.csv', '.etl', '.json', '.xml', '.preprocessed'):
maruel12e30012015-10-09 11:55:35 -07003145 if fs.isfile(logname + ext):
3146 fs.remove(logname + ext)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003147
3148 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003149 def parse_log(cls, logname, blacklist, trace_name):
3150 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00003151 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003152
3153 def blacklist_more(filepath):
3154 # All the NTFS metadata is in the form x:\$EXTEND or stuff like that.
3155 return blacklist(filepath) or re.match(r'[A-Z]\:\\\$EXTEND', filepath)
3156
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003157 # Create a list of (Context, result_dict) tuples. This is necessary because
3158 # the csv file may be larger than the amount of available memory.
3159 contexes = [
3160 (
3161 cls.Context(
3162 blacklist_more, item['pid'], item['trace'], item['thunk_cmd']),
3163 {
3164 'output': item['output'],
3165 'trace': item['trace'],
3166 },
3167 )
Marc-Antoine Ruelde011802013-11-12 15:19:47 -05003168 for item in tools.read_json(logname)['traces']
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003169 if not trace_name or item['trace'] == trace_name
3170 ]
3171
3172 # The log may be too large to fit in memory and it is not efficient to read
3173 # it multiple times, so multiplex the contexes instead, which is slightly
3174 # more awkward.
maruel12e30012015-10-09 11:55:35 -07003175 path = os.path.join(unicode(os.getcwd()), u'%s.preprocessed' % logname)
3176 with fs.open(path, 'rb') as f:
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003177 lines = csv.reader(
3178 f, delimiter='$', quotechar='*', quoting=csv.QUOTE_MINIMAL)
3179 for encoded in lines:
3180 line = [s.decode('utf-8') for s in encoded]
3181 # Convert the PID in-place from hex.
3182 line[cls.Context.PID] = int(line[cls.Context.PID], 16)
3183 for context in contexes:
3184 if 'exception' in context[1]:
3185 continue
3186 try:
3187 context[0].on_line(line)
3188 except TracingFailure:
3189 context[1]['exception'] = sys.exc_info()
3190
3191 for context in contexes:
3192 if 'exception' in context[1]:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003193 continue
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003194 context[1]['results'] = context[0].to_results()
3195
3196 return [context[1] for context in contexes]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003197
3198
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003199def get_api(**kwargs):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003200 """Returns the correct implementation for the current OS."""
3201 if sys.platform == 'cygwin':
3202 raise NotImplementedError(
3203 'Not implemented for cygwin, start the script from Win32 python')
3204 flavors = {
3205 'win32': LogmanTrace,
3206 'darwin': Dtrace,
3207 'sunos5': Dtrace,
3208 'freebsd7': Dtrace,
3209 'freebsd8': Dtrace,
3210 }
3211 # Defaults to strace.
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003212 return flavors.get(sys.platform, Strace)(**kwargs)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003213
3214
3215def extract_directories(root_dir, files, blacklist):
3216 """Detects if all the files in a directory are in |files| and if so, replace
3217 the individual files by a Results.Directory instance.
3218
3219 Takes a list of Results.File instances and returns a shorter list of
3220 Results.File and Results.Directory instances.
3221
3222 Arguments:
3223 - root_dir: Optional base directory that shouldn't be search further.
3224 - files: list of Results.File instances.
maruel@chromium.org3683afe2013-07-27 00:09:27 +00003225 - blacklist: lambda to reject unneeded files, for example r'.+\.pyc'.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003226 """
3227 logging.info(
3228 'extract_directories(%s, %d files, ...)' % (root_dir, len(files)))
3229 assert not (root_dir or '').endswith(os.path.sep), root_dir
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003230 # It is important for root_dir to not be a symlinked path, make sure to call
3231 # os.path.realpath() as needed.
3232 assert not root_dir or (
maruel5ab2e6c2016-08-03 12:00:37 -07003233 os.path.realpath(file_path.get_native_path_case(root_dir)) == root_dir)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003234 assert not any(isinstance(f, Results.Directory) for f in files)
3235 # Remove non existent files.
3236 files = [f for f in files if f.existent]
3237 if not files:
3238 return files
3239 # All files must share the same root, which can be None.
3240 assert len(set(f.root for f in files)) == 1, set(f.root for f in files)
3241
3242 # Creates a {directory: {filename: File}} mapping, up to root.
3243 buckets = {}
3244 if root_dir:
3245 buckets[root_dir] = {}
3246 for fileobj in files:
3247 path = fileobj.full_path
3248 directory = os.path.dirname(path)
3249 assert directory
3250 # Do not use os.path.basename() so trailing os.path.sep is kept.
3251 basename = path[len(directory)+1:]
3252 files_in_directory = buckets.setdefault(directory, {})
3253 files_in_directory[basename] = fileobj
3254 # Add all the directories recursively up to root.
3255 while True:
3256 old_d = directory
3257 directory = os.path.dirname(directory)
3258 if directory + os.path.sep == root_dir or directory == old_d:
3259 break
3260 buckets.setdefault(directory, {})
3261
3262 root_prefix = len(root_dir) + 1 if root_dir else 0
3263 for directory in sorted(buckets, reverse=True):
maruel12e30012015-10-09 11:55:35 -07003264 if not fs.isdir(directory):
maruel@chromium.orgb1678e62013-03-19 15:55:17 +00003265 logging.debug(
3266 '%s was a directory but doesn\'t exist anymore; ignoring', directory)
3267 continue
maruel12e30012015-10-09 11:55:35 -07003268 actual = set(f for f in fs.listdir(directory) if not blacklist(f))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003269 expected = set(buckets[directory])
3270 if not (actual - expected):
3271 parent = os.path.dirname(directory)
3272 buckets[parent][os.path.basename(directory)] = Results.Directory(
3273 root_dir,
3274 directory[root_prefix:],
3275 False,
3276 sum(f.size for f in buckets[directory].itervalues()),
3277 sum(f.nb_files for f in buckets[directory].itervalues()))
3278 # Remove the whole bucket.
3279 del buckets[directory]
3280
3281 # Reverse the mapping with what remains. The original instances are returned,
3282 # so the cached meta data is kept.
3283 files = sum((x.values() for x in buckets.itervalues()), [])
3284 return sorted(files, key=lambda x: x.path)
3285
3286
3287def trace(logfile, cmd, cwd, api, output):
3288 """Traces an executable. Returns (returncode, output) from api.
3289
3290 Arguments:
3291 - logfile: file to write to.
3292 - cmd: command to run.
3293 - cwd: current directory to start the process in.
3294 - api: a tracing api instance.
3295 - output: if True, returns output, otherwise prints it at the console.
3296 """
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00003297 cmd = tools.fix_python_path(cmd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003298 api.clean_trace(logfile)
3299 with api.get_tracer(logfile) as tracer:
3300 return tracer.trace(cmd, cwd, 'default', output)
3301
3302
maruel@chromium.orge5322512013-08-19 20:17:57 +00003303def CMDclean(parser, args):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003304 """Cleans up traces."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003305 options, args = parser.parse_args(args)
3306 api = get_api()
3307 api.clean_trace(options.log)
3308 return 0
3309
3310
maruel@chromium.orge5322512013-08-19 20:17:57 +00003311def CMDtrace(parser, args):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003312 """Traces an executable."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003313 parser.allow_interspersed_args = False
3314 parser.add_option(
3315 '-q', '--quiet', action='store_true',
3316 help='Redirects traced executable output to /dev/null')
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003317 parser.add_option(
3318 '-s', '--sudo', action='store_true',
3319 help='Use sudo when shelling out the tracer tool (ignored on Windows)')
3320 parser.add_option(
3321 '-n', '--no-sudo', action='store_false',
3322 help='Don\'t use sudo')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003323 options, args = parser.parse_args(args)
3324
3325 if not args:
3326 parser.error('Please provide a command to run')
3327
Marc-Antoine Ruel5906fcd2013-11-08 21:02:24 -05003328 if not can_trace():
3329 parser.error('Please rerun this program with admin privileges')
3330
maruel12e30012015-10-09 11:55:35 -07003331 if not os.path.isabs(args[0]) and fs.access(args[0], os.X_OK):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003332 args[0] = os.path.abspath(args[0])
3333
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003334 # options.sudo default value is None, which is to do whatever tracer defaults
3335 # do.
3336 api = get_api(use_sudo=options.sudo)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003337 return trace(options.log, args, os.getcwd(), api, options.quiet)[0]
3338
3339
maruel@chromium.orge5322512013-08-19 20:17:57 +00003340def CMDread(parser, args):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003341 """Reads the logs and prints the result."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003342 parser.add_option(
3343 '-V', '--variable',
3344 nargs=2,
3345 action='append',
3346 dest='variables',
3347 metavar='VAR_NAME directory',
3348 default=[],
3349 help=('Variables to replace relative directories against. Example: '
3350 '"-v \'$HOME\' \'/home/%s\'" will replace all occurence of your '
3351 'home dir with $HOME') % getpass.getuser())
3352 parser.add_option(
3353 '--root-dir',
3354 help='Root directory to base everything off it. Anything outside of this '
3355 'this directory will not be reported')
3356 parser.add_option(
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003357 '--trace-name',
3358 help='Only reads one of the trace. Defaults to reading all traces')
3359 parser.add_option(
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003360 '-j', '--json', action='store_true',
3361 help='Outputs raw result data as json')
3362 parser.add_option(
maruel@chromium.org3683afe2013-07-27 00:09:27 +00003363 '--trace-blacklist', action='append', default=[],
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003364 help='List of regexp to use as blacklist filter')
3365 options, args = parser.parse_args(args)
3366
3367 if options.root_dir:
maruel@chromium.org561d4b22013-09-26 21:08:08 +00003368 options.root_dir = file_path.get_native_path_case(
maruel@chromium.org7dc17b32013-04-02 22:27:05 +00003369 unicode(os.path.abspath(options.root_dir)))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003370
3371 variables = dict(options.variables)
3372 api = get_api()
Marc-Antoine Ruelac54cb42013-11-18 14:05:35 -05003373 blacklist = tools.gen_blacklist(options.trace_blacklist)
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003374 data = api.parse_log(options.log, blacklist, options.trace_name)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003375 # Process each trace.
3376 output_as_json = []
maruel@chromium.org9e56b592013-05-21 16:56:05 +00003377 try:
3378 for item in data:
3379 if 'exception' in item:
3380 # Do not abort the other traces.
3381 print >> sys.stderr, (
3382 'Trace %s: Got an exception: %s' % (
3383 item['trace'], item['exception'][1]))
3384 continue
3385 results = item['results']
3386 if options.root_dir:
3387 results = results.strip_root(options.root_dir)
3388
3389 if options.json:
3390 output_as_json.append(results.flatten())
3391 else:
3392 simplified = extract_directories(
3393 options.root_dir, results.files, blacklist)
3394 simplified = [f.replace_variables(variables) for f in simplified]
3395 if len(data) > 1:
3396 print('Trace: %s' % item['trace'])
3397 print('Total: %d' % len(results.files))
3398 print('Non existent: %d' % len(results.non_existent))
3399 for f in results.non_existent:
3400 print(' %s' % f.path)
3401 print(
3402 'Interesting: %d reduced to %d' % (
3403 len(results.existent), len(simplified)))
3404 for f in simplified:
3405 print(' %s' % f.path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003406
3407 if options.json:
Marc-Antoine Ruelde011802013-11-12 15:19:47 -05003408 tools.write_json(sys.stdout, output_as_json, False)
maruel@chromium.org9e56b592013-05-21 16:56:05 +00003409 except KeyboardInterrupt:
3410 return 1
3411 except IOError as e:
3412 if e.errno == errno.EPIPE:
3413 # Do not print a stack trace when the output is piped to less and the user
3414 # quits before the whole output was written.
3415 return 1
3416 raise
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003417 return 0
3418
3419
Marc-Antoine Ruelf74cffe2015-07-15 15:21:34 -04003420class OptionParserTraceInputs(logging_utils.OptionParserWithLogging):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003421 """Adds automatic --log handling."""
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00003422
3423 # Disable --log-file options since both --log and --log-file options are
3424 # confusing.
3425 # TODO(vadimsh): Rename --log-file or --log to something else.
3426 enable_log_file = False
3427
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003428 def __init__(self, **kwargs):
Marc-Antoine Ruelf74cffe2015-07-15 15:21:34 -04003429 logging_utils.OptionParserWithLogging.__init__(self, **kwargs)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003430 self.add_option(
3431 '-l', '--log', help='Log file to generate or read, required')
3432
3433 def parse_args(self, *args, **kwargs):
3434 """Makes sure the paths make sense.
3435
3436 On Windows, / and \ are often mixed together in a path.
3437 """
Marc-Antoine Ruelf74cffe2015-07-15 15:21:34 -04003438 options, args = logging_utils.OptionParserWithLogging.parse_args(
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003439 self, *args, **kwargs)
3440 if not options.log:
3441 self.error('Must supply a log file with -l')
maruel12e30012015-10-09 11:55:35 -07003442 options.log = unicode(os.path.abspath(options.log))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003443 return options, args
3444
3445
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003446def main(argv):
maruel@chromium.orge5322512013-08-19 20:17:57 +00003447 dispatcher = subcommand.CommandDispatcher(__name__)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003448 try:
maruel@chromium.org3d671992013-08-20 00:38:27 +00003449 return dispatcher.execute(
3450 OptionParserTraceInputs(version=__version__), argv)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003451 except TracingFailure, e:
3452 sys.stderr.write('\nError: ')
3453 sys.stderr.write(str(e))
3454 sys.stderr.write('\n')
3455 return 1
3456
3457
3458if __name__ == '__main__':
maruel8e4e40c2016-05-30 06:21:07 -07003459 subprocess42.inhibit_os_error_reporting()
maruel@chromium.orge5322512013-08-19 20:17:57 +00003460 fix_encoding.fix_encoding()
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00003461 tools.disable_buffering()
maruel@chromium.orge5322512013-08-19 20:17:57 +00003462 colorama.init()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003463 sys.exit(main(sys.argv[1:]))