blob: 4dc5a2d53b866935a443ba57fe17f6ae99c7e52f [file] [log] [blame]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001#!/usr/bin/env python
2# coding=utf-8
maruelea586f32016-04-05 11:11:33 -07003# Copyright 2012 The LUCI Authors. All rights reserved.
maruelf1f5e2a2016-05-25 17:10:39 -07004# Use of this source code is governed under the Apache License, Version 2.0
5# that can be found in the LICENSE file.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00006
7"""Traces an executable and its child processes and extract the files accessed
8by them.
9
10The implementation uses OS-specific API. The native Kernel logger and the ETL
11interface is used on Windows. Dtrace is used on OSX. Strace is used otherwise.
12The OS-specific implementation is hidden in an 'API' interface.
13
14The results are embedded in a Results instance. The tracing is done in two
15phases, the first is to do the actual trace and generate an
16implementation-specific log file. Then the log file is parsed to extract the
17information, including the individual child processes and the files accessed
18from the log.
19"""
20
21import codecs
22import csv
maruel@chromium.org9e56b592013-05-21 16:56:05 +000023import errno
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000024import getpass
25import glob
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000026import logging
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000027import os
28import re
maruel@chromium.org9b2112a2013-04-05 18:01:02 +000029import stat
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000030import subprocess
31import sys
32import tempfile
33import threading
34import time
35import weakref
36
maruel@chromium.orge5322512013-08-19 20:17:57 +000037from third_party import colorama
38from third_party.depot_tools import fix_encoding
39from third_party.depot_tools import subcommand
40
maruel@chromium.org561d4b22013-09-26 21:08:08 +000041from utils import file_path
maruel12e30012015-10-09 11:55:35 -070042from utils import fs
Marc-Antoine Ruelf74cffe2015-07-15 15:21:34 -040043from utils import logging_utils
maruel8e4e40c2016-05-30 06:21:07 -070044from utils import subprocess42
vadimsh@chromium.orga4326472013-08-24 02:05:41 +000045from utils import tools
46
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000047## OS-specific imports
48
49if sys.platform == 'win32':
maruel@chromium.org561d4b22013-09-26 21:08:08 +000050 from ctypes.wintypes import byref, c_int, c_wchar_p
51 from ctypes.wintypes import windll # pylint: disable=E0611
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000052
53
maruel12e30012015-10-09 11:55:35 -070054__version__ = '0.2'
maruel@chromium.org3d671992013-08-20 00:38:27 +000055
56
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000057BASE_DIR = os.path.dirname(os.path.abspath(__file__))
58ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR))
59
60
61class TracingFailure(Exception):
62 """An exception occured during tracing."""
63 def __init__(self, description, pid, line_number, line, *args):
64 super(TracingFailure, self).__init__(
65 description, pid, line_number, line, *args)
66 self.description = description
67 self.pid = pid
68 self.line_number = line_number
69 self.line = line
70 self.extra = args
71
72 def __str__(self):
73 out = self.description
74 if self.pid:
75 out += '\npid: %d' % self.pid
76 if self.line_number:
77 out += '\nline: %d' % self.line_number
78 if self.line:
79 out += '\n%s' % self.line
80 if self.extra:
81 out += '\n' + ', '.join(map(str, filter(None, self.extra)))
82 return out
83
84
85## OS-specific functions
86
87if sys.platform == 'win32':
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000088 def get_current_encoding():
89 """Returns the 'ANSI' code page associated to the process."""
90 return 'cp%d' % int(windll.kernel32.GetACP())
91
92
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000093 def CommandLineToArgvW(command_line):
94 """Splits a commandline into argv using CommandLineToArgvW()."""
95 # http://msdn.microsoft.com/library/windows/desktop/bb776391.aspx
96 size = c_int()
maruel@chromium.org306e0e72012-11-02 18:22:03 +000097 assert isinstance(command_line, unicode)
98 ptr = windll.shell32.CommandLineToArgvW(command_line, byref(size))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +000099 try:
100 return [arg for arg in (c_wchar_p * size.value).from_address(ptr)]
101 finally:
102 windll.kernel32.LocalFree(ptr)
103
104
Marc-Antoine Ruel5906fcd2013-11-08 21:02:24 -0500105def can_trace():
106 """Returns True if the user is an administrator on Windows.
107
108 It is required for tracing to work.
109 """
110 if sys.platform != 'win32':
111 return True
112 return bool(windll.shell32.IsUserAnAdmin())
113
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000114
maruel@chromium.org2909b7c2013-05-29 18:52:59 +0000115def create_subprocess_thunk():
116 """Creates a small temporary script to start the child process.
117
118 This thunk doesn't block, its unique name is used to identify it as the
119 parent.
120 """
Marc-Antoine Ruel3c979cb2015-03-11 13:43:28 -0400121 handle, name = tempfile.mkstemp(prefix=u'trace_inputs_thunk', suffix='.py')
maruel@chromium.org2909b7c2013-05-29 18:52:59 +0000122 try:
123 os.write(
124 handle,
125 (
126 'import subprocess, sys\n'
127 'sys.exit(subprocess.call(sys.argv[2:]))\n'
128 ))
129 finally:
130 os.close(handle)
131 return name
132
133
134def create_exec_thunk():
135 """Creates a small temporary script to start the child executable.
136
137 Reads from the file handle provided as the fisrt argument to block, then
138 execv() the command to be traced.
139 """
Marc-Antoine Ruel3c979cb2015-03-11 13:43:28 -0400140 handle, name = tempfile.mkstemp(prefix=u'trace_inputs_thunk', suffix='.py')
maruel@chromium.org2909b7c2013-05-29 18:52:59 +0000141 try:
142 os.write(
143 handle,
144 (
145 'import os, sys\n'
146 'fd = int(sys.argv[1])\n'
147 # This will block until the controlling process writes a byte on the
148 # pipe. It will do so once the tracing tool, e.g. strace, is ready to
149 # trace.
150 'os.read(fd, 1)\n'
151 'os.close(fd)\n'
152 'os.execv(sys.argv[2], sys.argv[2:])\n'
153 ))
154 finally:
155 os.close(handle)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000156 return name
157
158
159def strace_process_quoted_arguments(text):
160 """Extracts quoted arguments on a string and return the arguments as a list.
161
162 Implemented as an automaton. Supports incomplete strings in the form
163 '"foo"...'.
164
165 Example:
166 With text = '"foo", "bar"', the function will return ['foo', 'bar']
167
168 TODO(maruel): Implement escaping.
169 """
170 # All the possible states of the DFA.
171 ( NEED_QUOTE, # Begining of a new arguments.
172 INSIDE_STRING, # Inside an argument.
173 ESCAPED, # Found a '\' inside a quote. Treat the next char as-is.
174 NEED_COMMA_OR_DOT, # Right after the closing quote of an argument. Could be
175 # a serie of 3 dots or a comma.
176 NEED_SPACE, # Right after a comma
177 NEED_DOT_2, # Found a dot, need a second one.
178 NEED_DOT_3, # Found second dot, need a third one.
179 NEED_COMMA, # Found third dot, need a comma.
180 ) = range(8)
181
182 state = NEED_QUOTE
183 out = []
184 for index, char in enumerate(text):
185 if char == '"':
186 if state == NEED_QUOTE:
187 state = INSIDE_STRING
188 # A new argument was found.
189 out.append('')
190 elif state == INSIDE_STRING:
191 # The argument is now closed.
192 state = NEED_COMMA_OR_DOT
193 elif state == ESCAPED:
194 out[-1] += char
195 state = INSIDE_STRING
196 else:
197 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000198 'Can\'t process char \'%s\' at column %d for: %r' % (
199 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000200 index,
201 text)
202 elif char == ',':
203 if state in (NEED_COMMA_OR_DOT, NEED_COMMA):
204 state = NEED_SPACE
205 elif state == INSIDE_STRING:
206 out[-1] += char
207 elif state == ESCAPED:
208 out[-1] += char
209 state = INSIDE_STRING
210 else:
211 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000212 'Can\'t process char \'%s\' at column %d for: %r' % (
213 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000214 index,
215 text)
216 elif char == ' ':
217 if state == NEED_SPACE:
218 state = NEED_QUOTE
219 elif state == INSIDE_STRING:
220 out[-1] += char
221 elif state == ESCAPED:
222 out[-1] += char
223 state = INSIDE_STRING
224 else:
225 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000226 'Can\'t process char \'%s\' at column %d for: %r' % (
227 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000228 index,
229 text)
230 elif char == '.':
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000231 if state in (NEED_QUOTE, NEED_COMMA_OR_DOT):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000232 # The string is incomplete, this mean the strace -s flag should be
233 # increased.
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000234 # For NEED_QUOTE, the input string would look like '"foo", ...'.
235 # For NEED_COMMA_OR_DOT, the input string would look like '"foo"...'
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000236 state = NEED_DOT_2
237 elif state == NEED_DOT_2:
238 state = NEED_DOT_3
239 elif state == NEED_DOT_3:
240 state = NEED_COMMA
241 elif state == INSIDE_STRING:
242 out[-1] += char
243 elif state == ESCAPED:
244 out[-1] += char
245 state = INSIDE_STRING
246 else:
247 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000248 'Can\'t process char \'%s\' at column %d for: %r' % (
249 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000250 index,
251 text)
252 elif char == '\\':
253 if state == ESCAPED:
254 out[-1] += char
255 state = INSIDE_STRING
256 elif state == INSIDE_STRING:
257 state = ESCAPED
258 else:
259 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000260 'Can\'t process char \'%s\' at column %d for: %r' % (
261 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000262 index,
263 text)
264 else:
265 if state == INSIDE_STRING:
266 out[-1] += char
267 else:
268 raise ValueError(
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000269 'Can\'t process char \'%s\' at column %d for: %r' % (
270 char, index, text),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000271 index,
272 text)
273 if state not in (NEED_COMMA, NEED_COMMA_OR_DOT):
274 raise ValueError(
275 'String is incorrectly terminated: %r' % text,
276 text)
277 return out
278
279
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000280def assert_is_renderable(pseudo_string):
281 """Asserts the input is a valid object to be processed by render()."""
282 assert (
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000283 pseudo_string is None or
284 isinstance(pseudo_string, unicode) or
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000285 hasattr(pseudo_string, 'render')), repr(pseudo_string)
286
287
288def render(pseudo_string):
289 """Converts the pseudo-string to an unicode string."""
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000290 if pseudo_string is None or isinstance(pseudo_string, unicode):
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000291 return pseudo_string
292 return pseudo_string.render()
293
294
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000295class Results(object):
296 """Results of a trace session."""
297
298 class _TouchedObject(object):
299 """Something, a file or a directory, that was accessed."""
300 def __init__(self, root, path, tainted, size, nb_files):
301 logging.debug(
302 '%s(%s, %s, %s, %s, %s)' %
303 (self.__class__.__name__, root, path, tainted, size, nb_files))
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000304 assert_is_renderable(root)
305 assert_is_renderable(path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000306 self.root = root
307 self.path = path
308 self.tainted = tainted
309 self.nb_files = nb_files
maruel@chromium.org538141b2013-06-03 20:57:17 +0000310 # Can be used as a cache or a default value, depending on context. In
311 # particular, once self.tainted is True, because the path was replaced
312 # with a variable, it is not possible to look up the file size.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000313 self._size = size
314 # These are cache only.
315 self._real_path = None
316
317 # Check internal consistency.
318 assert path, path
maruel@chromium.org561d4b22013-09-26 21:08:08 +0000319 assert tainted or bool(root) != bool(file_path.isabs(path)), (root, path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000320 assert tainted or (
maruel12e30012015-10-09 11:55:35 -0700321 not fs.exists(self.full_path) or
maruel@chromium.org561d4b22013-09-26 21:08:08 +0000322 (self.full_path == file_path.get_native_path_case(self.full_path))), (
323 tainted,
324 self.full_path,
325 file_path.get_native_path_case(self.full_path))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000326
327 @property
328 def existent(self):
329 return self.size != -1
330
331 @property
332 def full_path(self):
333 if self.root:
334 return os.path.join(self.root, self.path)
335 return self.path
336
337 @property
338 def real_path(self):
339 """Returns the path with symlinks resolved."""
340 if not self._real_path:
maruel12e30012015-10-09 11:55:35 -0700341 self._real_path = fs.path.realpath(self.full_path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000342 return self._real_path
343
344 @property
345 def size(self):
maruel@chromium.org538141b2013-06-03 20:57:17 +0000346 """File's size. -1 is not existent.
347
348 Once tainted, it is not possible the retrieve the file size anymore since
349 the path is composed of variables.
350 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000351 if self._size is None and not self.tainted:
352 try:
maruel12e30012015-10-09 11:55:35 -0700353 self._size = fs.stat(self.full_path).st_size
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000354 except OSError:
355 self._size = -1
356 return self._size
357
358 def flatten(self):
359 """Returns a dict representing this object.
360
361 A 'size' of 0 means the file was only touched and not read.
362 """
363 return {
364 'path': self.path,
365 'size': self.size,
366 }
367
368 def replace_variables(self, variables):
369 """Replaces the root of this File with one of the variables if it matches.
370
371 If a variable replacement occurs, the cloned object becomes tainted.
372 """
373 for variable, root_path in variables.iteritems():
374 if self.path.startswith(root_path):
375 return self._clone(
376 self.root, variable + self.path[len(root_path):], True)
377 # No need to clone, returns ourself.
378 return self
379
380 def strip_root(self, root):
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000381 """Returns a clone of itself with 'root' stripped off.
382
383 Note that the file is kept if it is either accessible from a symlinked
384 path that was used to access the file or through the real path.
385 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000386 # Check internal consistency.
maruel@chromium.org561d4b22013-09-26 21:08:08 +0000387 assert (
388 self.tainted or
389 (file_path.isabs(root) and root.endswith(os.path.sep))), root
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000390 if not self.full_path.startswith(root):
391 # Now try to resolve the symlinks to see if it can be reached this way.
392 # Only try *after* trying without resolving symlink.
393 if not self.real_path.startswith(root):
394 return None
395 path = self.real_path
396 else:
397 path = self.full_path
398 return self._clone(root, path[len(root):], self.tainted)
399
400 def _clone(self, new_root, new_path, tainted):
401 raise NotImplementedError(self.__class__.__name__)
402
403 class File(_TouchedObject):
404 """A file that was accessed. May not be present anymore.
405
406 If tainted is true, it means it is not a real path anymore as a variable
407 replacement occured.
408
maruel@chromium.org538141b2013-06-03 20:57:17 +0000409 |mode| can be one of None, TOUCHED, READ or WRITE.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000410 """
maruel@chromium.org538141b2013-06-03 20:57:17 +0000411 # Was probed for existence, and it is existent, but was never _opened_.
412 TOUCHED = 't'
413 # Opened for read only and guaranteed to not have been written to.
414 READ = 'r'
415 # Opened for write.
416 WRITE = 'w'
417
418 # They are listed in order of priority. E.g. if a file is traced as TOUCHED
419 # then as WRITE, only keep WRITE. None means no idea, which is a problem on
420 # Windows.
421 ACCEPTABLE_MODES = (None, TOUCHED, READ, WRITE)
422
423 def __init__(self, root, path, tainted, size, mode):
424 assert mode in self.ACCEPTABLE_MODES
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000425 super(Results.File, self).__init__(root, path, tainted, size, 1)
maruel@chromium.org538141b2013-06-03 20:57:17 +0000426 self.mode = mode
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000427
428 def _clone(self, new_root, new_path, tainted):
429 """Clones itself keeping meta-data."""
430 # Keep the self.size and self._real_path caches for performance reason. It
431 # is also important when the file becomes tainted (with a variable instead
432 # of the real path) since self.path is not an on-disk path anymore so
433 # out._size cannot be updated.
maruel@chromium.org538141b2013-06-03 20:57:17 +0000434 out = self.__class__(new_root, new_path, tainted, self.size, self.mode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000435 out._real_path = self._real_path
436 return out
437
maruel@chromium.org538141b2013-06-03 20:57:17 +0000438 def flatten(self):
439 out = super(Results.File, self).flatten()
440 out['mode'] = self.mode
441 return out
442
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000443 class Directory(_TouchedObject):
maruel@chromium.org538141b2013-06-03 20:57:17 +0000444 """A directory of files. Must exist.
445
446 For a Directory instance, self.size is not a cache, it's an actual value
447 that is never modified and represents the total size of the files contained
448 in this directory. It is possible that the directory is empty so that
449 size==0; this happens if there's only an invalid symlink in it.
450 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000451 def __init__(self, root, path, tainted, size, nb_files):
452 """path='.' is a valid value and must be handled appropriately."""
453 assert not path.endswith(os.path.sep), path
454 super(Results.Directory, self).__init__(
455 root, path + os.path.sep, tainted, size, nb_files)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000456
457 def flatten(self):
458 out = super(Results.Directory, self).flatten()
459 out['nb_files'] = self.nb_files
460 return out
461
462 def _clone(self, new_root, new_path, tainted):
463 """Clones itself keeping meta-data."""
464 out = self.__class__(
465 new_root,
466 new_path.rstrip(os.path.sep),
467 tainted,
468 self.size,
469 self.nb_files)
470 out._real_path = self._real_path
471 return out
472
473 class Process(object):
474 """A process that was traced.
475
476 Contains references to the files accessed by this process and its children.
477 """
478 def __init__(self, pid, files, executable, command, initial_cwd, children):
479 logging.debug('Process(%s, %d, ...)' % (pid, len(files)))
480 self.pid = pid
481 self.files = sorted(files, key=lambda x: x.path)
482 self.children = children
483 self.executable = executable
484 self.command = command
485 self.initial_cwd = initial_cwd
486
487 # Check internal consistency.
488 assert len(set(f.path for f in self.files)) == len(self.files), sorted(
489 f.path for f in self.files)
490 assert isinstance(self.children, list)
491 assert isinstance(self.files, list)
492
493 @property
494 def all(self):
495 for child in self.children:
496 for i in child.all:
497 yield i
498 yield self
499
500 def flatten(self):
501 return {
502 'children': [c.flatten() for c in self.children],
503 'command': self.command,
504 'executable': self.executable,
505 'files': [f.flatten() for f in self.files],
506 'initial_cwd': self.initial_cwd,
507 'pid': self.pid,
508 }
509
510 def strip_root(self, root):
maruel@chromium.org561d4b22013-09-26 21:08:08 +0000511 assert file_path.isabs(root) and root.endswith(os.path.sep), root
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000512 # Loads the files after since they are constructed as objects.
513 out = self.__class__(
514 self.pid,
515 filter(None, (f.strip_root(root) for f in self.files)),
516 self.executable,
517 self.command,
518 self.initial_cwd,
519 [c.strip_root(root) for c in self.children])
520 logging.debug(
521 'strip_root(%s) %d -> %d' % (root, len(self.files), len(out.files)))
522 return out
523
524 def __init__(self, process):
525 self.process = process
526 # Cache.
527 self._files = None
528
529 def flatten(self):
530 return {
531 'root': self.process.flatten(),
532 }
533
534 @property
535 def files(self):
536 if self._files is None:
537 self._files = sorted(
538 sum((p.files for p in self.process.all), []),
539 key=lambda x: x.path)
540 return self._files
541
542 @property
543 def existent(self):
544 return [f for f in self.files if f.existent]
545
546 @property
547 def non_existent(self):
548 return [f for f in self.files if not f.existent]
549
550 def strip_root(self, root):
551 """Returns a clone with all the files outside the directory |root| removed
552 and converts all the path to be relative paths.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000553
554 It keeps files accessible through the |root| directory or that have been
555 accessed through any symlink which points to the same directory.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000556 """
557 # Resolve any symlink
maruel12e30012015-10-09 11:55:35 -0700558 root = fs.realpath(root)
maruel@chromium.org561d4b22013-09-26 21:08:08 +0000559 root = (
560 file_path.get_native_path_case(root).rstrip(os.path.sep) + os.path.sep)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000561 logging.debug('strip_root(%s)' % root)
562 return Results(self.process.strip_root(root))
563
564
565class ApiBase(object):
566 """OS-agnostic API to trace a process and its children."""
567 class Context(object):
568 """Processes one log line at a time and keeps the list of traced processes.
569
570 The parsing is complicated by the fact that logs are traced out of order for
571 strace but in-order for dtrace and logman. In addition, on Windows it is
572 very frequent that processids are reused so a flat list cannot be used. But
573 at the same time, it is impossible to faithfully construct a graph when the
574 logs are processed out of order. So both a tree and a flat mapping are used,
575 the tree is the real process tree, while the flat mapping stores the last
576 valid process for the corresponding processid. For the strace case, the
577 tree's head is guessed at the last moment.
578 """
579 class Process(object):
580 """Keeps context for one traced child process.
581
582 Logs all the files this process touched. Ignores directories.
583 """
584 def __init__(self, blacklist, pid, initial_cwd):
585 # Check internal consistency.
586 assert isinstance(pid, int), repr(pid)
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000587 assert_is_renderable(initial_cwd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000588 self.pid = pid
589 # children are Process instances.
590 self.children = []
591 self.initial_cwd = initial_cwd
592 self.cwd = None
maruel@chromium.org538141b2013-06-03 20:57:17 +0000593 self.files = {}
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000594 self.executable = None
595 self.command = None
596 self._blacklist = blacklist
597
598 def to_results_process(self):
599 """Resolves file case sensitivity and or late-bound strings."""
600 # When resolving files, it's normal to get dupe because a file could be
601 # opened multiple times with different case. Resolve the deduplication
602 # here.
603 def fix_path(x):
604 """Returns the native file path case.
605
606 Converts late-bound strings.
607 """
608 if not x:
609 # Do not convert None instance to 'None'.
610 return x
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000611 x = render(x)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000612 if os.path.isabs(x):
613 # If the path is not absolute, which tends to happen occasionally on
614 # Windows, it is not possible to get the native path case so ignore
615 # that trace. It mostly happens for 'executable' value.
maruel@chromium.org561d4b22013-09-26 21:08:08 +0000616 x = file_path.get_native_path_case(x)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000617 return x
618
maruel@chromium.org538141b2013-06-03 20:57:17 +0000619 def fix_and_blacklist_path(x, m):
620 """Receives a tuple (filepath, mode) and processes filepath."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000621 x = fix_path(x)
622 if not x:
623 return
624 # The blacklist needs to be reapplied, since path casing could
625 # influence blacklisting.
626 if self._blacklist(x):
627 return
maruel@chromium.org538141b2013-06-03 20:57:17 +0000628 # Filters out directories. Some may have passed through.
maruel12e30012015-10-09 11:55:35 -0700629 if fs.isdir(x):
maruel@chromium.org538141b2013-06-03 20:57:17 +0000630 return
631 return x, m
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000632
maruel@chromium.org538141b2013-06-03 20:57:17 +0000633 # Renders all the files as strings, as some could be RelativePath
634 # instances. It is important to do it first since there could still be
635 # multiple entries with the same path but different modes.
636 rendered = (
637 fix_and_blacklist_path(f, m) for f, m in self.files.iteritems())
638 files = sorted(
639 (f for f in rendered if f),
640 key=lambda x: (x[0], Results.File.ACCEPTABLE_MODES.index(x[1])))
641 # Then converting into a dict will automatically clean up lesser
642 # important values.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000643 files = [
maruel@chromium.org538141b2013-06-03 20:57:17 +0000644 Results.File(None, f, False, None, m)
645 for f, m in dict(files).iteritems()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000646 ]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000647 return Results.Process(
648 self.pid,
649 files,
650 fix_path(self.executable),
651 self.command,
652 fix_path(self.initial_cwd),
653 [c.to_results_process() for c in self.children])
654
maruel@chromium.org538141b2013-06-03 20:57:17 +0000655 def add_file(self, filepath, mode):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000656 """Adds a file if it passes the blacklist."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000657 if self._blacklist(render(filepath)):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000658 return
maruel@chromium.org538141b2013-06-03 20:57:17 +0000659 logging.debug('add_file(%d, %s, %s)', self.pid, filepath, mode)
660 # Note that filepath and not render(filepath) is added. It is because
661 # filepath could be something else than a string, like a RelativePath
662 # instance for dtrace logs.
663 modes = Results.File.ACCEPTABLE_MODES
664 old_mode = self.files.setdefault(filepath, mode)
665 if old_mode != mode and modes.index(old_mode) < modes.index(mode):
666 # Take the highest value.
667 self.files[filepath] = mode
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000668
669 def __init__(self, blacklist):
670 self.blacklist = blacklist
671 # Initial process.
672 self.root_process = None
673 # dict to accelerate process lookup, to not have to lookup the whole graph
674 # each time.
675 self._process_lookup = {}
676
677 class Tracer(object):
678 """During it's lifetime, the tracing subsystem is enabled."""
679 def __init__(self, logname):
680 self._logname = logname
maruel@chromium.org808e3f22013-05-16 16:42:54 +0000681 self._lock = threading.RLock()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000682 self._traces = []
683 self._initialized = True
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +0000684 self._scripts_to_cleanup = []
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000685
686 def trace(self, cmd, cwd, tracename, output):
687 """Runs the OS-specific trace program on an executable.
688
689 Arguments:
690 - cmd: The command (a list) to run.
691 - cwd: Current directory to start the child process in.
692 - tracename: Name of the trace in the logname file.
693 - output: If False, redirects output to PIPEs.
694
695 Returns a tuple (resultcode, output) and updates the internal trace
696 entries.
697 """
698 # The implementation adds an item to self._traces.
699 raise NotImplementedError(self.__class__.__name__)
700
701 def close(self, _timeout=None):
702 """Saves the meta-data in the logname file.
703
704 For kernel-based tracing, stops the tracing subsystem.
705
706 Must not be used manually when using 'with' construct.
707 """
708 with self._lock:
maruel@chromium.org808e3f22013-05-16 16:42:54 +0000709 if not self._initialized:
710 raise TracingFailure(
711 'Called %s.close() on an unitialized object' %
712 self.__class__.__name__,
713 None, None, None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000714 try:
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +0000715 while self._scripts_to_cleanup:
716 try:
maruel12e30012015-10-09 11:55:35 -0700717 fs.remove(self._scripts_to_cleanup.pop())
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +0000718 except OSError as e:
719 logging.error('Failed to delete a temporary script: %s', e)
Marc-Antoine Ruelde011802013-11-12 15:19:47 -0500720 tools.write_json(self._logname, self._gen_logdata(), False)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000721 finally:
722 self._initialized = False
723
724 def post_process_log(self):
725 """Post-processes the log so it becomes faster to load afterward.
726
727 Must not be used manually when using 'with' construct.
728 """
729 assert not self._initialized, 'Must stop tracing first.'
730
maruel@chromium.org808e3f22013-05-16 16:42:54 +0000731 def _gen_logdata(self):
732 """Returns the data to be saved in the trace file."""
733 return {
734 'traces': self._traces,
735 }
736
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000737 def __enter__(self):
738 """Enables 'with' statement."""
739 return self
740
741 def __exit__(self, exc_type, exc_value, traceback):
742 """Enables 'with' statement."""
743 self.close()
744 # If an exception was thrown, do not process logs.
745 if not exc_type:
746 self.post_process_log()
747
748 def get_tracer(self, logname):
749 """Returns an ApiBase.Tracer instance.
750
751 Initializes the tracing subsystem, which is a requirement for kernel-based
752 tracers. Only one tracer instance should be live at a time!
753
754 logname is the filepath to the json file that will contain the meta-data
755 about the logs.
756 """
757 return self.Tracer(logname)
758
759 @staticmethod
760 def clean_trace(logname):
761 """Deletes an old log."""
762 raise NotImplementedError()
763
764 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +0000765 def parse_log(cls, logname, blacklist, trace_name):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000766 """Processes trace logs and returns the files opened and the files that do
767 not exist.
768
769 It does not track directories.
770
maruel@chromium.orgec74ff82012-10-29 18:14:47 +0000771 Arguments:
772 - logname: must be an absolute path.
773 - blacklist: must be a lambda.
774 - trace_name: optional trace to read, defaults to reading all traces.
maruel@chromium.org8b056ba2012-10-16 14:04:49 +0000775
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000776 Most of the time, files that do not exist are temporary test files that
777 should be put in /tmp instead. See http://crbug.com/116251.
778
779 Returns a list of dict with keys:
780 - results: A Results instance.
781 - trace: The corresponding tracename parameter provided to
782 get_tracer().trace().
783 - output: Output gathered during execution, if get_tracer().trace(...,
784 output=False) was used.
785 """
786 raise NotImplementedError(cls.__class__.__name__)
787
788
789class Strace(ApiBase):
790 """strace implies linux."""
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000791 @staticmethod
792 def load_filename(filename):
793 """Parses a filename in a log."""
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000794 # TODO(maruel): Be compatible with strace -x.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000795 assert isinstance(filename, str)
796 out = ''
797 i = 0
798 while i < len(filename):
799 c = filename[i]
800 if c == '\\':
801 out += chr(int(filename[i+1:i+4], 8))
802 i += 4
803 else:
804 out += c
805 i += 1
806 # TODO(maruel): That's not necessarily true that the current code page is
807 # utf-8.
808 return out.decode('utf-8')
809
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000810 class Context(ApiBase.Context):
811 """Processes a strace log line and keeps the list of existent and non
812 existent files accessed.
813
814 Ignores directories.
815
816 Uses late-binding to processes the cwd of each process. The problem is that
817 strace generates one log file per process it traced but doesn't give any
maruel@chromium.org2909b7c2013-05-29 18:52:59 +0000818 information about which process was started when and by who. So process the
819 logs out of order and use late binding with RelativePath to be able to
820 deduce the initial directory of each process once all the logs are parsed.
821
822 TODO(maruel): Use the script even in the non-sudo case, so log parsing can
823 be done in two phase: first find the root process, then process the child
824 processes in order. With that, it should be possible to not use RelativePath
825 anymore. This would significantly simplify the code!
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000826 """
827 class Process(ApiBase.Context.Process):
828 """Represents the state of a process.
829
830 Contains all the information retrieved from the pid-specific log.
831 """
832 # Function names are using ([a-z_0-9]+)
833 # This is the most common format. function(args) = result
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000834 RE_HEADER = re.compile(r'^([a-z_0-9]+)\((.*?)\)\s+= (.+)$')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000835 # An interrupted function call, only grab the minimal header.
836 RE_UNFINISHED = re.compile(r'^([^\(]+)(.*) \<unfinished \.\.\.\>$')
837 # A resumed function call.
838 RE_RESUMED = re.compile(r'^<\.\.\. ([^ ]+) resumed> (.+)$')
839 # A process received a signal.
840 RE_SIGNAL = re.compile(r'^--- SIG[A-Z]+ .+ ---')
841 # A process didn't handle a signal. Ignore any junk appearing before,
842 # because the process was forcibly killed so it won't open any new file.
843 RE_KILLED = re.compile(
844 r'^.*\+\+\+ killed by ([A-Z]+)( \(core dumped\))? \+\+\+$')
845 # The process has exited.
846 RE_PROCESS_EXITED = re.compile(r'^\+\+\+ exited with (\d+) \+\+\+')
847 # A call was canceled. Ignore any prefix.
848 RE_UNAVAILABLE = re.compile(r'^.*\)\s*= \? <unavailable>$')
849 # Happens when strace fails to even get the function name.
850 UNNAMED_FUNCTION = '????'
851
852 # Corner-case in python, a class member function decorator must not be
853 # @staticmethod.
854 def parse_args(regexp, expect_zero): # pylint: disable=E0213
855 """Automatically convert the str 'args' into a list of processed
856 arguments.
857
858 Arguments:
859 - regexp is used to parse args.
860 - expect_zero: one of True, False or None.
861 - True: will check for result.startswith('0') first and will ignore
862 the trace line completely otherwise. This is important because for
863 many functions, the regexp will not process if the call failed.
864 - False: will check for not result.startswith(('?', '-1')) for the
865 same reason than with True.
866 - None: ignore result.
867 """
868 def meta_hook(function):
869 assert function.__name__.startswith('handle_')
870 def hook(self, args, result):
871 if expect_zero is True and not result.startswith('0'):
872 return
873 if expect_zero is False and result.startswith(('?', '-1')):
874 return
875 match = re.match(regexp, args)
876 if not match:
877 raise TracingFailure(
878 'Failed to parse %s(%s) = %s' %
879 (function.__name__[len('handle_'):], args, result),
880 None, None, None)
881 return function(self, match.groups(), result)
882 return hook
883 return meta_hook
884
885 class RelativePath(object):
886 """A late-bound relative path."""
887 def __init__(self, parent, value):
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000888 assert_is_renderable(parent)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000889 self.parent = parent
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000890 assert (
891 value is None or
892 (isinstance(value, unicode) and not os.path.isabs(value)))
893 self.value = value
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000894 if self.value:
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000895 # TODO(maruel): On POSIX, '\\' is a valid character so remove this
896 # assert.
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000897 assert '\\' not in self.value, value
898 assert '\\' not in self.value, (repr(value), repr(self.value))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000899
900 def render(self):
901 """Returns the current directory this instance is representing.
902
903 This function is used to return the late-bound value.
904 """
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000905 assert self.parent is not None
906 parent = render(self.parent)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000907 if self.value:
908 return os.path.normpath(os.path.join(parent, self.value))
909 return parent
910
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000911 def __init__(self, root, pid):
912 """Keeps enough information to be able to guess the original process
913 root.
914
915 strace doesn't store which process was the initial process. So more
916 information needs to be kept so the graph can be reconstructed from the
917 flat map.
918 """
919 logging.info('%s(%d)' % (self.__class__.__name__, pid))
920 super(Strace.Context.Process, self).__init__(root.blacklist, pid, None)
921 assert isinstance(root, ApiBase.Context)
922 self._root = weakref.ref(root)
923 # The dict key is the function name of the pending call, like 'open'
924 # or 'execve'.
925 self._pending_calls = {}
926 self._line_number = 0
927 # Current directory when the process started.
maruel@chromium.orgbea76be2013-06-03 20:59:25 +0000928 if isinstance(self._root(), unicode):
929 self.initial_cwd = self._root()
930 else:
931 self.initial_cwd = self.RelativePath(self._root(), None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000932 self.parentid = None
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +0000933 self._done = False
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000934
935 def get_cwd(self):
936 """Returns the best known value of cwd."""
937 return self.cwd or self.initial_cwd
938
939 def render(self):
940 """Returns the string value of the RelativePath() object.
941
942 Used by RelativePath. Returns the initial directory and not the
943 current one since the current directory 'cwd' validity is time-limited.
944
945 The validity is only guaranteed once all the logs are processed.
946 """
947 return self.initial_cwd.render()
948
949 def on_line(self, line):
maruel@chromium.org306e0e72012-11-02 18:22:03 +0000950 assert isinstance(line, str)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000951 self._line_number += 1
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000952 try:
maruel@chromium.org588e09d2013-05-21 14:38:57 +0000953 if self._done:
954 raise TracingFailure(
955 'Found a trace for a terminated process or corrupted log',
956 None, None, None)
957
958 if self.RE_SIGNAL.match(line):
959 # Ignore signals.
960 return
961
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +0000962 match = self.RE_KILLED.match(line)
963 if match:
964 # Converts a '+++ killed by Foo +++' trace into an exit_group().
965 self.handle_exit_group(match.group(1), None)
966 return
967
968 match = self.RE_PROCESS_EXITED.match(line)
969 if match:
970 # Converts a '+++ exited with 1 +++' trace into an exit_group()
971 self.handle_exit_group(match.group(1), None)
972 return
973
974 match = self.RE_UNFINISHED.match(line)
975 if match:
976 if match.group(1) in self._pending_calls:
977 raise TracingFailure(
978 'Found two unfinished calls for the same function',
979 None, None, None,
980 self._pending_calls)
981 self._pending_calls[match.group(1)] = (
982 match.group(1) + match.group(2))
983 return
984
985 match = self.RE_UNAVAILABLE.match(line)
986 if match:
987 # This usually means a process was killed and a pending call was
988 # canceled.
989 # TODO(maruel): Look up the last exit_group() trace just above and
990 # make sure any self._pending_calls[anything] is properly flushed.
991 return
992
993 match = self.RE_RESUMED.match(line)
994 if match:
995 if match.group(1) not in self._pending_calls:
996 raise TracingFailure(
997 'Found a resumed call that was not logged as unfinished',
998 None, None, None,
999 self._pending_calls)
1000 pending = self._pending_calls.pop(match.group(1))
1001 # Reconstruct the line.
1002 line = pending + match.group(2)
1003
1004 match = self.RE_HEADER.match(line)
1005 if not match:
maruel@chromium.orgbcfc9e12013-03-19 18:47:17 +00001006 # The line is corrupted. It happens occasionally when a process is
1007 # killed forcibly with activity going on. Assume the process died.
1008 # No other line can be processed afterward.
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001009 logging.debug('%d is done: %s', self.pid, line)
maruel@chromium.orgbcfc9e12013-03-19 18:47:17 +00001010 self._done = True
1011 return
1012
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001013 if match.group(1) == self.UNNAMED_FUNCTION:
1014 return
1015
1016 # It's a valid line, handle it.
1017 handler = getattr(self, 'handle_%s' % match.group(1), None)
1018 if not handler:
1019 self._handle_unknown(match.group(1), match.group(2), match.group(3))
1020 return handler(match.group(2), match.group(3))
1021 except TracingFailure, e:
1022 # Hack in the values since the handler could be a static function.
1023 e.pid = self.pid
1024 e.line = line
1025 e.line_number = self._line_number
1026 # Re-raise the modified exception.
1027 raise
1028 except (KeyError, NotImplementedError, ValueError), e:
1029 raise TracingFailure(
1030 'Trace generated a %s exception: %s' % (
1031 e.__class__.__name__, str(e)),
1032 self.pid,
1033 self._line_number,
1034 line,
1035 e)
1036
1037 @parse_args(r'^\"(.+?)\", [FKORWX_|]+$', True)
1038 def handle_access(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001039 self._handle_file(args[0], Results.File.TOUCHED)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001040
1041 @parse_args(r'^\"(.+?)\"$', True)
1042 def handle_chdir(self, args, _result):
1043 """Updates cwd."""
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001044 self.cwd = self._mangle(args[0])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001045 logging.debug('handle_chdir(%d, %s)' % (self.pid, self.cwd))
1046
maruel@chromium.org538141b2013-06-03 20:57:17 +00001047 @parse_args(r'^\"(.+?)\", (\d+), (\d+)$', False)
1048 def handle_chown(self, args, _result):
1049 # TODO(maruel): Look at result?
1050 self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org55550722013-05-28 14:43:14 +00001051
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001052 def handle_clone(self, _args, result):
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001053 self._handling_forking('clone', result)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001054
1055 def handle_close(self, _args, _result):
1056 pass
1057
maruel@chromium.org538141b2013-06-03 20:57:17 +00001058 @parse_args(r'^\"(.+?)\", (\d+)$', False)
1059 def handle_chmod(self, args, _result):
1060 self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001061
maruel@chromium.org538141b2013-06-03 20:57:17 +00001062 @parse_args(r'^\"(.+?)\", (\d+)$', False)
1063 def handle_creat(self, args, _result):
1064 self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001065
1066 @parse_args(r'^\"(.+?)\", \[(.+)\], \[\/\* \d+ vars? \*\/\]$', True)
1067 def handle_execve(self, args, _result):
1068 # Even if in practice execve() doesn't returns when it succeeds, strace
1069 # still prints '0' as the result.
1070 filepath = args[0]
maruel@chromium.org538141b2013-06-03 20:57:17 +00001071 self._handle_file(filepath, Results.File.READ)
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001072 self.executable = self._mangle(filepath)
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001073 try:
1074 self.command = strace_process_quoted_arguments(args[1])
1075 except ValueError as e:
1076 raise TracingFailure(
1077 'Failed to process command line argument:\n%s' % e.args[0],
1078 None, None, None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001079
1080 def handle_exit_group(self, _args, _result):
1081 """Removes cwd."""
1082 self.cwd = None
1083
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001084 @parse_args(r'^(\d+|AT_FDCWD), \"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', True)
1085 def handle_faccessat(self, args, _results):
1086 if args[0] == 'AT_FDCWD':
maruel@chromium.org538141b2013-06-03 20:57:17 +00001087 self._handle_file(args[1], Results.File.TOUCHED)
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001088 else:
1089 raise Exception('Relative faccess not implemented.')
1090
maruel@chromium.org0781f322013-05-28 19:45:49 +00001091 def handle_fallocate(self, _args, result):
1092 pass
1093
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001094 def handle_fork(self, args, result):
1095 self._handle_unknown('fork', args, result)
1096
maruel@chromium.org9f72e0f2012-10-18 14:48:49 +00001097 def handle_futex(self, _args, _result):
1098 pass
1099
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001100 @parse_args(r'^(\".+?\"|0x[a-f0-9]+), (\d+)$', False)
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001101 def handle_getcwd(self, args, _result):
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001102 # TODO(maruel): Resolve file handle.
1103 if not args[0].startswith('0x'):
1104 filepath = args[0][1:-1]
1105 if os.path.isabs(filepath):
1106 logging.debug('handle_chdir(%d, %s)' % (self.pid, self.cwd))
1107 if not isinstance(self.cwd, unicode):
1108 # Take the occasion to reset the path.
1109 self.cwd = self._mangle(filepath)
1110 else:
1111 # It should always match.
1112 assert self.cwd == Strace.load_filename(filepath), (
1113 self.cwd, filepath)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001114
1115 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1116 def handle_link(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001117 self._handle_file(args[0], Results.File.READ)
1118 self._handle_file(args[1], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001119
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001120 @parse_args(r'^(\".+?\"|0x[a-f0-9]+), \{(?:.+?, )?\.\.\.\}$', True)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001121 def handle_lstat(self, args, _result):
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001122 # TODO(maruel): Resolve file handle.
1123 if not args[0].startswith('0x'):
1124 self._handle_file(args[0][1:-1], Results.File.TOUCHED)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001125
1126 def handle_mkdir(self, _args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001127 # We track content, not directories.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001128 pass
1129
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001130 @parse_args(r'^(\".*?\"|0x[a-f0-9]+), ([A-Z\_\|]+)(|, \d+)$', False)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001131 def handle_open(self, args, _result):
1132 if 'O_DIRECTORY' in args[1]:
1133 return
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001134 # TODO(maruel): Resolve file handle.
1135 if not args[0].startswith('0x'):
1136 t = Results.File.READ if 'O_RDONLY' in args[1] else Results.File.WRITE
1137 self._handle_file(args[0][1:-1], t)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001138
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001139 @parse_args(
1140 r'^(\d+|AT_FDCWD), (\".*?\"|0x[a-f0-9]+), ([A-Z\_\|]+)(|, \d+)$',
1141 False)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001142 def handle_openat(self, args, _result):
1143 if 'O_DIRECTORY' in args[2]:
1144 return
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001145 if args[0] == 'AT_FDCWD':
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001146 # TODO(maruel): Resolve file handle.
1147 if not args[1].startswith('0x'):
1148 t = (
1149 Results.File.READ if 'O_RDONLY' in args[2]
maruel@chromium.org538141b2013-06-03 20:57:17 +00001150 else Results.File.WRITE)
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001151 self._handle_file(args[1][1:-1], t)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001152 else:
1153 # TODO(maruel): Implement relative open if necessary instead of the
1154 # AT_FDCWD flag, let's hope not since this means tracking all active
1155 # directory handles.
maruel@chromium.org538141b2013-06-03 20:57:17 +00001156 raise NotImplementedError('Relative open via openat not implemented.')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001157
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001158 @parse_args(
1159 r'^(\".+?\"|0x[a-f0-9]+), (?:\".+?\"(?:\.\.\.)?|0x[a-f0-9]+), '
1160 '\d+$',
1161 False)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001162 def handle_readlink(self, args, _result):
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001163 # TODO(maruel): Resolve file handle.
1164 if not args[0].startswith('0x'):
1165 self._handle_file(args[0][1:-1], Results.File.READ)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001166
1167 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1168 def handle_rename(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001169 self._handle_file(args[0], Results.File.READ)
1170 self._handle_file(args[1], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001171
1172 def handle_rmdir(self, _args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001173 # We track content, not directories.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001174 pass
1175
1176 def handle_setxattr(self, _args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001177 # TODO(maruel): self._handle_file(args[0], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001178 pass
1179
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001180 @parse_args(r'^(\".+?\"|0x[a-f0-9]+), \{.+?\}$', True)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001181 def handle_stat(self, args, _result):
Marc-Antoine Ruelaa4fff52014-10-24 09:10:13 -04001182 # TODO(maruel): Resolve file handle.
1183 if not args[0].startswith('0x'):
1184 self._handle_file(args[0][1:-1], Results.File.TOUCHED)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001185
maruel@chromium.org538141b2013-06-03 20:57:17 +00001186 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1187 def handle_symlink(self, args, _result):
1188 self._handle_file(args[0], Results.File.TOUCHED)
1189 self._handle_file(args[1], Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001190
csharp@chromium.orgfa672922012-11-02 17:43:31 +00001191 @parse_args(r'^\"(.+?)\", \d+', True)
1192 def handle_truncate(self, args, _result):
maruel@chromium.org538141b2013-06-03 20:57:17 +00001193 self._handle_file(args[0], Results.File.WRITE)
csharp@chromium.orgfa672922012-11-02 17:43:31 +00001194
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001195 def handle_unlink(self, _args, _result):
1196 # In theory, the file had to be created anyway.
1197 pass
1198
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001199 def handle_unlinkat(self, _args, _result):
1200 # In theory, the file had to be created anyway.
1201 pass
1202
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001203 def handle_statfs(self, _args, _result):
1204 pass
1205
csharp@chromium.org11ae0672012-10-30 14:53:17 +00001206 def handle_utimensat(self, _args, _result):
1207 pass
1208
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001209 def handle_vfork(self, _args, result):
1210 self._handling_forking('vfork', result)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001211
1212 @staticmethod
1213 def _handle_unknown(function, args, result):
1214 raise TracingFailure(
1215 'Unexpected/unimplemented trace %s(%s)= %s' %
1216 (function, args, result),
1217 None, None, None)
1218
maruel@chromium.org588e09d2013-05-21 14:38:57 +00001219 def _handling_forking(self, name, result):
1220 """Transfers cwd."""
1221 if result.startswith(('?', '-1')):
1222 # The call failed.
1223 return
1224 # Update the other process right away.
1225 childpid = int(result)
1226 child = self._root().get_or_set_proc(childpid)
1227 if child.parentid is not None or childpid in self.children:
1228 raise TracingFailure(
1229 'Found internal inconsitency in process lifetime detection '
1230 'during a %s() call' % name,
1231 None, None, None)
1232
1233 # Copy the cwd object.
1234 child.initial_cwd = self.get_cwd()
1235 child.parentid = self.pid
1236 # It is necessary because the logs are processed out of order.
1237 self.children.append(child)
1238
maruel@chromium.org538141b2013-06-03 20:57:17 +00001239 def _handle_file(self, filepath, mode):
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001240 filepath = self._mangle(filepath)
maruel@chromium.org538141b2013-06-03 20:57:17 +00001241 self.add_file(filepath, mode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001242
maruel@chromium.orgbea76be2013-06-03 20:59:25 +00001243 def _mangle(self, filepath):
1244 """Decodes a filepath found in the log and convert it to a late-bound
1245 path if necessary.
1246
1247 |filepath| is an strace 'encoded' string and the returned value is
1248 either an unicode string if the path was absolute or a late bound path
1249 otherwise.
1250 """
1251 filepath = Strace.load_filename(filepath)
1252 if os.path.isabs(filepath):
1253 return filepath
1254 else:
1255 if isinstance(self.get_cwd(), unicode):
1256 return os.path.normpath(os.path.join(self.get_cwd(), filepath))
1257 return self.RelativePath(self.get_cwd(), filepath)
1258
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001259 def __init__(self, blacklist, root_pid, initial_cwd):
1260 """|root_pid| may be None when the root process is not known.
1261
1262 In that case, a search is done after reading all the logs to figure out
1263 the root process.
1264 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001265 super(Strace.Context, self).__init__(blacklist)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001266 assert_is_renderable(initial_cwd)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001267 self.root_pid = root_pid
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001268 self.initial_cwd = initial_cwd
1269
1270 def render(self):
1271 """Returns the string value of the initial cwd of the root process.
1272
1273 Used by RelativePath.
1274 """
1275 return self.initial_cwd
1276
1277 def on_line(self, pid, line):
1278 """Transfers control into the Process.on_line() function."""
1279 self.get_or_set_proc(pid).on_line(line.strip())
1280
1281 def to_results(self):
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001282 """If necessary, finds back the root process and verify consistency."""
1283 if not self.root_pid:
1284 # The non-sudo case. The traced process was started by strace itself,
1285 # so the pid of the traced process is not known.
1286 root = [p for p in self._process_lookup.itervalues() if not p.parentid]
1287 if len(root) == 1:
1288 self.root_process = root[0]
1289 # Save it for later.
1290 self.root_pid = self.root_process.pid
1291 else:
1292 # The sudo case. The traced process was started manually so its pid is
1293 # known.
1294 self.root_process = self._process_lookup.get(self.root_pid)
1295 if not self.root_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001296 raise TracingFailure(
1297 'Found internal inconsitency in process lifetime detection '
1298 'while finding the root process',
1299 None,
1300 None,
1301 None,
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001302 self.root_pid,
1303 sorted(self._process_lookup))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001304 process = self.root_process.to_results_process()
1305 if sorted(self._process_lookup) != sorted(p.pid for p in process.all):
1306 raise TracingFailure(
1307 'Found internal inconsitency in process lifetime detection '
1308 'while looking for len(tree) == len(list)',
1309 None,
1310 None,
1311 None,
1312 sorted(self._process_lookup),
1313 sorted(p.pid for p in process.all))
1314 return Results(process)
1315
1316 def get_or_set_proc(self, pid):
1317 """Returns the Context.Process instance for this pid or creates a new one.
1318 """
1319 if not pid or not isinstance(pid, int):
1320 raise TracingFailure(
1321 'Unpexpected value for pid: %r' % pid,
1322 pid,
1323 None,
1324 None,
1325 pid)
1326 if pid not in self._process_lookup:
1327 self._process_lookup[pid] = self.Process(self, pid)
1328 return self._process_lookup[pid]
1329
1330 @classmethod
1331 def traces(cls):
1332 """Returns the list of all handled traces to pass this as an argument to
1333 strace.
1334 """
1335 prefix = 'handle_'
1336 return [i[len(prefix):] for i in dir(cls.Process) if i.startswith(prefix)]
1337
1338 class Tracer(ApiBase.Tracer):
1339 MAX_LEN = 256
1340
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001341 def __init__(self, logname, use_sudo):
1342 super(Strace.Tracer, self).__init__(logname)
1343 self.use_sudo = use_sudo
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001344 if use_sudo:
1345 # TODO(maruel): Use the jump script systematically to make it easy to
1346 # figure out the root process, so RelativePath is not necessary anymore.
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00001347 self._child_script = create_exec_thunk()
1348 self._scripts_to_cleanup.append(self._child_script)
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001349
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001350 def trace(self, cmd, cwd, tracename, output):
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001351 """Runs strace on an executable.
1352
1353 When use_sudo=True, it is a 3-phases process: start the thunk, start
1354 sudo strace with the pid of the thunk and then have the thunk os.execve()
1355 the process to trace.
1356 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001357 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
1358 assert os.path.isabs(cmd[0]), cmd[0]
1359 assert os.path.isabs(cwd), cwd
1360 assert os.path.normpath(cwd) == cwd, cwd
1361 with self._lock:
1362 if not self._initialized:
1363 raise TracingFailure(
1364 'Called Tracer.trace() on an unitialized object',
1365 None, None, None, tracename)
1366 assert tracename not in (i['trace'] for i in self._traces)
1367 stdout = stderr = None
1368 if output:
1369 stdout = subprocess.PIPE
1370 stderr = subprocess.STDOUT
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001371
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001372 # Ensure all file related APIs are hooked.
1373 traces = ','.join(Strace.Context.traces() + ['file'])
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001374 flags = [
maruel@chromium.orga5cfdc32013-05-28 14:46:15 +00001375 # Each child process has its own trace file. It is necessary because
1376 # strace may generate corrupted log file if multiple processes are
1377 # heavily doing syscalls simultaneously.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001378 '-ff',
maruel@chromium.orga5cfdc32013-05-28 14:46:15 +00001379 # Reduce whitespace usage.
1380 '-a1',
1381 # hex encode non-ascii strings.
1382 # TODO(maruel): '-x',
1383 # TODO(maruel): '-ttt',
1384 # Signals are unnecessary noise here. Note the parser can cope with them
1385 # but reduce the unnecessary output.
1386 '-esignal=none',
1387 # Print as much data as wanted.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001388 '-s', '%d' % self.MAX_LEN,
1389 '-e', 'trace=%s' % traces,
1390 '-o', self._logname + '.' + tracename,
1391 ]
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001392
1393 if self.use_sudo:
1394 pipe_r, pipe_w = os.pipe()
1395 # Start the child process paused.
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00001396 target_cmd = [sys.executable, self._child_script, str(pipe_r)] + cmd
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001397 logging.debug(' '.join(target_cmd))
1398 child_proc = subprocess.Popen(
1399 target_cmd,
1400 stdin=subprocess.PIPE,
1401 stdout=stdout,
1402 stderr=stderr,
1403 cwd=cwd)
1404
1405 # TODO(maruel): both processes must use the same UID for it to work
1406 # without sudo. Look why -p is failing at the moment without sudo.
1407 trace_cmd = [
1408 'sudo',
1409 'strace',
1410 '-p', str(child_proc.pid),
1411 ] + flags
1412 logging.debug(' '.join(trace_cmd))
1413 strace_proc = subprocess.Popen(
1414 trace_cmd,
1415 cwd=cwd,
1416 stdin=subprocess.PIPE,
1417 stdout=subprocess.PIPE,
1418 stderr=subprocess.PIPE)
1419
1420 line = strace_proc.stderr.readline()
1421 if not re.match(r'^Process \d+ attached \- interrupt to quit$', line):
1422 # TODO(maruel): Raise an exception.
1423 assert False, line
1424
1425 # Now fire the child process.
1426 os.write(pipe_w, 'x')
1427
1428 out = child_proc.communicate()[0]
1429 strace_out = strace_proc.communicate()[0]
1430
1431 # TODO(maruel): if strace_proc.returncode: Add an exception.
1432 saved_out = strace_out if strace_proc.returncode else out
1433 root_pid = child_proc.pid
1434 else:
1435 # Non-sudo case.
1436 trace_cmd = [
1437 'strace',
1438 ] + flags + cmd
1439 logging.debug(' '.join(trace_cmd))
1440 child_proc = subprocess.Popen(
1441 trace_cmd,
1442 cwd=cwd,
1443 stdin=subprocess.PIPE,
1444 stdout=stdout,
1445 stderr=stderr)
1446 out = child_proc.communicate()[0]
1447 # TODO(maruel): Walk the logs and figure out the root process would
1448 # simplify parsing the logs a *lot*.
1449 saved_out = out
1450 # The trace reader will have to figure out.
1451 root_pid = None
1452
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001453 with self._lock:
1454 assert tracename not in (i['trace'] for i in self._traces)
1455 self._traces.append(
1456 {
1457 'cmd': cmd,
1458 'cwd': cwd,
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001459 'output': saved_out,
1460 'pid': root_pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001461 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001462 })
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001463 return child_proc.returncode, out
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001464
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00001465 def __init__(self, use_sudo=None):
1466 super(Strace, self).__init__()
1467 self.use_sudo = use_sudo
1468
1469 def get_tracer(self, logname):
1470 return self.Tracer(logname, self.use_sudo)
1471
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001472 @staticmethod
1473 def clean_trace(logname):
maruel12e30012015-10-09 11:55:35 -07001474 if fs.isfile(logname):
1475 fs.remove(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001476 # Also delete any pid specific file from previous traces.
1477 for i in glob.iglob(logname + '.*'):
1478 if i.rsplit('.', 1)[1].isdigit():
maruel12e30012015-10-09 11:55:35 -07001479 fs.remove(i)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001480
1481 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001482 def parse_log(cls, logname, blacklist, trace_name):
1483 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00001484 assert os.path.isabs(logname)
Marc-Antoine Ruelde011802013-11-12 15:19:47 -05001485 data = tools.read_json(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001486 out = []
1487 for item in data['traces']:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00001488 if trace_name and item['trace'] != trace_name:
1489 continue
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001490 result = {
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001491 'output': item['output'],
maruel@chromium.org964ddde2012-10-29 18:13:46 +00001492 'trace': item['trace'],
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001493 }
1494 try:
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001495 context = cls.Context(blacklist, item['pid'], item['cwd'])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001496 for pidfile in glob.iglob('%s.%s.*' % (logname, item['trace'])):
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001497 logging.debug('Reading %s', pidfile)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001498 pid = pidfile.rsplit('.', 1)[1]
1499 if pid.isdigit():
1500 pid = int(pid)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001501 found_line = False
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001502 for line in open(pidfile, 'rb'):
1503 context.on_line(pid, line)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001504 found_line = True
1505 if not found_line:
1506 # Ensures that a completely empty trace still creates the
1507 # corresponding Process instance by logging a dummy line.
1508 context.on_line(pid, '')
1509 else:
1510 logging.warning('Found unexpected file %s', pidfile)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001511 result['results'] = context.to_results()
maruel@chromium.org424f5692012-10-15 15:42:38 +00001512 except TracingFailure:
1513 result['exception'] = sys.exc_info()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001514 out.append(result)
1515 return out
1516
1517
1518class Dtrace(ApiBase):
1519 """Uses DTrace framework through dtrace. Requires root access.
1520
1521 Implies Mac OSX.
1522
1523 dtruss can't be used because it has compatibility issues with python.
1524
1525 Also, the pid->cwd handling needs to be done manually since OSX has no way to
1526 get the absolute path of the 'cwd' dtrace variable from the probe.
1527
1528 Also, OSX doesn't populate curpsinfo->pr_psargs properly, see
1529 https://discussions.apple.com/thread/1980539. So resort to handling execve()
1530 manually.
1531
1532 errno is not printed in the log since this implementation currently only cares
1533 about files that were successfully opened.
1534 """
1535 class Context(ApiBase.Context):
1536 # Format: index pid function(args)
1537 RE_HEADER = re.compile(r'^\d+ (\d+) ([a-zA-Z_\-]+)\((.*?)\)$')
1538
1539 # Arguments parsing.
1540 RE_DTRACE_BEGIN = re.compile(r'^\"(.+?)\"$')
1541 RE_CHDIR = re.compile(r'^\"(.+?)\"$')
1542 RE_EXECVE = re.compile(r'^\"(.+?)\", \[(\d+), (.+)\]$')
1543 RE_OPEN = re.compile(r'^\"(.+?)\", (0x[0-9a-z]+), (0x[0-9a-z]+)$')
1544 RE_PROC_START = re.compile(r'^(\d+), \"(.+?)\", (\d+)$')
1545 RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$')
1546
maruel@chromium.orgac361162013-06-04 15:54:06 +00001547 # O_DIRECTORY is not defined on Windows and dtrace doesn't exist on Windows.
1548 O_DIRECTORY = os.O_DIRECTORY if hasattr(os, 'O_DIRECTORY') else None
maruel@chromium.org538141b2013-06-03 20:57:17 +00001549 O_RDWR = os.O_RDWR
1550 O_WRONLY = os.O_WRONLY
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001551
1552 class Process(ApiBase.Context.Process):
1553 def __init__(self, *args):
1554 super(Dtrace.Context.Process, self).__init__(*args)
1555 self.cwd = self.initial_cwd
1556
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001557 def __init__(self, blacklist, thunk_pid, initial_cwd):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001558 logging.info(
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001559 '%s(%d, %s)' % (self.__class__.__name__, thunk_pid, initial_cwd))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001560 super(Dtrace.Context, self).__init__(blacklist)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001561 assert isinstance(initial_cwd, unicode), initial_cwd
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001562 # Process ID of the temporary script created by create_subprocess_thunk().
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001563 self._thunk_pid = thunk_pid
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001564 self._initial_cwd = initial_cwd
1565 self._line_number = 0
1566
1567 def on_line(self, line):
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001568 assert isinstance(line, unicode), line
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001569 self._line_number += 1
1570 match = self.RE_HEADER.match(line)
1571 if not match:
1572 raise TracingFailure(
1573 'Found malformed line: %s' % line,
1574 None,
1575 self._line_number,
1576 line)
1577 fn = getattr(
1578 self,
1579 'handle_%s' % match.group(2).replace('-', '_'),
1580 self._handle_ignored)
1581 # It is guaranteed to succeed because of the regexp. Or at least I thought
1582 # it would.
1583 pid = int(match.group(1))
1584 try:
1585 return fn(pid, match.group(3))
1586 except TracingFailure, e:
1587 # Hack in the values since the handler could be a static function.
1588 e.pid = pid
1589 e.line = line
1590 e.line_number = self._line_number
1591 # Re-raise the modified exception.
1592 raise
1593 except (KeyError, NotImplementedError, ValueError), e:
1594 raise TracingFailure(
1595 'Trace generated a %s exception: %s' % (
1596 e.__class__.__name__, str(e)),
1597 pid,
1598 self._line_number,
1599 line,
1600 e)
1601
1602 def to_results(self):
1603 process = self.root_process.to_results_process()
1604 # Internal concistency check.
1605 if sorted(self._process_lookup) != sorted(p.pid for p in process.all):
1606 raise TracingFailure(
1607 'Found internal inconsitency in process lifetime detection '
1608 'while looking for len(tree) == len(list)',
1609 None,
1610 None,
1611 None,
1612 sorted(self._process_lookup),
1613 sorted(p.pid for p in process.all))
1614 return Results(process)
1615
1616 def handle_dtrace_BEGIN(self, _pid, args):
1617 if not self.RE_DTRACE_BEGIN.match(args):
1618 raise TracingFailure(
1619 'Found internal inconsitency in dtrace_BEGIN log line',
1620 None, None, None)
1621
1622 def handle_proc_start(self, pid, args):
1623 """Transfers cwd.
1624
1625 The dtrace script already takes care of only tracing the processes that
1626 are child of the traced processes so there is no need to verify the
1627 process hierarchy.
1628 """
1629 if pid in self._process_lookup:
1630 raise TracingFailure(
1631 'Found internal inconsitency in proc_start: %d started two times' %
1632 pid,
1633 None, None, None)
1634 match = self.RE_PROC_START.match(args)
1635 if not match:
1636 raise TracingFailure(
1637 'Failed to parse arguments: %s' % args,
1638 None, None, None)
1639 ppid = int(match.group(1))
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001640 if ppid == self._thunk_pid and not self.root_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001641 proc = self.root_process = self.Process(
1642 self.blacklist, pid, self._initial_cwd)
1643 elif ppid in self._process_lookup:
1644 proc = self.Process(self.blacklist, pid, self._process_lookup[ppid].cwd)
1645 self._process_lookup[ppid].children.append(proc)
1646 else:
1647 # Another process tree, ignore.
1648 return
1649 self._process_lookup[pid] = proc
1650 logging.debug(
1651 'New child: %s -> %d cwd:%s' %
maruel@chromium.org306e0e72012-11-02 18:22:03 +00001652 (ppid, pid, render(proc.initial_cwd)))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001653
1654 def handle_proc_exit(self, pid, _args):
1655 """Removes cwd."""
1656 if pid in self._process_lookup:
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00001657 # self._thunk_pid is not traced itself and other traces run neither.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001658 self._process_lookup[pid].cwd = None
1659
1660 def handle_execve(self, pid, args):
1661 """Sets the process' executable.
1662
1663 TODO(maruel): Read command line arguments. See
1664 https://discussions.apple.com/thread/1980539 for an example.
1665 https://gist.github.com/1242279
1666
1667 Will have to put the answer at http://stackoverflow.com/questions/7556249.
1668 :)
1669 """
1670 if not pid in self._process_lookup:
1671 # Another process tree, ignore.
1672 return
1673 match = self.RE_EXECVE.match(args)
1674 if not match:
1675 raise TracingFailure(
1676 'Failed to parse arguments: %r' % args,
1677 None, None, None)
1678 proc = self._process_lookup[pid]
1679 proc.executable = match.group(1)
maruel@chromium.org538141b2013-06-03 20:57:17 +00001680 self._handle_file(pid, proc.executable, Results.File.READ)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001681 proc.command = self.process_escaped_arguments(match.group(3))
1682 if int(match.group(2)) != len(proc.command):
1683 raise TracingFailure(
1684 'Failed to parse execve() arguments: %s' % args,
1685 None, None, None)
1686
1687 def handle_chdir(self, pid, args):
1688 """Updates cwd."""
1689 if pid not in self._process_lookup:
1690 # Another process tree, ignore.
1691 return
1692 cwd = self.RE_CHDIR.match(args).group(1)
1693 if not cwd.startswith('/'):
1694 cwd2 = os.path.join(self._process_lookup[pid].cwd, cwd)
1695 logging.debug('handle_chdir(%d, %s) -> %s' % (pid, cwd, cwd2))
1696 else:
1697 logging.debug('handle_chdir(%d, %s)' % (pid, cwd))
1698 cwd2 = cwd
1699 self._process_lookup[pid].cwd = cwd2
1700
1701 def handle_open_nocancel(self, pid, args):
1702 """Redirects to handle_open()."""
1703 return self.handle_open(pid, args)
1704
1705 def handle_open(self, pid, args):
1706 if pid not in self._process_lookup:
1707 # Another process tree, ignore.
1708 return
1709 match = self.RE_OPEN.match(args)
1710 if not match:
1711 raise TracingFailure(
1712 'Failed to parse arguments: %s' % args,
1713 None, None, None)
1714 flag = int(match.group(2), 16)
1715 if self.O_DIRECTORY & flag == self.O_DIRECTORY:
1716 # Ignore directories.
1717 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00001718 self._handle_file(
1719 pid,
1720 match.group(1),
1721 Results.File.READ if not ((self.O_RDWR | self.O_WRONLY) & flag)
1722 else Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001723
1724 def handle_rename(self, pid, args):
1725 if pid not in self._process_lookup:
1726 # Another process tree, ignore.
1727 return
1728 match = self.RE_RENAME.match(args)
1729 if not match:
1730 raise TracingFailure(
1731 'Failed to parse arguments: %s' % args,
1732 None, None, None)
maruel@chromium.org538141b2013-06-03 20:57:17 +00001733 self._handle_file(pid, match.group(1), Results.File.READ)
1734 self._handle_file(pid, match.group(2), Results.File.WRITE)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001735
maruel@chromium.org538141b2013-06-03 20:57:17 +00001736 def _handle_file(self, pid, filepath, mode):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001737 if not filepath.startswith('/'):
1738 filepath = os.path.join(self._process_lookup[pid].cwd, filepath)
1739 # We can get '..' in the path.
1740 filepath = os.path.normpath(filepath)
1741 # Sadly, still need to filter out directories here;
1742 # saw open_nocancel(".", 0, 0) = 0 lines.
maruel12e30012015-10-09 11:55:35 -07001743 if fs.isdir(filepath):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001744 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00001745 self._process_lookup[pid].add_file(filepath, mode)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001746
1747 def handle_ftruncate(self, pid, args):
1748 """Just used as a signal to kill dtrace, ignoring."""
1749 pass
1750
1751 @staticmethod
1752 def _handle_ignored(pid, args):
1753 """Is called for all the event traces that are not handled."""
1754 raise NotImplementedError('Please implement me')
1755
1756 @staticmethod
1757 def process_escaped_arguments(text):
1758 """Extracts escaped arguments on a string and return the arguments as a
1759 list.
1760
1761 Implemented as an automaton.
1762
1763 Example:
1764 With text = '\\001python2.7\\001-c\\001print(\\"hi\\")\\0', the
1765 function will return ['python2.7', '-c', 'print("hi")]
1766 """
1767 if not text.endswith('\\0'):
1768 raise ValueError('String is not null terminated: %r' % text, text)
1769 text = text[:-2]
1770
1771 def unescape(x):
1772 """Replaces '\\' with '\' and '\?' (where ? is anything) with ?."""
1773 out = []
1774 escaped = False
1775 for i in x:
1776 if i == '\\' and not escaped:
1777 escaped = True
1778 continue
1779 escaped = False
1780 out.append(i)
1781 return ''.join(out)
1782
1783 return [unescape(i) for i in text.split('\\001')]
1784
1785 class Tracer(ApiBase.Tracer):
1786 # pylint: disable=C0301
1787 #
1788 # To understand the following code, you'll want to take a look at:
1789 # http://developers.sun.com/solaris/articles/dtrace_quickref/dtrace_quickref.html
1790 # https://wikis.oracle.com/display/DTrace/Variables
1791 # http://docs.oracle.com/cd/E19205-01/820-4221/
1792 #
1793 # 0. Dump all the valid probes into a text file. It is important, you
1794 # want to redirect into a file and you don't want to constantly 'sudo'.
1795 # $ sudo dtrace -l > probes.txt
1796 #
1797 # 1. Count the number of probes:
1798 # $ wc -l probes.txt
1799 # 81823 # On OSX 10.7, including 1 header line.
1800 #
1801 # 2. List providers, intentionally skipping all the 'syspolicy10925' and the
1802 # likes and skipping the header with NR>1:
1803 # $ awk 'NR>1 { print $2 }' probes.txt | sort | uniq | grep -v '[[:digit:]]'
1804 # dtrace
1805 # fbt
1806 # io
1807 # ip
1808 # lockstat
1809 # mach_trap
1810 # proc
1811 # profile
1812 # sched
1813 # syscall
1814 # tcp
1815 # vminfo
1816 #
1817 # 3. List of valid probes:
1818 # $ grep syscall probes.txt | less
1819 # or use dtrace directly:
1820 # $ sudo dtrace -l -P syscall | less
1821 #
1822 # trackedpid is an associative array where its value can be 0, 1 or 2.
1823 # 0 is for untracked processes and is the default value for items not
1824 # in the associative array.
1825 # 1 is for tracked processes.
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00001826 # 2 is for the script created by create_subprocess_thunk() only. It is not
1827 # tracked itself but all its decendants are.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00001828 #
1829 # The script will kill itself only once waiting_to_die == 1 and
1830 # current_processes == 0, so that both getlogin() was called and that
1831 # all traced processes exited.
1832 #
1833 # TODO(maruel): Use cacheable predicates. See
1834 # https://wikis.oracle.com/display/DTrace/Performance+Considerations
1835 D_CODE = """
1836 dtrace:::BEGIN {
1837 waiting_to_die = 0;
1838 current_processes = 0;
1839 logindex = 0;
1840 printf("%d %d %s_%s(\\"%s\\")\\n",
1841 logindex, PID, probeprov, probename, SCRIPT);
1842 logindex++;
1843 }
1844
1845 proc:::start /trackedpid[ppid]/ {
1846 trackedpid[pid] = 1;
1847 current_processes += 1;
1848 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
1849 logindex, pid, probeprov, probename,
1850 ppid,
1851 execname,
1852 current_processes);
1853 logindex++;
1854 }
1855 /* Should use SCRIPT but there is no access to this variable at that
1856 * point. */
1857 proc:::start /ppid == PID && execname == "Python"/ {
1858 trackedpid[pid] = 2;
1859 current_processes += 1;
1860 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
1861 logindex, pid, probeprov, probename,
1862 ppid,
1863 execname,
1864 current_processes);
1865 logindex++;
1866 }
1867 proc:::exit /trackedpid[pid] &&
1868 current_processes == 1 &&
1869 waiting_to_die == 1/ {
1870 trackedpid[pid] = 0;
1871 current_processes -= 1;
1872 printf("%d %d %s_%s(%d)\\n",
1873 logindex, pid, probeprov, probename,
1874 current_processes);
1875 logindex++;
1876 exit(0);
1877 }
1878 proc:::exit /trackedpid[pid]/ {
1879 trackedpid[pid] = 0;
1880 current_processes -= 1;
1881 printf("%d %d %s_%s(%d)\\n",
1882 logindex, pid, probeprov, probename,
1883 current_processes);
1884 logindex++;
1885 }
1886
1887 /* Use an arcane function to detect when we need to die */
1888 syscall::ftruncate:entry /pid == PID && arg0 == FILE_ID/ {
1889 waiting_to_die = 1;
1890 printf("%d %d %s()\\n", logindex, pid, probefunc);
1891 logindex++;
1892 }
1893 syscall::ftruncate:entry /
1894 pid == PID && arg0 == FILE_ID && current_processes == 0/ {
1895 exit(0);
1896 }
1897
1898 syscall::open*:entry /trackedpid[pid] == 1/ {
1899 self->open_arg0 = arg0;
1900 self->open_arg1 = arg1;
1901 self->open_arg2 = arg2;
1902 }
1903 syscall::open*:return /trackedpid[pid] == 1 && errno == 0/ {
1904 this->open_arg0 = copyinstr(self->open_arg0);
1905 printf("%d %d %s(\\"%s\\", 0x%x, 0x%x)\\n",
1906 logindex, pid, probefunc,
1907 this->open_arg0,
1908 self->open_arg1,
1909 self->open_arg2);
1910 logindex++;
1911 this->open_arg0 = 0;
1912 }
1913 syscall::open*:return /trackedpid[pid] == 1/ {
1914 self->open_arg0 = 0;
1915 self->open_arg1 = 0;
1916 self->open_arg2 = 0;
1917 }
1918
1919 syscall::rename:entry /trackedpid[pid] == 1/ {
1920 self->rename_arg0 = arg0;
1921 self->rename_arg1 = arg1;
1922 }
1923 syscall::rename:return /trackedpid[pid] == 1 && errno == 0/ {
1924 this->rename_arg0 = copyinstr(self->rename_arg0);
1925 this->rename_arg1 = copyinstr(self->rename_arg1);
1926 printf("%d %d %s(\\"%s\\", \\"%s\\")\\n",
1927 logindex, pid, probefunc,
1928 this->rename_arg0,
1929 this->rename_arg1);
1930 logindex++;
1931 this->rename_arg0 = 0;
1932 this->rename_arg1 = 0;
1933 }
1934 syscall::rename:return /trackedpid[pid] == 1/ {
1935 self->rename_arg0 = 0;
1936 self->rename_arg1 = 0;
1937 }
1938
1939 /* Track chdir, it's painful because it is only receiving relative path.
1940 */
1941 syscall::chdir:entry /trackedpid[pid] == 1/ {
1942 self->chdir_arg0 = arg0;
1943 }
1944 syscall::chdir:return /trackedpid[pid] == 1 && errno == 0/ {
1945 this->chdir_arg0 = copyinstr(self->chdir_arg0);
1946 printf("%d %d %s(\\"%s\\")\\n",
1947 logindex, pid, probefunc,
1948 this->chdir_arg0);
1949 logindex++;
1950 this->chdir_arg0 = 0;
1951 }
1952 syscall::chdir:return /trackedpid[pid] == 1/ {
1953 self->chdir_arg0 = 0;
1954 }
1955 """
1956
1957 # execve-specific code, tends to throw a lot of exceptions.
1958 D_CODE_EXECVE = """
1959 /* Finally what we care about! */
1960 syscall::exec*:entry /trackedpid[pid]/ {
1961 self->exec_arg0 = copyinstr(arg0);
1962 /* Incrementally probe for a NULL in the argv parameter of execve() to
1963 * figure out argc. */
1964 /* TODO(maruel): Skip the remaining copyin() when a NULL pointer was
1965 * found. */
1966 self->exec_argc = 0;
1967 /* Probe for argc==1 */
1968 this->exec_argv = (user_addr_t*)copyin(
1969 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
1970 self->exec_argc = this->exec_argv[self->exec_argc] ?
1971 (self->exec_argc + 1) : self->exec_argc;
1972
1973 /* Probe for argc==2 */
1974 this->exec_argv = (user_addr_t*)copyin(
1975 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
1976 self->exec_argc = this->exec_argv[self->exec_argc] ?
1977 (self->exec_argc + 1) : self->exec_argc;
1978
1979 /* Probe for argc==3 */
1980 this->exec_argv = (user_addr_t*)copyin(
1981 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
1982 self->exec_argc = this->exec_argv[self->exec_argc] ?
1983 (self->exec_argc + 1) : self->exec_argc;
1984
1985 /* Probe for argc==4 */
1986 this->exec_argv = (user_addr_t*)copyin(
1987 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
1988 self->exec_argc = this->exec_argv[self->exec_argc] ?
1989 (self->exec_argc + 1) : self->exec_argc;
1990
1991 /* Copy the inputs strings since there is no guarantee they'll be
1992 * present after the call completed. */
1993 self->exec_argv0 = (self->exec_argc > 0) ?
1994 copyinstr(this->exec_argv[0]) : "";
1995 self->exec_argv1 = (self->exec_argc > 1) ?
1996 copyinstr(this->exec_argv[1]) : "";
1997 self->exec_argv2 = (self->exec_argc > 2) ?
1998 copyinstr(this->exec_argv[2]) : "";
1999 self->exec_argv3 = (self->exec_argc > 3) ?
2000 copyinstr(this->exec_argv[3]) : "";
2001 this->exec_argv = 0;
2002 }
2003 syscall::exec*:return /trackedpid[pid] && errno == 0/ {
2004 /* We need to join strings here, as using multiple printf() would
2005 * cause tearing when multiple threads/processes are traced.
2006 * Since it is impossible to escape a string and join it to another one,
2007 * like sprintf("%s%S", previous, more), use hackery.
2008 * Each of the elements are split with a \\1. \\0 cannot be used because
2009 * it is simply ignored. This will conflict with any program putting a
2010 * \\1 in their execve() string but this should be "rare enough" */
2011 this->args = "";
2012 /* Process exec_argv[0] */
2013 this->args = strjoin(
2014 this->args, (self->exec_argc > 0) ? self->exec_argv0 : "");
2015
2016 /* Process exec_argv[1] */
2017 this->args = strjoin(
2018 this->args, (self->exec_argc > 1) ? "\\1" : "");
2019 this->args = strjoin(
2020 this->args, (self->exec_argc > 1) ? self->exec_argv1 : "");
2021
2022 /* Process exec_argv[2] */
2023 this->args = strjoin(
2024 this->args, (self->exec_argc > 2) ? "\\1" : "");
2025 this->args = strjoin(
2026 this->args, (self->exec_argc > 2) ? self->exec_argv2 : "");
2027
2028 /* Process exec_argv[3] */
2029 this->args = strjoin(
2030 this->args, (self->exec_argc > 3) ? "\\1" : "");
2031 this->args = strjoin(
2032 this->args, (self->exec_argc > 3) ? self->exec_argv3 : "");
2033
2034 /* Prints self->exec_argc to permits verifying the internal
2035 * consistency since this code is quite fishy. */
2036 printf("%d %d %s(\\"%s\\", [%d, %S])\\n",
2037 logindex, pid, probefunc,
2038 self->exec_arg0,
2039 self->exec_argc,
2040 this->args);
2041 logindex++;
2042 this->args = 0;
2043 }
2044 syscall::exec*:return /trackedpid[pid]/ {
2045 self->exec_arg0 = 0;
2046 self->exec_argc = 0;
2047 self->exec_argv0 = 0;
2048 self->exec_argv1 = 0;
2049 self->exec_argv2 = 0;
2050 self->exec_argv3 = 0;
2051 }
2052 """
2053
2054 # Code currently not used.
2055 D_EXTRANEOUS = """
2056 /* This is a good learning experience, since it traces a lot of things
2057 * related to the process and child processes.
2058 * Warning: it generates a gigantic log. For example, tracing
2059 * "data/trace_inputs/child1.py --child" generates a 2mb log and takes
2060 * several minutes to execute.
2061 */
2062 /*
2063 mach_trap::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2064 printf("%d %d %s_%s() = %d\\n",
2065 logindex, pid, probeprov, probefunc, errno);
2066 logindex++;
2067 }
2068 proc::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2069 printf("%d %d %s_%s() = %d\\n",
2070 logindex, pid, probeprov, probefunc, errno);
2071 logindex++;
2072 }
2073 sched::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2074 printf("%d %d %s_%s() = %d\\n",
2075 logindex, pid, probeprov, probefunc, errno);
2076 logindex++;
2077 }
2078 syscall::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2079 printf("%d %d %s_%s() = %d\\n",
2080 logindex, pid, probeprov, probefunc, errno);
2081 logindex++;
2082 }
2083 vminfo::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2084 printf("%d %d %s_%s() = %d\\n",
2085 logindex, pid, probeprov, probefunc, errno);
2086 logindex++;
2087 }
2088 */
2089 /* TODO(maruel): *stat* functions and friends
2090 syscall::access:return,
2091 syscall::chdir:return,
2092 syscall::chflags:return,
2093 syscall::chown:return,
2094 syscall::chroot:return,
2095 syscall::getattrlist:return,
2096 syscall::getxattr:return,
2097 syscall::lchown:return,
2098 syscall::lstat64:return,
2099 syscall::lstat:return,
2100 syscall::mkdir:return,
2101 syscall::pathconf:return,
2102 syscall::readlink:return,
2103 syscall::removexattr:return,
2104 syscall::setxattr:return,
2105 syscall::stat64:return,
2106 syscall::stat:return,
2107 syscall::truncate:return,
2108 syscall::unlink:return,
2109 syscall::utimes:return,
2110 */
2111 """
2112
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002113 def __init__(self, logname, use_sudo):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002114 """Starts the log collection with dtrace.
2115
2116 Requires root access or chmod 4555 on dtrace. dtrace is asynchronous so
2117 this needs to wait for dtrace to be "warmed up".
2118 """
2119 super(Dtrace.Tracer, self).__init__(logname)
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002120 # This script is used as a signal to figure out the root process.
2121 self._signal_script = create_subprocess_thunk()
2122 self._scripts_to_cleanup.append(self._signal_script)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002123 # This unique dummy temp file is used to signal the dtrace script that it
2124 # should stop as soon as all the child processes are done. A bit hackish
2125 # but works fine enough.
2126 self._dummy_file_id, self._dummy_file_name = tempfile.mkstemp(
Marc-Antoine Ruel3c979cb2015-03-11 13:43:28 -04002127 prefix=u'trace_signal_file')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002128
maruel@chromium.org9b2112a2013-04-05 18:01:02 +00002129 dtrace_path = '/usr/sbin/dtrace'
maruel12e30012015-10-09 11:55:35 -07002130 if not fs.isfile(dtrace_path):
maruel@chromium.org9b2112a2013-04-05 18:01:02 +00002131 dtrace_path = 'dtrace'
maruel12e30012015-10-09 11:55:35 -07002132 elif use_sudo is None and (fs.stat(dtrace_path).st_mode & stat.S_ISUID):
maruel@chromium.org9b2112a2013-04-05 18:01:02 +00002133 # No need to sudo. For those following at home, don't do that.
2134 use_sudo = False
2135
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002136 # Note: do not use the -p flag. It's useless if the initial process quits
2137 # too fast, resulting in missing traces from the grand-children. The D
2138 # code manages the dtrace lifetime itself.
2139 trace_cmd = [
maruel@chromium.org9b2112a2013-04-05 18:01:02 +00002140 dtrace_path,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002141 # Use a larger buffer if getting 'out of scratch space' errors.
2142 # Ref: https://wikis.oracle.com/display/DTrace/Options+and+Tunables
2143 '-b', '10m',
2144 '-x', 'dynvarsize=10m',
2145 #'-x', 'dtrace_global_maxsize=1m',
2146 '-x', 'evaltime=exec',
2147 '-o', '/dev/stderr',
2148 '-q',
2149 '-n', self._get_dtrace_code(),
2150 ]
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002151 if use_sudo is not False:
2152 trace_cmd.insert(0, 'sudo')
2153
maruel12e30012015-10-09 11:55:35 -07002154 with fs.open(self._logname + '.log', 'wb') as logfile:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002155 self._dtrace = subprocess.Popen(
2156 trace_cmd, stdout=logfile, stderr=subprocess.STDOUT)
2157 logging.debug('Started dtrace pid: %d' % self._dtrace.pid)
2158
2159 # Reads until one line is printed, which signifies dtrace is up and ready.
maruel12e30012015-10-09 11:55:35 -07002160 with fs.open(self._logname + '.log', 'rb') as logfile:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002161 while 'dtrace_BEGIN' not in logfile.readline():
2162 if self._dtrace.poll() is not None:
2163 # Do a busy wait. :/
2164 break
2165 logging.debug('dtrace started')
2166
2167 def _get_dtrace_code(self):
2168 """Setups the D code to implement child process tracking.
2169
2170 Injects the cookie in the script so it knows when to stop.
2171
2172 The script will detect any instance of the script created with
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002173 create_subprocess_thunk() and will start tracing it.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002174 """
maruel@chromium.orgbe75f282012-11-02 19:34:37 +00002175 out = (
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002176 'inline int PID = %d;\n'
2177 'inline string SCRIPT = "%s";\n'
2178 'inline int FILE_ID = %d;\n'
2179 '\n'
2180 '%s') % (
2181 os.getpid(),
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002182 self._signal_script,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002183 self._dummy_file_id,
maruel@chromium.orgbe75f282012-11-02 19:34:37 +00002184 self.D_CODE)
2185 if os.environ.get('TRACE_INPUTS_DTRACE_ENABLE_EXECVE') == '1':
2186 # Do not enable by default since it tends to spew dtrace: error lines
2187 # because the execve() parameters are not in valid memory at the time of
2188 # logging.
2189 # TODO(maruel): Find a way to make this reliable since it's useful but
2190 # only works in limited/trivial uses cases for now.
2191 out += self.D_CODE_EXECVE
2192 return out
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002193
2194 def trace(self, cmd, cwd, tracename, output):
2195 """Runs dtrace on an executable.
2196
2197 This dtruss is broken when it starts the process itself or when tracing
2198 child processes, this code starts a wrapper process
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002199 generated with create_subprocess_thunk() which starts the executable to
2200 trace.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002201 """
2202 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
2203 assert os.path.isabs(cmd[0]), cmd[0]
2204 assert os.path.isabs(cwd), cwd
2205 assert os.path.normpath(cwd) == cwd, cwd
2206 with self._lock:
2207 if not self._initialized:
2208 raise TracingFailure(
2209 'Called Tracer.trace() on an unitialized object',
2210 None, None, None, tracename)
2211 assert tracename not in (i['trace'] for i in self._traces)
2212
2213 # Starts the script wrapper to start the child process. This signals the
2214 # dtrace script that this process is to be traced.
2215 stdout = stderr = None
2216 if output:
2217 stdout = subprocess.PIPE
2218 stderr = subprocess.STDOUT
2219 child_cmd = [
2220 sys.executable,
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002221 self._signal_script,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002222 tracename,
2223 ]
2224 # Call a dummy function so that dtrace knows I'm about to launch a process
2225 # that needs to be traced.
2226 # Yummy.
2227 child = subprocess.Popen(
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00002228 child_cmd + tools.fix_python_path(cmd),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002229 stdin=subprocess.PIPE,
2230 stdout=stdout,
2231 stderr=stderr,
2232 cwd=cwd)
2233 logging.debug('Started child pid: %d' % child.pid)
2234
2235 out = child.communicate()[0]
2236 # This doesn't mean tracing is done, one of the grand-child process may
2237 # still be alive. It will be tracked with the dtrace script.
2238
2239 with self._lock:
2240 assert tracename not in (i['trace'] for i in self._traces)
2241 self._traces.append(
2242 {
2243 'cmd': cmd,
2244 'cwd': cwd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002245 'pid': child.pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002246 'output': out,
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002247 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002248 })
2249 return child.returncode, out
2250
2251 def close(self, timeout=None):
2252 """Terminates dtrace."""
2253 logging.debug('close(%s)' % timeout)
2254 try:
2255 try:
2256 super(Dtrace.Tracer, self).close(timeout)
2257 # Signal dtrace that it should stop now.
maruel@chromium.orgf43e68b2012-10-15 20:23:10 +00002258 # ftruncate doesn't exist on Windows.
2259 os.ftruncate(self._dummy_file_id, 0) # pylint: disable=E1101
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002260 if timeout:
2261 start = time.time()
2262 # Use polling. :/
2263 while (self._dtrace.poll() is None and
2264 (time.time() - start) < timeout):
2265 time.sleep(0.1)
2266 self._dtrace.kill()
2267 self._dtrace.wait()
2268 finally:
2269 # Make sure to kill it in any case.
2270 if self._dtrace.poll() is None:
2271 try:
2272 self._dtrace.kill()
2273 self._dtrace.wait()
2274 except OSError:
2275 pass
2276
2277 if self._dtrace.returncode != 0:
2278 # Warn about any dtrace failure but basically ignore it.
2279 print 'dtrace failure: %s' % self._dtrace.returncode
2280 finally:
2281 os.close(self._dummy_file_id)
maruel12e30012015-10-09 11:55:35 -07002282 fs.remove(self._dummy_file_name)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002283
2284 def post_process_log(self):
2285 """Sorts the log back in order when each call occured.
2286
2287 dtrace doesn't save the buffer in strict order since it keeps one buffer
2288 per CPU.
2289 """
2290 super(Dtrace.Tracer, self).post_process_log()
2291 logname = self._logname + '.log'
maruel12e30012015-10-09 11:55:35 -07002292 with fs.open(logname, 'rb') as logfile:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002293 lines = [l for l in logfile if l.strip()]
2294 errors = [l for l in lines if l.startswith('dtrace:')]
2295 if errors:
2296 raise TracingFailure(
2297 'Found errors in the trace: %s' % '\n'.join(errors),
2298 None, None, None, logname)
2299 try:
2300 lines = sorted(lines, key=lambda l: int(l.split(' ', 1)[0]))
2301 except ValueError:
2302 raise TracingFailure(
2303 'Found errors in the trace: %s' % '\n'.join(
2304 l for l in lines if l.split(' ', 1)[0].isdigit()),
2305 None, None, None, logname)
maruel12e30012015-10-09 11:55:35 -07002306 with fs.open(logname, 'wb') as logfile:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002307 logfile.write(''.join(lines))
2308
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00002309 def __init__(self, use_sudo=None):
2310 super(Dtrace, self).__init__()
2311 self.use_sudo = use_sudo
2312
2313 def get_tracer(self, logname):
2314 return self.Tracer(logname, self.use_sudo)
2315
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002316 @staticmethod
2317 def clean_trace(logname):
2318 for ext in ('', '.log'):
maruel12e30012015-10-09 11:55:35 -07002319 if fs.isfile(logname + ext):
2320 fs.remove(logname + ext)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002321
2322 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00002323 def parse_log(cls, logname, blacklist, trace_name):
2324 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00002325 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002326
2327 def blacklist_more(filepath):
2328 # All the HFS metadata is in the form /.vol/...
2329 return blacklist(filepath) or re.match(r'^\/\.vol\/.+$', filepath)
2330
Marc-Antoine Ruelde011802013-11-12 15:19:47 -05002331 data = tools.read_json(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002332 out = []
2333 for item in data['traces']:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00002334 if trace_name and item['trace'] != trace_name:
2335 continue
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002336 result = {
2337 'output': item['output'],
2338 'trace': item['trace'],
2339 }
2340 try:
2341 context = cls.Context(blacklist_more, item['pid'], item['cwd'])
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002342 # It's fine to assume the file as UTF-8: OSX enforces the file names to
2343 # be valid UTF-8 and we control the log output.
2344 for line in codecs.open(logname + '.log', 'rb', encoding='utf-8'):
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002345 context.on_line(line)
2346 result['results'] = context.to_results()
2347 except TracingFailure:
2348 result['exception'] = sys.exc_info()
2349 out.append(result)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002350 return out
2351
2352
2353class LogmanTrace(ApiBase):
2354 """Uses the native Windows ETW based tracing functionality to trace a child
2355 process.
2356
2357 Caveat: this implementations doesn't track cwd or initial_cwd. It is because
2358 the Windows Kernel doesn't have a concept of 'current working directory' at
2359 all. A Win32 process has a map of current directories, one per drive letter
2360 and it is managed by the user mode kernel32.dll. In kernel, a file is always
2361 opened relative to another file_object or as an absolute path. All the current
2362 working directory logic is done in user mode.
2363 """
2364 class Context(ApiBase.Context):
2365 """Processes a ETW log line and keeps the list of existent and non
2366 existent files accessed.
2367
2368 Ignores directories.
2369 """
2370 # These indexes are for the stripped version in json.
2371 EVENT_NAME = 0
2372 TYPE = 1
2373 PID = 2
2374 TID = 3
2375 PROCESSOR_ID = 4
2376 TIMESTAMP = 5
2377 USER_DATA = 6
2378
2379 class Process(ApiBase.Context.Process):
2380 def __init__(self, *args):
2381 super(LogmanTrace.Context.Process, self).__init__(*args)
2382 # Handle file objects that succeeded.
2383 self.file_objects = {}
2384
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002385 def __init__(self, blacklist, thunk_pid, trace_name, thunk_cmd):
2386 logging.info(
2387 '%s(%d, %s, %s)', self.__class__.__name__, thunk_pid, trace_name,
2388 thunk_cmd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002389 super(LogmanTrace.Context, self).__init__(blacklist)
maruel@chromium.org561d4b22013-09-26 21:08:08 +00002390 self._drive_map = file_path.DosDriveMap()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002391 # Threads mapping to the corresponding process id.
2392 self._threads_active = {}
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002393 # Process ID of the tracer, e.g. the temporary script created by
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002394 # create_subprocess_thunk(). This is tricky because the process id may
2395 # have been reused.
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002396 self._thunk_pid = thunk_pid
2397 self._thunk_cmd = thunk_cmd
2398 self._trace_name = trace_name
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002399 self._line_number = 0
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002400 self._thunk_process = None
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002401
2402 def on_line(self, line):
2403 """Processes a json Event line."""
2404 self._line_number += 1
2405 try:
2406 # By Opcode
2407 handler = getattr(
2408 self,
2409 'handle_%s_%s' % (line[self.EVENT_NAME], line[self.TYPE]),
2410 None)
2411 if not handler:
2412 raise TracingFailure(
2413 'Unexpected event %s_%s' % (
2414 line[self.EVENT_NAME], line[self.TYPE]),
2415 None, None, None)
2416 handler(line)
2417 except TracingFailure, e:
2418 # Hack in the values since the handler could be a static function.
2419 e.pid = line[self.PID]
2420 e.line = line
2421 e.line_number = self._line_number
2422 # Re-raise the modified exception.
2423 raise
2424 except (KeyError, NotImplementedError, ValueError), e:
2425 raise TracingFailure(
2426 'Trace generated a %s exception: %s' % (
2427 e.__class__.__name__, str(e)),
2428 line[self.PID],
2429 self._line_number,
2430 line,
2431 e)
2432
2433 def to_results(self):
2434 if not self.root_process:
2435 raise TracingFailure(
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002436 'Failed to detect the initial process %d' % self._thunk_pid,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002437 None, None, None)
2438 process = self.root_process.to_results_process()
2439 return Results(process)
2440
2441 def _thread_to_process(self, tid):
2442 """Finds the process from the thread id."""
2443 tid = int(tid, 16)
2444 pid = self._threads_active.get(tid)
2445 if not pid or not self._process_lookup.get(pid):
2446 return
2447 return self._process_lookup[pid]
2448
2449 @classmethod
2450 def handle_EventTrace_Header(cls, line):
2451 """Verifies no event was dropped, e.g. no buffer overrun occured."""
2452 BUFFER_SIZE = cls.USER_DATA
2453 #VERSION = cls.USER_DATA + 1
2454 #PROVIDER_VERSION = cls.USER_DATA + 2
2455 #NUMBER_OF_PROCESSORS = cls.USER_DATA + 3
2456 #END_TIME = cls.USER_DATA + 4
2457 #TIMER_RESOLUTION = cls.USER_DATA + 5
2458 #MAX_FILE_SIZE = cls.USER_DATA + 6
2459 #LOG_FILE_MODE = cls.USER_DATA + 7
2460 #BUFFERS_WRITTEN = cls.USER_DATA + 8
2461 #START_BUFFERS = cls.USER_DATA + 9
2462 #POINTER_SIZE = cls.USER_DATA + 10
2463 EVENTS_LOST = cls.USER_DATA + 11
2464 #CPU_SPEED = cls.USER_DATA + 12
2465 #LOGGER_NAME = cls.USER_DATA + 13
2466 #LOG_FILE_NAME = cls.USER_DATA + 14
2467 #BOOT_TIME = cls.USER_DATA + 15
2468 #PERF_FREQ = cls.USER_DATA + 16
2469 #START_TIME = cls.USER_DATA + 17
2470 #RESERVED_FLAGS = cls.USER_DATA + 18
2471 #BUFFERS_LOST = cls.USER_DATA + 19
2472 #SESSION_NAME_STRING = cls.USER_DATA + 20
2473 #LOG_FILE_NAME_STRING = cls.USER_DATA + 21
2474 if line[EVENTS_LOST] != '0':
2475 raise TracingFailure(
2476 ( '%s events were lost during trace, please increase the buffer '
2477 'size from %s') % (line[EVENTS_LOST], line[BUFFER_SIZE]),
2478 None, None, None)
2479
2480 def handle_FileIo_Cleanup(self, line):
2481 """General wisdom: if a file is closed, it's because it was opened.
2482
2483 Note that FileIo_Close is not used since if a file was opened properly but
2484 not closed before the process exits, only Cleanup will be logged.
2485 """
2486 #IRP = self.USER_DATA
2487 TTID = self.USER_DATA + 1 # Thread ID, that's what we want.
2488 FILE_OBJECT = self.USER_DATA + 2
2489 #FILE_KEY = self.USER_DATA + 3
2490 proc = self._thread_to_process(line[TTID])
2491 if not proc:
2492 # Not a process we care about.
2493 return
2494 file_object = line[FILE_OBJECT]
2495 if file_object in proc.file_objects:
maruel@chromium.org538141b2013-06-03 20:57:17 +00002496 filepath, access_type = proc.file_objects.pop(file_object)
2497 proc.add_file(filepath, access_type)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002498
2499 def handle_FileIo_Create(self, line):
2500 """Handles a file open.
2501
2502 All FileIo events are described at
2503 http://msdn.microsoft.com/library/windows/desktop/aa363884.aspx
2504 for some value of 'description'.
2505
2506 " (..) process and thread id values of the IO events (..) are not valid "
2507 http://msdn.microsoft.com/magazine/ee358703.aspx
2508
2509 The FileIo.Create event doesn't return if the CreateFile() call
2510 succeeded, so keep track of the file_object and check that it is
2511 eventually closed with FileIo_Cleanup.
2512 """
2513 #IRP = self.USER_DATA
2514 TTID = self.USER_DATA + 1 # Thread ID, that's what we want.
2515 FILE_OBJECT = self.USER_DATA + 2
2516 #CREATE_OPTIONS = self.USER_DATA + 3
2517 #FILE_ATTRIBUTES = self.USER_DATA + 4
maruel@chromium.org538141b2013-06-03 20:57:17 +00002518 #SHARE_ACCESS = self.USER_DATA + 5
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002519 OPEN_PATH = self.USER_DATA + 6
2520
2521 proc = self._thread_to_process(line[TTID])
2522 if not proc:
2523 # Not a process we care about.
2524 return
2525
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002526 raw_path = line[OPEN_PATH]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002527 # Ignore directories and bare drive right away.
2528 if raw_path.endswith(os.path.sep):
2529 return
2530 filepath = self._drive_map.to_win32(raw_path)
2531 # Ignore bare drive right away. Some may still fall through with format
2532 # like '\\?\X:'
2533 if len(filepath) == 2:
2534 return
2535 file_object = line[FILE_OBJECT]
maruel12e30012015-10-09 11:55:35 -07002536 if fs.isdir(filepath):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002537 # There is no O_DIRECTORY equivalent on Windows. The closed is
2538 # FILE_FLAG_BACKUP_SEMANTICS but it's not exactly right either. So
2539 # simply discard directories are they are found.
2540 return
maruel@chromium.org538141b2013-06-03 20:57:17 +00002541 # Override any stale file object.
2542 # TODO(maruel): Figure out a way to detect if the file was opened for
2543 # reading or writting. Sadly CREATE_OPTIONS doesn't seem to be of any help
2544 # here. For now mark as None to make it clear we have no idea what it is
2545 # about.
2546 proc.file_objects[file_object] = (filepath, None)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002547
2548 def handle_FileIo_Rename(self, line):
2549 # TODO(maruel): Handle?
2550 pass
2551
2552 def handle_Process_End(self, line):
2553 pid = line[self.PID]
2554 if self._process_lookup.get(pid):
2555 logging.info('Terminated: %d' % pid)
2556 self._process_lookup[pid] = None
2557 else:
2558 logging.debug('Terminated: %d' % pid)
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002559 if self._thunk_process and self._thunk_process.pid == pid:
2560 self._thunk_process = None
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002561
2562 def handle_Process_Start(self, line):
2563 """Handles a new child process started by PID."""
2564 #UNIQUE_PROCESS_KEY = self.USER_DATA
2565 PROCESS_ID = self.USER_DATA + 1
2566 #PARENT_PID = self.USER_DATA + 2
2567 #SESSION_ID = self.USER_DATA + 3
2568 #EXIT_STATUS = self.USER_DATA + 4
2569 #DIRECTORY_TABLE_BASE = self.USER_DATA + 5
2570 #USER_SID = self.USER_DATA + 6
2571 IMAGE_FILE_NAME = self.USER_DATA + 7
2572 COMMAND_LINE = self.USER_DATA + 8
2573
2574 ppid = line[self.PID]
2575 pid = int(line[PROCESS_ID], 16)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002576 command_line = CommandLineToArgvW(line[COMMAND_LINE])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002577 logging.debug(
2578 'New process %d->%d (%s) %s' %
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002579 (ppid, pid, line[IMAGE_FILE_NAME], command_line))
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002580
2581 if pid == self._thunk_pid:
2582 # Need to ignore processes we don't know about because the log is
2583 # system-wide. self._thunk_pid shall start only one process.
2584 # This is tricky though because Windows *loves* to reuse process id and
2585 # it happens often that the process ID of the thunk script created by
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002586 # create_subprocess_thunk() is reused. So just detecting the pid here is
2587 # not sufficient, we must confirm the command line.
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002588 if command_line[:len(self._thunk_cmd)] != self._thunk_cmd:
2589 logging.info(
2590 'Ignoring duplicate pid %d for %s: %s while searching for %s',
2591 pid, self._trace_name, command_line, self._thunk_cmd)
2592 return
2593
2594 # TODO(maruel): The check is quite weak. Add the thunk path.
2595 if self._thunk_process:
2596 raise TracingFailure(
2597 ( 'Parent process is _thunk_pid(%d) but thunk_process(%d) is '
2598 'already set') % (self._thunk_pid, self._thunk_process.pid),
2599 None, None, None)
2600 proc = self.Process(self.blacklist, pid, None)
2601 self._thunk_process = proc
2602 return
2603 elif ppid == self._thunk_pid and self._thunk_process:
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002604 proc = self.Process(self.blacklist, pid, None)
2605 self.root_process = proc
2606 ppid = None
2607 elif self._process_lookup.get(ppid):
2608 proc = self.Process(self.blacklist, pid, None)
2609 self._process_lookup[ppid].children.append(proc)
2610 else:
2611 # Ignore
2612 return
2613 self._process_lookup[pid] = proc
2614
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002615 proc.command = command_line
2616 proc.executable = line[IMAGE_FILE_NAME]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002617 # proc.command[0] may be the absolute path of 'executable' but it may be
2618 # anything else too. If it happens that command[0] ends with executable,
2619 # use it, otherwise defaults to the base name.
2620 cmd0 = proc.command[0].lower()
2621 if not cmd0.endswith('.exe'):
2622 # TODO(maruel): That's not strictly true either.
2623 cmd0 += '.exe'
maruel12e30012015-10-09 11:55:35 -07002624 if cmd0.endswith(proc.executable) and fs.isfile(cmd0):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002625 # Fix the path.
2626 cmd0 = cmd0.replace('/', os.path.sep)
2627 cmd0 = os.path.normpath(cmd0)
maruel@chromium.org561d4b22013-09-26 21:08:08 +00002628 proc.executable = file_path.get_native_path_case(cmd0)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002629 logging.info(
2630 'New child: %s -> %d %s' % (ppid, pid, proc.executable))
2631
2632 def handle_Thread_End(self, line):
2633 """Has the same parameters as Thread_Start."""
2634 tid = int(line[self.TID], 16)
2635 self._threads_active.pop(tid, None)
2636
2637 def handle_Thread_Start(self, line):
2638 """Handles a new thread created.
2639
2640 Do not use self.PID here since a process' initial thread is created by
2641 the parent process.
2642 """
2643 PROCESS_ID = self.USER_DATA
2644 TTHREAD_ID = self.USER_DATA + 1
2645 #STACK_BASE = self.USER_DATA + 2
2646 #STACK_LIMIT = self.USER_DATA + 3
2647 #USER_STACK_BASE = self.USER_DATA + 4
2648 #USER_STACK_LIMIT = self.USER_DATA + 5
2649 #AFFINITY = self.USER_DATA + 6
2650 #WIN32_START_ADDR = self.USER_DATA + 7
2651 #TEB_BASE = self.USER_DATA + 8
2652 #SUB_PROCESS_TAG = self.USER_DATA + 9
2653 #BASE_PRIORITY = self.USER_DATA + 10
2654 #PAGE_PRIORITY = self.USER_DATA + 11
2655 #IO_PRIORITY = self.USER_DATA + 12
2656 #THREAD_FLAGS = self.USER_DATA + 13
2657 # Do not use self.PID here since a process' initial thread is created by
2658 # the parent process.
2659 pid = int(line[PROCESS_ID], 16)
2660 tid = int(line[TTHREAD_ID], 16)
2661 logging.debug('New thread pid:%d, tid:%d' % (pid, tid))
2662 self._threads_active[tid] = pid
2663
2664 @classmethod
2665 def supported_events(cls):
2666 """Returns all the procesed events."""
2667 out = []
2668 for member in dir(cls):
2669 match = re.match(r'^handle_([A-Za-z]+)_([A-Za-z]+)$', member)
2670 if match:
2671 out.append(match.groups())
2672 return out
2673
2674 class Tracer(ApiBase.Tracer):
2675 # The basic headers.
2676 EXPECTED_HEADER = [
2677 u'Event Name',
2678 u'Type',
2679 u'Event ID',
2680 u'Version',
2681 u'Channel',
2682 u'Level', # 5
2683 u'Opcode',
2684 u'Task',
2685 u'Keyword',
2686 u'PID',
2687 u'TID', # 10
2688 u'Processor Number',
2689 u'Instance ID',
2690 u'Parent Instance ID',
2691 u'Activity ID',
2692 u'Related Activity ID', # 15
2693 u'Clock-Time',
2694 u'Kernel(ms)', # Both have a resolution of ~15ms which makes them
2695 u'User(ms)', # pretty much useless.
2696 u'User Data', # Extra arguments that are event-specific.
2697 ]
2698 # Only the useful headers common to all entries are listed there. Any column
2699 # at 19 or higher is dependent on the specific event.
2700 EVENT_NAME = 0
2701 TYPE = 1
2702 PID = 9
2703 TID = 10
2704 PROCESSOR_ID = 11
2705 TIMESTAMP = 16
2706 NULL_GUID = '{00000000-0000-0000-0000-000000000000}'
2707 USER_DATA = 19
2708
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002709 class CsvReader(object):
2710 """CSV reader that reads files generated by tracerpt.exe.
2711
2712 csv.reader() fails to read them properly, it mangles file names quoted
2713 with "" with a comma in it.
2714 """
2715 # 0. Had a ',' or one of the following ' ' after a comma, next should
2716 # be ' ', '"' or string or ',' for an empty field.
2717 ( HAD_DELIMITER,
2718 # 1. Processing an unquoted field up to ','.
2719 IN_STR,
2720 # 2. Processing a new field starting with '"'.
2721 STARTING_STR_QUOTED,
2722 # 3. Second quote in a row at the start of a field. It could be either
2723 # '""foo""' or '""'. Who the hell thought it was a great idea to use
2724 # the same character for delimiting and escaping?
2725 STARTING_SECOND_QUOTE,
2726 # 4. A quote inside a quoted string where the previous character was
2727 # not a quote, so the string is not empty. Can be either: end of a
2728 # quoted string (a delimiter) or a quote escape. The next char must be
2729 # either '"' or ','.
2730 HAD_QUOTE_IN_QUOTED,
2731 # 5. Second quote inside a quoted string.
2732 HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED,
2733 # 6. Processing a field that started with '"'.
2734 IN_STR_QUOTED) = range(7)
2735
2736 def __init__(self, f):
2737 self.f = f
2738
2739 def __iter__(self):
2740 return self
2741
2742 def next(self):
2743 """Splits the line in fields."""
2744 line = self.f.readline()
2745 if not line:
2746 raise StopIteration()
2747 line = line.strip()
2748 fields = []
2749 state = self.HAD_DELIMITER
2750 for i, c in enumerate(line):
2751 if state == self.HAD_DELIMITER:
2752 if c == ',':
2753 # Empty field.
2754 fields.append('')
2755 elif c == ' ':
2756 # Ignore initial whitespaces
2757 pass
2758 elif c == '"':
2759 state = self.STARTING_STR_QUOTED
2760 fields.append('')
2761 else:
2762 # Start of a new field.
2763 state = self.IN_STR
2764 fields.append(c)
2765
2766 elif state == self.IN_STR:
2767 # Do not accept quote inside unquoted field.
2768 assert c != '"', (i, c, line, fields)
2769 if c == ',':
2770 fields[-1] = fields[-1].strip()
2771 state = self.HAD_DELIMITER
2772 else:
2773 fields[-1] = fields[-1] + c
2774
2775 elif state == self.STARTING_STR_QUOTED:
2776 if c == '"':
2777 # Do not store the character yet.
2778 state = self.STARTING_SECOND_QUOTE
2779 else:
2780 state = self.IN_STR_QUOTED
2781 fields[-1] = fields[-1] + c
2782
2783 elif state == self.STARTING_SECOND_QUOTE:
2784 if c == ',':
2785 # It was an empty field. '""' == ''.
2786 state = self.HAD_DELIMITER
2787 else:
2788 fields[-1] = fields[-1] + '"' + c
2789 state = self.IN_STR_QUOTED
2790
2791 elif state == self.HAD_QUOTE_IN_QUOTED:
2792 if c == ',':
2793 # End of the string.
2794 state = self.HAD_DELIMITER
2795 elif c == '"':
2796 state = self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED
2797 else:
2798 # The previous double-quote was just an unescaped quote.
2799 fields[-1] = fields[-1] + '"' + c
2800 state = self.IN_STR_QUOTED
2801
2802 elif state == self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED:
2803 if c == ',':
2804 # End of the string.
2805 state = self.HAD_DELIMITER
2806 fields[-1] = fields[-1] + '"'
2807 else:
maruel@chromium.org261d1352013-02-25 19:23:20 +00002808 # That's just how the logger rolls. Revert back to appending the
2809 # char and "guess" it was a quote in a double-quoted string.
2810 state = self.IN_STR_QUOTED
2811 fields[-1] = fields[-1] + '"' + c
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002812
2813 elif state == self.IN_STR_QUOTED:
2814 if c == '"':
2815 # Could be a delimiter or an escape.
2816 state = self.HAD_QUOTE_IN_QUOTED
2817 else:
2818 fields[-1] = fields[-1] + c
2819
2820 if state == self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED:
2821 fields[-1] = fields[-1] + '"'
2822 else:
2823 assert state in (
2824 # Terminated with a normal field.
2825 self.IN_STR,
2826 # Terminated with an empty field.
2827 self.STARTING_SECOND_QUOTE,
2828 # Terminated with a normal quoted field.
2829 self.HAD_QUOTE_IN_QUOTED), (
2830 line, state, fields)
2831 return fields
2832
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002833 def __init__(self, logname):
2834 """Starts the log collection.
2835
2836 Requires administrative access. logman.exe is synchronous so no need for a
2837 "warmup" call. 'Windows Kernel Trace' is *localized* so use its GUID
2838 instead. The GUID constant name is SystemTraceControlGuid. Lovely.
2839
2840 One can get the list of potentially interesting providers with:
2841 "logman query providers | findstr /i file"
2842 """
2843 super(LogmanTrace.Tracer, self).__init__(logname)
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002844 self._signal_script = create_subprocess_thunk()
2845 self._scripts_to_cleanup.append(self._signal_script)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002846 cmd_start = [
2847 'logman.exe',
2848 'start',
2849 'NT Kernel Logger',
2850 '-p', '{9e814aad-3204-11d2-9a82-006008a86939}',
2851 # splitio,fileiocompletion,syscall,file,cswitch,img
2852 '(process,fileio,thread)',
2853 '-o', self._logname + '.etl',
2854 '-ets', # Send directly to kernel
2855 # Values extracted out of thin air.
2856 # Event Trace Session buffer size in kb.
2857 '-bs', '10240',
2858 # Number of Event Trace Session buffers.
2859 '-nb', '16', '256',
2860 ]
2861 logging.debug('Running: %s' % cmd_start)
2862 try:
2863 subprocess.check_call(
2864 cmd_start,
2865 stdin=subprocess.PIPE,
2866 stdout=subprocess.PIPE,
2867 stderr=subprocess.STDOUT)
2868 except subprocess.CalledProcessError, e:
2869 if e.returncode == -2147024891:
2870 print >> sys.stderr, 'Please restart with an elevated admin prompt'
2871 elif e.returncode == -2144337737:
2872 print >> sys.stderr, (
2873 'A kernel trace was already running, stop it and try again')
2874 raise
2875
2876 def trace(self, cmd, cwd, tracename, output):
2877 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
2878 assert os.path.isabs(cmd[0]), cmd[0]
2879 assert os.path.isabs(cwd), cwd
2880 assert os.path.normpath(cwd) == cwd, cwd
2881 with self._lock:
2882 if not self._initialized:
2883 raise TracingFailure(
2884 'Called Tracer.trace() on an unitialized object',
2885 None, None, None, tracename)
2886 assert tracename not in (i['trace'] for i in self._traces)
2887
2888 # Use "logman -?" for help.
2889
2890 stdout = stderr = None
2891 if output:
2892 stdout = subprocess.PIPE
2893 stderr = subprocess.STDOUT
2894
2895 # Run the child process.
2896 logging.debug('Running: %s' % cmd)
maruel@chromium.org2909b7c2013-05-29 18:52:59 +00002897 # Use the temporary script generated with create_subprocess_thunk() so we
2898 # have a clear pid owner. Since trace_inputs.py can be used as a library
2899 # and could trace multiple processes simultaneously, it makes it more
2900 # complex if the executable to be traced is executed directly here. It
2901 # also solves issues related to logman.exe that needs to be executed to
2902 # control the kernel trace.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002903 child_cmd = [
2904 sys.executable,
maruel@chromium.org4c9b4fa2013-05-29 19:22:14 +00002905 self._signal_script,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002906 tracename,
2907 ]
2908 child = subprocess.Popen(
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00002909 child_cmd + tools.fix_python_path(cmd),
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002910 cwd=cwd,
2911 stdin=subprocess.PIPE,
2912 stdout=stdout,
2913 stderr=stderr)
2914 logging.debug('Started child pid: %d' % child.pid)
2915 out = child.communicate()[0]
2916 # This doesn't mean all the grand-children are done. Sadly, we don't have
2917 # a good way to determine that.
2918
2919 with self._lock:
2920 assert tracename not in (i['trace'] for i in self._traces)
2921 self._traces.append({
maruel@chromium.orgf43e68b2012-10-15 20:23:10 +00002922 'cmd': cmd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002923 'cwd': cwd,
maruel@chromium.org964ddde2012-10-29 18:13:46 +00002924 'output': out,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002925 'pid': child.pid,
maruel@chromium.org19e9fcd2012-10-29 18:24:02 +00002926 # Used to figure out the real process when process ids are reused.
2927 'thunk_cmd': child_cmd,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002928 'trace': tracename,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002929 })
2930
2931 return child.returncode, out
2932
2933 def close(self, _timeout=None):
2934 """Stops the kernel log collection and converts the traces to text
2935 representation.
2936 """
2937 with self._lock:
maruel@chromium.org808e3f22013-05-16 16:42:54 +00002938 try:
2939 super(LogmanTrace.Tracer, self).close()
2940 finally:
2941 cmd_stop = [
2942 'logman.exe',
2943 'stop',
2944 'NT Kernel Logger',
2945 '-ets', # Sends the command directly to the kernel.
2946 ]
2947 logging.debug('Running: %s' % cmd_stop)
2948 subprocess.check_call(
2949 cmd_stop,
2950 stdin=subprocess.PIPE,
2951 stdout=subprocess.PIPE,
2952 stderr=subprocess.STDOUT)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002953
2954 def post_process_log(self):
2955 """Converts the .etl file into .csv then into .json."""
2956 super(LogmanTrace.Tracer, self).post_process_log()
2957 logformat = 'csv'
2958 self._convert_log(logformat)
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00002959 self._trim_log(logformat)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002960
maruel@chromium.org808e3f22013-05-16 16:42:54 +00002961 def _gen_logdata(self):
2962 return {
2963 'format': 'csv',
2964 'traces': self._traces,
2965 }
2966
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00002967 def _trim_log(self, logformat):
2968 """Reduces the amount of data in original log by generating a 'reduced'
2969 log.
2970 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002971 if logformat == 'csv_utf16':
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002972 file_handle = codecs.open(
2973 self._logname + '.' + logformat, 'r', encoding='utf-16')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002974
2975 elif logformat == 'csv':
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002976 assert sys.getfilesystemencoding() == 'mbcs'
2977 file_handle = codecs.open(
2978 self._logname + '.' + logformat, 'r',
2979 encoding=get_current_encoding())
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002980
2981 supported_events = LogmanTrace.Context.supported_events()
2982
2983 def trim(generator):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00002984 """Loads items from the generator and returns the interesting data.
2985
2986 It filters out any uninteresting line and reduce the amount of data in
2987 the trace.
2988 """
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002989 for index, line in enumerate(generator):
2990 if not index:
2991 if line != self.EXPECTED_HEADER:
2992 raise TracingFailure(
maruel@chromium.org306e0e72012-11-02 18:22:03 +00002993 'Found malformed header: %s' % line,
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00002994 None, None, None)
2995 continue
2996 # As you can see, the CSV is full of useful non-redundant information:
2997 if (line[2] != '0' or # Event ID
2998 line[3] not in ('2', '3') or # Version
2999 line[4] != '0' or # Channel
3000 line[5] != '0' or # Level
3001 line[7] != '0' or # Task
3002 line[8] != '0x0000000000000000' or # Keyword
3003 line[12] != '' or # Instance ID
3004 line[13] != '' or # Parent Instance ID
3005 line[14] != self.NULL_GUID or # Activity ID
3006 line[15] != ''): # Related Activity ID
3007 raise TracingFailure(
3008 'Found unexpected values in line: %s' % ' '.join(line),
3009 None, None, None)
3010
3011 if (line[self.EVENT_NAME], line[self.TYPE]) not in supported_events:
3012 continue
3013
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003014 yield [
3015 line[self.EVENT_NAME],
3016 line[self.TYPE],
3017 line[self.PID],
3018 line[self.TID],
3019 line[self.PROCESSOR_ID],
3020 line[self.TIMESTAMP],
3021 ] + line[self.USER_DATA:]
3022
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003023 # must not convert the trim() call into a list, since it will use too much
3024 # memory for large trace. use a csv file as a workaround since the json
3025 # parser requires a complete in-memory file.
maruel12e30012015-10-09 11:55:35 -07003026 path = os.path.join(
3027 unicode(os.getcwd()), u'%s.preprocessed' % self._logname)
3028 with fs.open(path, 'wb') as f:
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003029 # $ and * can't be used in file name on windows, reducing the likelihood
3030 # of having to escape a string.
3031 out = csv.writer(
3032 f, delimiter='$', quotechar='*', quoting=csv.QUOTE_MINIMAL)
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003033 for line in trim(self.CsvReader(file_handle)):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003034 out.writerow([s.encode('utf-8') for s in line])
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003035
3036 def _convert_log(self, logformat):
3037 """Converts the ETL trace to text representation.
3038
3039 Normally, 'csv' is sufficient. If complex scripts are used (like eastern
3040 languages), use 'csv_utf16'. If localization gets in the way, use 'xml'.
3041
3042 Arguments:
3043 - logformat: Text format to be generated, csv, csv_utf16 or xml.
3044
3045 Use "tracerpt -?" for help.
3046 """
3047 LOCALE_INVARIANT = 0x7F
3048 windll.kernel32.SetThreadLocale(LOCALE_INVARIANT)
3049 cmd_convert = [
3050 'tracerpt.exe',
3051 '-l', self._logname + '.etl',
3052 '-o', self._logname + '.' + logformat,
3053 '-gmt', # Use UTC
3054 '-y', # No prompt
3055 # Use -of XML to get the header of each items after column 19, e.g. all
3056 # the actual headers of 'User Data'.
3057 ]
3058
3059 if logformat == 'csv':
3060 # tracerpt localizes the 'Type' column, for major brainfuck
3061 # entertainment. I can't imagine any sane reason to do that.
3062 cmd_convert.extend(['-of', 'CSV'])
3063 elif logformat == 'csv_utf16':
3064 # This causes it to use UTF-16, which doubles the log size but ensures
3065 # the log is readable for non-ASCII characters.
3066 cmd_convert.extend(['-of', 'CSV', '-en', 'Unicode'])
3067 elif logformat == 'xml':
3068 cmd_convert.extend(['-of', 'XML'])
3069 else:
3070 raise ValueError('Unexpected log format \'%s\'' % logformat)
3071 logging.debug('Running: %s' % cmd_convert)
3072 # This can takes tens of minutes for large logs.
3073 # Redirects all output to stderr.
3074 subprocess.check_call(
3075 cmd_convert,
3076 stdin=subprocess.PIPE,
3077 stdout=sys.stderr,
3078 stderr=sys.stderr)
3079
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003080 def __init__(self, use_sudo=False): # pylint: disable=W0613
3081 super(LogmanTrace, self).__init__()
3082 # Ignore use_sudo. It's irrelevant on Windows but kept to simplify the API.
3083
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003084 @staticmethod
3085 def clean_trace(logname):
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003086 for ext in ('', '.csv', '.etl', '.json', '.xml', '.preprocessed'):
maruel12e30012015-10-09 11:55:35 -07003087 if fs.isfile(logname + ext):
3088 fs.remove(logname + ext)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003089
3090 @classmethod
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003091 def parse_log(cls, logname, blacklist, trace_name):
3092 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
maruel@chromium.org8b056ba2012-10-16 14:04:49 +00003093 assert os.path.isabs(logname)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003094
3095 def blacklist_more(filepath):
3096 # All the NTFS metadata is in the form x:\$EXTEND or stuff like that.
3097 return blacklist(filepath) or re.match(r'[A-Z]\:\\\$EXTEND', filepath)
3098
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003099 # Create a list of (Context, result_dict) tuples. This is necessary because
3100 # the csv file may be larger than the amount of available memory.
3101 contexes = [
3102 (
3103 cls.Context(
3104 blacklist_more, item['pid'], item['trace'], item['thunk_cmd']),
3105 {
3106 'output': item['output'],
3107 'trace': item['trace'],
3108 },
3109 )
Marc-Antoine Ruelde011802013-11-12 15:19:47 -05003110 for item in tools.read_json(logname)['traces']
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003111 if not trace_name or item['trace'] == trace_name
3112 ]
3113
3114 # The log may be too large to fit in memory and it is not efficient to read
3115 # it multiple times, so multiplex the contexes instead, which is slightly
3116 # more awkward.
maruel12e30012015-10-09 11:55:35 -07003117 path = os.path.join(unicode(os.getcwd()), u'%s.preprocessed' % logname)
3118 with fs.open(path, 'rb') as f:
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003119 lines = csv.reader(
3120 f, delimiter='$', quotechar='*', quoting=csv.QUOTE_MINIMAL)
3121 for encoded in lines:
3122 line = [s.decode('utf-8') for s in encoded]
3123 # Convert the PID in-place from hex.
3124 line[cls.Context.PID] = int(line[cls.Context.PID], 16)
3125 for context in contexes:
3126 if 'exception' in context[1]:
3127 continue
3128 try:
3129 context[0].on_line(line)
3130 except TracingFailure:
3131 context[1]['exception'] = sys.exc_info()
3132
3133 for context in contexes:
3134 if 'exception' in context[1]:
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003135 continue
maruel@chromium.orgeaedeb42012-10-31 14:38:22 +00003136 context[1]['results'] = context[0].to_results()
3137
3138 return [context[1] for context in contexes]
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003139
3140
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003141def get_api(**kwargs):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003142 """Returns the correct implementation for the current OS."""
3143 if sys.platform == 'cygwin':
3144 raise NotImplementedError(
3145 'Not implemented for cygwin, start the script from Win32 python')
3146 flavors = {
3147 'win32': LogmanTrace,
3148 'darwin': Dtrace,
3149 'sunos5': Dtrace,
3150 'freebsd7': Dtrace,
3151 'freebsd8': Dtrace,
3152 }
3153 # Defaults to strace.
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003154 return flavors.get(sys.platform, Strace)(**kwargs)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003155
3156
3157def extract_directories(root_dir, files, blacklist):
3158 """Detects if all the files in a directory are in |files| and if so, replace
3159 the individual files by a Results.Directory instance.
3160
3161 Takes a list of Results.File instances and returns a shorter list of
3162 Results.File and Results.Directory instances.
3163
3164 Arguments:
3165 - root_dir: Optional base directory that shouldn't be search further.
3166 - files: list of Results.File instances.
maruel@chromium.org3683afe2013-07-27 00:09:27 +00003167 - blacklist: lambda to reject unneeded files, for example r'.+\.pyc'.
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003168 """
3169 logging.info(
3170 'extract_directories(%s, %d files, ...)' % (root_dir, len(files)))
3171 assert not (root_dir or '').endswith(os.path.sep), root_dir
maruel@chromium.org306e0e72012-11-02 18:22:03 +00003172 # It is important for root_dir to not be a symlinked path, make sure to call
3173 # os.path.realpath() as needed.
3174 assert not root_dir or (
maruel12e30012015-10-09 11:55:35 -07003175 fs.realpath(file_path.get_native_path_case(root_dir)) == root_dir)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003176 assert not any(isinstance(f, Results.Directory) for f in files)
3177 # Remove non existent files.
3178 files = [f for f in files if f.existent]
3179 if not files:
3180 return files
3181 # All files must share the same root, which can be None.
3182 assert len(set(f.root for f in files)) == 1, set(f.root for f in files)
3183
3184 # Creates a {directory: {filename: File}} mapping, up to root.
3185 buckets = {}
3186 if root_dir:
3187 buckets[root_dir] = {}
3188 for fileobj in files:
3189 path = fileobj.full_path
3190 directory = os.path.dirname(path)
3191 assert directory
3192 # Do not use os.path.basename() so trailing os.path.sep is kept.
3193 basename = path[len(directory)+1:]
3194 files_in_directory = buckets.setdefault(directory, {})
3195 files_in_directory[basename] = fileobj
3196 # Add all the directories recursively up to root.
3197 while True:
3198 old_d = directory
3199 directory = os.path.dirname(directory)
3200 if directory + os.path.sep == root_dir or directory == old_d:
3201 break
3202 buckets.setdefault(directory, {})
3203
3204 root_prefix = len(root_dir) + 1 if root_dir else 0
3205 for directory in sorted(buckets, reverse=True):
maruel12e30012015-10-09 11:55:35 -07003206 if not fs.isdir(directory):
maruel@chromium.orgb1678e62013-03-19 15:55:17 +00003207 logging.debug(
3208 '%s was a directory but doesn\'t exist anymore; ignoring', directory)
3209 continue
maruel12e30012015-10-09 11:55:35 -07003210 actual = set(f for f in fs.listdir(directory) if not blacklist(f))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003211 expected = set(buckets[directory])
3212 if not (actual - expected):
3213 parent = os.path.dirname(directory)
3214 buckets[parent][os.path.basename(directory)] = Results.Directory(
3215 root_dir,
3216 directory[root_prefix:],
3217 False,
3218 sum(f.size for f in buckets[directory].itervalues()),
3219 sum(f.nb_files for f in buckets[directory].itervalues()))
3220 # Remove the whole bucket.
3221 del buckets[directory]
3222
3223 # Reverse the mapping with what remains. The original instances are returned,
3224 # so the cached meta data is kept.
3225 files = sum((x.values() for x in buckets.itervalues()), [])
3226 return sorted(files, key=lambda x: x.path)
3227
3228
3229def trace(logfile, cmd, cwd, api, output):
3230 """Traces an executable. Returns (returncode, output) from api.
3231
3232 Arguments:
3233 - logfile: file to write to.
3234 - cmd: command to run.
3235 - cwd: current directory to start the process in.
3236 - api: a tracing api instance.
3237 - output: if True, returns output, otherwise prints it at the console.
3238 """
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00003239 cmd = tools.fix_python_path(cmd)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003240 api.clean_trace(logfile)
3241 with api.get_tracer(logfile) as tracer:
3242 return tracer.trace(cmd, cwd, 'default', output)
3243
3244
maruel@chromium.orge5322512013-08-19 20:17:57 +00003245def CMDclean(parser, args):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003246 """Cleans up traces."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003247 options, args = parser.parse_args(args)
3248 api = get_api()
3249 api.clean_trace(options.log)
3250 return 0
3251
3252
maruel@chromium.orge5322512013-08-19 20:17:57 +00003253def CMDtrace(parser, args):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003254 """Traces an executable."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003255 parser.allow_interspersed_args = False
3256 parser.add_option(
3257 '-q', '--quiet', action='store_true',
3258 help='Redirects traced executable output to /dev/null')
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003259 parser.add_option(
3260 '-s', '--sudo', action='store_true',
3261 help='Use sudo when shelling out the tracer tool (ignored on Windows)')
3262 parser.add_option(
3263 '-n', '--no-sudo', action='store_false',
3264 help='Don\'t use sudo')
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003265 options, args = parser.parse_args(args)
3266
3267 if not args:
3268 parser.error('Please provide a command to run')
3269
Marc-Antoine Ruel5906fcd2013-11-08 21:02:24 -05003270 if not can_trace():
3271 parser.error('Please rerun this program with admin privileges')
3272
maruel12e30012015-10-09 11:55:35 -07003273 if not os.path.isabs(args[0]) and fs.access(args[0], os.X_OK):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003274 args[0] = os.path.abspath(args[0])
3275
maruel@chromium.orgf14633b2013-01-25 15:12:52 +00003276 # options.sudo default value is None, which is to do whatever tracer defaults
3277 # do.
3278 api = get_api(use_sudo=options.sudo)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003279 return trace(options.log, args, os.getcwd(), api, options.quiet)[0]
3280
3281
maruel@chromium.orge5322512013-08-19 20:17:57 +00003282def CMDread(parser, args):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003283 """Reads the logs and prints the result."""
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003284 parser.add_option(
3285 '-V', '--variable',
3286 nargs=2,
3287 action='append',
3288 dest='variables',
3289 metavar='VAR_NAME directory',
3290 default=[],
3291 help=('Variables to replace relative directories against. Example: '
3292 '"-v \'$HOME\' \'/home/%s\'" will replace all occurence of your '
3293 'home dir with $HOME') % getpass.getuser())
3294 parser.add_option(
3295 '--root-dir',
3296 help='Root directory to base everything off it. Anything outside of this '
3297 'this directory will not be reported')
3298 parser.add_option(
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003299 '--trace-name',
3300 help='Only reads one of the trace. Defaults to reading all traces')
3301 parser.add_option(
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003302 '-j', '--json', action='store_true',
3303 help='Outputs raw result data as json')
3304 parser.add_option(
maruel@chromium.org3683afe2013-07-27 00:09:27 +00003305 '--trace-blacklist', action='append', default=[],
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003306 help='List of regexp to use as blacklist filter')
3307 options, args = parser.parse_args(args)
3308
3309 if options.root_dir:
maruel@chromium.org561d4b22013-09-26 21:08:08 +00003310 options.root_dir = file_path.get_native_path_case(
maruel@chromium.org7dc17b32013-04-02 22:27:05 +00003311 unicode(os.path.abspath(options.root_dir)))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003312
3313 variables = dict(options.variables)
3314 api = get_api()
Marc-Antoine Ruelac54cb42013-11-18 14:05:35 -05003315 blacklist = tools.gen_blacklist(options.trace_blacklist)
maruel@chromium.orgec74ff82012-10-29 18:14:47 +00003316 data = api.parse_log(options.log, blacklist, options.trace_name)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003317 # Process each trace.
3318 output_as_json = []
maruel@chromium.org9e56b592013-05-21 16:56:05 +00003319 try:
3320 for item in data:
3321 if 'exception' in item:
3322 # Do not abort the other traces.
3323 print >> sys.stderr, (
3324 'Trace %s: Got an exception: %s' % (
3325 item['trace'], item['exception'][1]))
3326 continue
3327 results = item['results']
3328 if options.root_dir:
3329 results = results.strip_root(options.root_dir)
3330
3331 if options.json:
3332 output_as_json.append(results.flatten())
3333 else:
3334 simplified = extract_directories(
3335 options.root_dir, results.files, blacklist)
3336 simplified = [f.replace_variables(variables) for f in simplified]
3337 if len(data) > 1:
3338 print('Trace: %s' % item['trace'])
3339 print('Total: %d' % len(results.files))
3340 print('Non existent: %d' % len(results.non_existent))
3341 for f in results.non_existent:
3342 print(' %s' % f.path)
3343 print(
3344 'Interesting: %d reduced to %d' % (
3345 len(results.existent), len(simplified)))
3346 for f in simplified:
3347 print(' %s' % f.path)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003348
3349 if options.json:
Marc-Antoine Ruelde011802013-11-12 15:19:47 -05003350 tools.write_json(sys.stdout, output_as_json, False)
maruel@chromium.org9e56b592013-05-21 16:56:05 +00003351 except KeyboardInterrupt:
3352 return 1
3353 except IOError as e:
3354 if e.errno == errno.EPIPE:
3355 # Do not print a stack trace when the output is piped to less and the user
3356 # quits before the whole output was written.
3357 return 1
3358 raise
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003359 return 0
3360
3361
Marc-Antoine Ruelf74cffe2015-07-15 15:21:34 -04003362class OptionParserTraceInputs(logging_utils.OptionParserWithLogging):
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003363 """Adds automatic --log handling."""
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00003364
3365 # Disable --log-file options since both --log and --log-file options are
3366 # confusing.
3367 # TODO(vadimsh): Rename --log-file or --log to something else.
3368 enable_log_file = False
3369
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003370 def __init__(self, **kwargs):
Marc-Antoine Ruelf74cffe2015-07-15 15:21:34 -04003371 logging_utils.OptionParserWithLogging.__init__(self, **kwargs)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003372 self.add_option(
3373 '-l', '--log', help='Log file to generate or read, required')
3374
3375 def parse_args(self, *args, **kwargs):
3376 """Makes sure the paths make sense.
3377
3378 On Windows, / and \ are often mixed together in a path.
3379 """
Marc-Antoine Ruelf74cffe2015-07-15 15:21:34 -04003380 options, args = logging_utils.OptionParserWithLogging.parse_args(
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003381 self, *args, **kwargs)
3382 if not options.log:
3383 self.error('Must supply a log file with -l')
maruel12e30012015-10-09 11:55:35 -07003384 options.log = unicode(os.path.abspath(options.log))
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003385 return options, args
3386
3387
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003388def main(argv):
maruel@chromium.orge5322512013-08-19 20:17:57 +00003389 dispatcher = subcommand.CommandDispatcher(__name__)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003390 try:
maruel@chromium.org3d671992013-08-20 00:38:27 +00003391 return dispatcher.execute(
3392 OptionParserTraceInputs(version=__version__), argv)
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003393 except TracingFailure, e:
3394 sys.stderr.write('\nError: ')
3395 sys.stderr.write(str(e))
3396 sys.stderr.write('\n')
3397 return 1
3398
3399
3400if __name__ == '__main__':
maruel8e4e40c2016-05-30 06:21:07 -07003401 subprocess42.inhibit_os_error_reporting()
maruel@chromium.orge5322512013-08-19 20:17:57 +00003402 fix_encoding.fix_encoding()
vadimsh@chromium.orga4326472013-08-24 02:05:41 +00003403 tools.disable_buffering()
maruel@chromium.orge5322512013-08-19 20:17:57 +00003404 colorama.init()
maruel@chromium.org8fb47fe2012-10-03 20:13:15 +00003405 sys.exit(main(sys.argv[1:]))