maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1 | #!/usr/bin/env python |
| 2 | # coding=utf-8 |
maruel | ea586f3 | 2016-04-05 11:11:33 -0700 | [diff] [blame] | 3 | # Copyright 2012 The LUCI Authors. All rights reserved. |
maruel | f1f5e2a | 2016-05-25 17:10:39 -0700 | [diff] [blame] | 4 | # Use of this source code is governed under the Apache License, Version 2.0 |
| 5 | # that can be found in the LICENSE file. |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 6 | |
| 7 | """Traces an executable and its child processes and extract the files accessed |
| 8 | by them. |
| 9 | |
| 10 | The implementation uses OS-specific API. The native Kernel logger and the ETL |
| 11 | interface is used on Windows. Dtrace is used on OSX. Strace is used otherwise. |
| 12 | The OS-specific implementation is hidden in an 'API' interface. |
| 13 | |
| 14 | The results are embedded in a Results instance. The tracing is done in two |
| 15 | phases, the first is to do the actual trace and generate an |
| 16 | implementation-specific log file. Then the log file is parsed to extract the |
| 17 | information, including the individual child processes and the files accessed |
| 18 | from the log. |
| 19 | """ |
| 20 | |
| 21 | import codecs |
| 22 | import csv |
maruel@chromium.org | 9e56b59 | 2013-05-21 16:56:05 +0000 | [diff] [blame] | 23 | import errno |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 24 | import getpass |
| 25 | import glob |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 26 | import logging |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 27 | import os |
| 28 | import re |
maruel@chromium.org | 9b2112a | 2013-04-05 18:01:02 +0000 | [diff] [blame] | 29 | import stat |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 30 | import subprocess |
| 31 | import sys |
| 32 | import tempfile |
| 33 | import threading |
| 34 | import time |
| 35 | import weakref |
| 36 | |
maruel@chromium.org | e532251 | 2013-08-19 20:17:57 +0000 | [diff] [blame] | 37 | from third_party import colorama |
| 38 | from third_party.depot_tools import fix_encoding |
| 39 | from third_party.depot_tools import subcommand |
| 40 | |
maruel@chromium.org | 561d4b2 | 2013-09-26 21:08:08 +0000 | [diff] [blame] | 41 | from utils import file_path |
maruel | 12e3001 | 2015-10-09 11:55:35 -0700 | [diff] [blame] | 42 | from utils import fs |
Marc-Antoine Ruel | f74cffe | 2015-07-15 15:21:34 -0400 | [diff] [blame] | 43 | from utils import logging_utils |
maruel | 8e4e40c | 2016-05-30 06:21:07 -0700 | [diff] [blame] | 44 | from utils import subprocess42 |
vadimsh@chromium.org | a432647 | 2013-08-24 02:05:41 +0000 | [diff] [blame] | 45 | from utils import tools |
| 46 | |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 47 | ## OS-specific imports |
| 48 | |
| 49 | if sys.platform == 'win32': |
maruel@chromium.org | 561d4b2 | 2013-09-26 21:08:08 +0000 | [diff] [blame] | 50 | from ctypes.wintypes import byref, c_int, c_wchar_p |
| 51 | from ctypes.wintypes import windll # pylint: disable=E0611 |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 52 | |
| 53 | |
maruel | 12e3001 | 2015-10-09 11:55:35 -0700 | [diff] [blame] | 54 | __version__ = '0.2' |
maruel@chromium.org | 3d67199 | 2013-08-20 00:38:27 +0000 | [diff] [blame] | 55 | |
| 56 | |
tansell | a494944 | 2016-06-23 22:34:32 -0700 | [diff] [blame^] | 57 | BASE_DIR = os.path.dirname(os.path.abspath( |
| 58 | __file__.decode(sys.getfilesystemencoding()))) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 59 | ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR)) |
| 60 | |
| 61 | |
| 62 | class TracingFailure(Exception): |
| 63 | """An exception occured during tracing.""" |
| 64 | def __init__(self, description, pid, line_number, line, *args): |
| 65 | super(TracingFailure, self).__init__( |
| 66 | description, pid, line_number, line, *args) |
| 67 | self.description = description |
| 68 | self.pid = pid |
| 69 | self.line_number = line_number |
| 70 | self.line = line |
| 71 | self.extra = args |
| 72 | |
| 73 | def __str__(self): |
| 74 | out = self.description |
| 75 | if self.pid: |
| 76 | out += '\npid: %d' % self.pid |
| 77 | if self.line_number: |
| 78 | out += '\nline: %d' % self.line_number |
| 79 | if self.line: |
| 80 | out += '\n%s' % self.line |
| 81 | if self.extra: |
| 82 | out += '\n' + ', '.join(map(str, filter(None, self.extra))) |
| 83 | return out |
| 84 | |
| 85 | |
| 86 | ## OS-specific functions |
| 87 | |
| 88 | if sys.platform == 'win32': |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 89 | def get_current_encoding(): |
| 90 | """Returns the 'ANSI' code page associated to the process.""" |
| 91 | return 'cp%d' % int(windll.kernel32.GetACP()) |
| 92 | |
| 93 | |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 94 | def CommandLineToArgvW(command_line): |
| 95 | """Splits a commandline into argv using CommandLineToArgvW().""" |
| 96 | # http://msdn.microsoft.com/library/windows/desktop/bb776391.aspx |
| 97 | size = c_int() |
maruel@chromium.org | 306e0e7 | 2012-11-02 18:22:03 +0000 | [diff] [blame] | 98 | assert isinstance(command_line, unicode) |
| 99 | ptr = windll.shell32.CommandLineToArgvW(command_line, byref(size)) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 100 | try: |
| 101 | return [arg for arg in (c_wchar_p * size.value).from_address(ptr)] |
| 102 | finally: |
| 103 | windll.kernel32.LocalFree(ptr) |
| 104 | |
| 105 | |
Marc-Antoine Ruel | 5906fcd | 2013-11-08 21:02:24 -0500 | [diff] [blame] | 106 | def can_trace(): |
| 107 | """Returns True if the user is an administrator on Windows. |
| 108 | |
| 109 | It is required for tracing to work. |
| 110 | """ |
| 111 | if sys.platform != 'win32': |
| 112 | return True |
| 113 | return bool(windll.shell32.IsUserAnAdmin()) |
| 114 | |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 115 | |
maruel@chromium.org | 2909b7c | 2013-05-29 18:52:59 +0000 | [diff] [blame] | 116 | def create_subprocess_thunk(): |
| 117 | """Creates a small temporary script to start the child process. |
| 118 | |
| 119 | This thunk doesn't block, its unique name is used to identify it as the |
| 120 | parent. |
| 121 | """ |
Marc-Antoine Ruel | 3c979cb | 2015-03-11 13:43:28 -0400 | [diff] [blame] | 122 | handle, name = tempfile.mkstemp(prefix=u'trace_inputs_thunk', suffix='.py') |
maruel@chromium.org | 2909b7c | 2013-05-29 18:52:59 +0000 | [diff] [blame] | 123 | try: |
| 124 | os.write( |
| 125 | handle, |
| 126 | ( |
| 127 | 'import subprocess, sys\n' |
| 128 | 'sys.exit(subprocess.call(sys.argv[2:]))\n' |
| 129 | )) |
| 130 | finally: |
| 131 | os.close(handle) |
| 132 | return name |
| 133 | |
| 134 | |
| 135 | def create_exec_thunk(): |
| 136 | """Creates a small temporary script to start the child executable. |
| 137 | |
| 138 | Reads from the file handle provided as the fisrt argument to block, then |
| 139 | execv() the command to be traced. |
| 140 | """ |
Marc-Antoine Ruel | 3c979cb | 2015-03-11 13:43:28 -0400 | [diff] [blame] | 141 | handle, name = tempfile.mkstemp(prefix=u'trace_inputs_thunk', suffix='.py') |
maruel@chromium.org | 2909b7c | 2013-05-29 18:52:59 +0000 | [diff] [blame] | 142 | try: |
| 143 | os.write( |
| 144 | handle, |
| 145 | ( |
| 146 | 'import os, sys\n' |
| 147 | 'fd = int(sys.argv[1])\n' |
| 148 | # This will block until the controlling process writes a byte on the |
| 149 | # pipe. It will do so once the tracing tool, e.g. strace, is ready to |
| 150 | # trace. |
| 151 | 'os.read(fd, 1)\n' |
| 152 | 'os.close(fd)\n' |
| 153 | 'os.execv(sys.argv[2], sys.argv[2:])\n' |
| 154 | )) |
| 155 | finally: |
| 156 | os.close(handle) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 157 | return name |
| 158 | |
| 159 | |
| 160 | def strace_process_quoted_arguments(text): |
| 161 | """Extracts quoted arguments on a string and return the arguments as a list. |
| 162 | |
| 163 | Implemented as an automaton. Supports incomplete strings in the form |
| 164 | '"foo"...'. |
| 165 | |
| 166 | Example: |
| 167 | With text = '"foo", "bar"', the function will return ['foo', 'bar'] |
| 168 | |
| 169 | TODO(maruel): Implement escaping. |
| 170 | """ |
| 171 | # All the possible states of the DFA. |
| 172 | ( NEED_QUOTE, # Begining of a new arguments. |
| 173 | INSIDE_STRING, # Inside an argument. |
| 174 | ESCAPED, # Found a '\' inside a quote. Treat the next char as-is. |
| 175 | NEED_COMMA_OR_DOT, # Right after the closing quote of an argument. Could be |
| 176 | # a serie of 3 dots or a comma. |
| 177 | NEED_SPACE, # Right after a comma |
| 178 | NEED_DOT_2, # Found a dot, need a second one. |
| 179 | NEED_DOT_3, # Found second dot, need a third one. |
| 180 | NEED_COMMA, # Found third dot, need a comma. |
| 181 | ) = range(8) |
| 182 | |
| 183 | state = NEED_QUOTE |
| 184 | out = [] |
| 185 | for index, char in enumerate(text): |
| 186 | if char == '"': |
| 187 | if state == NEED_QUOTE: |
| 188 | state = INSIDE_STRING |
| 189 | # A new argument was found. |
| 190 | out.append('') |
| 191 | elif state == INSIDE_STRING: |
| 192 | # The argument is now closed. |
| 193 | state = NEED_COMMA_OR_DOT |
| 194 | elif state == ESCAPED: |
| 195 | out[-1] += char |
| 196 | state = INSIDE_STRING |
| 197 | else: |
| 198 | raise ValueError( |
maruel@chromium.org | 588e09d | 2013-05-21 14:38:57 +0000 | [diff] [blame] | 199 | 'Can\'t process char \'%s\' at column %d for: %r' % ( |
| 200 | char, index, text), |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 201 | index, |
| 202 | text) |
| 203 | elif char == ',': |
| 204 | if state in (NEED_COMMA_OR_DOT, NEED_COMMA): |
| 205 | state = NEED_SPACE |
| 206 | elif state == INSIDE_STRING: |
| 207 | out[-1] += char |
| 208 | elif state == ESCAPED: |
| 209 | out[-1] += char |
| 210 | state = INSIDE_STRING |
| 211 | else: |
| 212 | raise ValueError( |
maruel@chromium.org | 588e09d | 2013-05-21 14:38:57 +0000 | [diff] [blame] | 213 | 'Can\'t process char \'%s\' at column %d for: %r' % ( |
| 214 | char, index, text), |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 215 | index, |
| 216 | text) |
| 217 | elif char == ' ': |
| 218 | if state == NEED_SPACE: |
| 219 | state = NEED_QUOTE |
| 220 | elif state == INSIDE_STRING: |
| 221 | out[-1] += char |
| 222 | elif state == ESCAPED: |
| 223 | out[-1] += char |
| 224 | state = INSIDE_STRING |
| 225 | else: |
| 226 | raise ValueError( |
maruel@chromium.org | 588e09d | 2013-05-21 14:38:57 +0000 | [diff] [blame] | 227 | 'Can\'t process char \'%s\' at column %d for: %r' % ( |
| 228 | char, index, text), |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 229 | index, |
| 230 | text) |
| 231 | elif char == '.': |
maruel@chromium.org | 588e09d | 2013-05-21 14:38:57 +0000 | [diff] [blame] | 232 | if state in (NEED_QUOTE, NEED_COMMA_OR_DOT): |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 233 | # The string is incomplete, this mean the strace -s flag should be |
| 234 | # increased. |
maruel@chromium.org | 588e09d | 2013-05-21 14:38:57 +0000 | [diff] [blame] | 235 | # For NEED_QUOTE, the input string would look like '"foo", ...'. |
| 236 | # For NEED_COMMA_OR_DOT, the input string would look like '"foo"...' |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 237 | state = NEED_DOT_2 |
| 238 | elif state == NEED_DOT_2: |
| 239 | state = NEED_DOT_3 |
| 240 | elif state == NEED_DOT_3: |
| 241 | state = NEED_COMMA |
| 242 | elif state == INSIDE_STRING: |
| 243 | out[-1] += char |
| 244 | elif state == ESCAPED: |
| 245 | out[-1] += char |
| 246 | state = INSIDE_STRING |
| 247 | else: |
| 248 | raise ValueError( |
maruel@chromium.org | 588e09d | 2013-05-21 14:38:57 +0000 | [diff] [blame] | 249 | 'Can\'t process char \'%s\' at column %d for: %r' % ( |
| 250 | char, index, text), |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 251 | index, |
| 252 | text) |
| 253 | elif char == '\\': |
| 254 | if state == ESCAPED: |
| 255 | out[-1] += char |
| 256 | state = INSIDE_STRING |
| 257 | elif state == INSIDE_STRING: |
| 258 | state = ESCAPED |
| 259 | else: |
| 260 | raise ValueError( |
maruel@chromium.org | 588e09d | 2013-05-21 14:38:57 +0000 | [diff] [blame] | 261 | 'Can\'t process char \'%s\' at column %d for: %r' % ( |
| 262 | char, index, text), |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 263 | index, |
| 264 | text) |
| 265 | else: |
| 266 | if state == INSIDE_STRING: |
| 267 | out[-1] += char |
| 268 | else: |
| 269 | raise ValueError( |
maruel@chromium.org | 588e09d | 2013-05-21 14:38:57 +0000 | [diff] [blame] | 270 | 'Can\'t process char \'%s\' at column %d for: %r' % ( |
| 271 | char, index, text), |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 272 | index, |
| 273 | text) |
| 274 | if state not in (NEED_COMMA, NEED_COMMA_OR_DOT): |
| 275 | raise ValueError( |
| 276 | 'String is incorrectly terminated: %r' % text, |
| 277 | text) |
| 278 | return out |
| 279 | |
| 280 | |
maruel@chromium.org | 306e0e7 | 2012-11-02 18:22:03 +0000 | [diff] [blame] | 281 | def assert_is_renderable(pseudo_string): |
| 282 | """Asserts the input is a valid object to be processed by render().""" |
| 283 | assert ( |
maruel@chromium.org | bea76be | 2013-06-03 20:59:25 +0000 | [diff] [blame] | 284 | pseudo_string is None or |
| 285 | isinstance(pseudo_string, unicode) or |
maruel@chromium.org | 306e0e7 | 2012-11-02 18:22:03 +0000 | [diff] [blame] | 286 | hasattr(pseudo_string, 'render')), repr(pseudo_string) |
| 287 | |
| 288 | |
| 289 | def render(pseudo_string): |
| 290 | """Converts the pseudo-string to an unicode string.""" |
maruel@chromium.org | bea76be | 2013-06-03 20:59:25 +0000 | [diff] [blame] | 291 | if pseudo_string is None or isinstance(pseudo_string, unicode): |
maruel@chromium.org | 306e0e7 | 2012-11-02 18:22:03 +0000 | [diff] [blame] | 292 | return pseudo_string |
| 293 | return pseudo_string.render() |
| 294 | |
| 295 | |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 296 | class Results(object): |
| 297 | """Results of a trace session.""" |
| 298 | |
| 299 | class _TouchedObject(object): |
| 300 | """Something, a file or a directory, that was accessed.""" |
| 301 | def __init__(self, root, path, tainted, size, nb_files): |
| 302 | logging.debug( |
| 303 | '%s(%s, %s, %s, %s, %s)' % |
| 304 | (self.__class__.__name__, root, path, tainted, size, nb_files)) |
maruel@chromium.org | 306e0e7 | 2012-11-02 18:22:03 +0000 | [diff] [blame] | 305 | assert_is_renderable(root) |
| 306 | assert_is_renderable(path) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 307 | self.root = root |
| 308 | self.path = path |
| 309 | self.tainted = tainted |
| 310 | self.nb_files = nb_files |
maruel@chromium.org | 538141b | 2013-06-03 20:57:17 +0000 | [diff] [blame] | 311 | # Can be used as a cache or a default value, depending on context. In |
| 312 | # particular, once self.tainted is True, because the path was replaced |
| 313 | # with a variable, it is not possible to look up the file size. |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 314 | self._size = size |
| 315 | # These are cache only. |
| 316 | self._real_path = None |
| 317 | |
| 318 | # Check internal consistency. |
| 319 | assert path, path |
maruel@chromium.org | 561d4b2 | 2013-09-26 21:08:08 +0000 | [diff] [blame] | 320 | assert tainted or bool(root) != bool(file_path.isabs(path)), (root, path) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 321 | assert tainted or ( |
maruel | 12e3001 | 2015-10-09 11:55:35 -0700 | [diff] [blame] | 322 | not fs.exists(self.full_path) or |
maruel@chromium.org | 561d4b2 | 2013-09-26 21:08:08 +0000 | [diff] [blame] | 323 | (self.full_path == file_path.get_native_path_case(self.full_path))), ( |
| 324 | tainted, |
| 325 | self.full_path, |
| 326 | file_path.get_native_path_case(self.full_path)) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 327 | |
| 328 | @property |
| 329 | def existent(self): |
| 330 | return self.size != -1 |
| 331 | |
| 332 | @property |
| 333 | def full_path(self): |
| 334 | if self.root: |
| 335 | return os.path.join(self.root, self.path) |
| 336 | return self.path |
| 337 | |
| 338 | @property |
| 339 | def real_path(self): |
| 340 | """Returns the path with symlinks resolved.""" |
| 341 | if not self._real_path: |
maruel | 12e3001 | 2015-10-09 11:55:35 -0700 | [diff] [blame] | 342 | self._real_path = fs.path.realpath(self.full_path) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 343 | return self._real_path |
| 344 | |
| 345 | @property |
| 346 | def size(self): |
maruel@chromium.org | 538141b | 2013-06-03 20:57:17 +0000 | [diff] [blame] | 347 | """File's size. -1 is not existent. |
| 348 | |
| 349 | Once tainted, it is not possible the retrieve the file size anymore since |
| 350 | the path is composed of variables. |
| 351 | """ |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 352 | if self._size is None and not self.tainted: |
| 353 | try: |
maruel | 12e3001 | 2015-10-09 11:55:35 -0700 | [diff] [blame] | 354 | self._size = fs.stat(self.full_path).st_size |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 355 | except OSError: |
| 356 | self._size = -1 |
| 357 | return self._size |
| 358 | |
| 359 | def flatten(self): |
| 360 | """Returns a dict representing this object. |
| 361 | |
| 362 | A 'size' of 0 means the file was only touched and not read. |
| 363 | """ |
| 364 | return { |
| 365 | 'path': self.path, |
| 366 | 'size': self.size, |
| 367 | } |
| 368 | |
| 369 | def replace_variables(self, variables): |
| 370 | """Replaces the root of this File with one of the variables if it matches. |
| 371 | |
| 372 | If a variable replacement occurs, the cloned object becomes tainted. |
| 373 | """ |
| 374 | for variable, root_path in variables.iteritems(): |
| 375 | if self.path.startswith(root_path): |
| 376 | return self._clone( |
| 377 | self.root, variable + self.path[len(root_path):], True) |
| 378 | # No need to clone, returns ourself. |
| 379 | return self |
| 380 | |
| 381 | def strip_root(self, root): |
maruel@chromium.org | 306e0e7 | 2012-11-02 18:22:03 +0000 | [diff] [blame] | 382 | """Returns a clone of itself with 'root' stripped off. |
| 383 | |
| 384 | Note that the file is kept if it is either accessible from a symlinked |
| 385 | path that was used to access the file or through the real path. |
| 386 | """ |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 387 | # Check internal consistency. |
maruel@chromium.org | 561d4b2 | 2013-09-26 21:08:08 +0000 | [diff] [blame] | 388 | assert ( |
| 389 | self.tainted or |
| 390 | (file_path.isabs(root) and root.endswith(os.path.sep))), root |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 391 | if not self.full_path.startswith(root): |
| 392 | # Now try to resolve the symlinks to see if it can be reached this way. |
| 393 | # Only try *after* trying without resolving symlink. |
| 394 | if not self.real_path.startswith(root): |
| 395 | return None |
| 396 | path = self.real_path |
| 397 | else: |
| 398 | path = self.full_path |
| 399 | return self._clone(root, path[len(root):], self.tainted) |
| 400 | |
| 401 | def _clone(self, new_root, new_path, tainted): |
| 402 | raise NotImplementedError(self.__class__.__name__) |
| 403 | |
| 404 | class File(_TouchedObject): |
| 405 | """A file that was accessed. May not be present anymore. |
| 406 | |
| 407 | If tainted is true, it means it is not a real path anymore as a variable |
| 408 | replacement occured. |
| 409 | |
maruel@chromium.org | 538141b | 2013-06-03 20:57:17 +0000 | [diff] [blame] | 410 | |mode| can be one of None, TOUCHED, READ or WRITE. |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 411 | """ |
maruel@chromium.org | 538141b | 2013-06-03 20:57:17 +0000 | [diff] [blame] | 412 | # Was probed for existence, and it is existent, but was never _opened_. |
| 413 | TOUCHED = 't' |
| 414 | # Opened for read only and guaranteed to not have been written to. |
| 415 | READ = 'r' |
| 416 | # Opened for write. |
| 417 | WRITE = 'w' |
| 418 | |
| 419 | # They are listed in order of priority. E.g. if a file is traced as TOUCHED |
| 420 | # then as WRITE, only keep WRITE. None means no idea, which is a problem on |
| 421 | # Windows. |
| 422 | ACCEPTABLE_MODES = (None, TOUCHED, READ, WRITE) |
| 423 | |
| 424 | def __init__(self, root, path, tainted, size, mode): |
| 425 | assert mode in self.ACCEPTABLE_MODES |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 426 | super(Results.File, self).__init__(root, path, tainted, size, 1) |
maruel@chromium.org | 538141b | 2013-06-03 20:57:17 +0000 | [diff] [blame] | 427 | self.mode = mode |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 428 | |
| 429 | def _clone(self, new_root, new_path, tainted): |
| 430 | """Clones itself keeping meta-data.""" |
| 431 | # Keep the self.size and self._real_path caches for performance reason. It |
| 432 | # is also important when the file becomes tainted (with a variable instead |
| 433 | # of the real path) since self.path is not an on-disk path anymore so |
| 434 | # out._size cannot be updated. |
maruel@chromium.org | 538141b | 2013-06-03 20:57:17 +0000 | [diff] [blame] | 435 | out = self.__class__(new_root, new_path, tainted, self.size, self.mode) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 436 | out._real_path = self._real_path |
| 437 | return out |
| 438 | |
maruel@chromium.org | 538141b | 2013-06-03 20:57:17 +0000 | [diff] [blame] | 439 | def flatten(self): |
| 440 | out = super(Results.File, self).flatten() |
| 441 | out['mode'] = self.mode |
| 442 | return out |
| 443 | |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 444 | class Directory(_TouchedObject): |
maruel@chromium.org | 538141b | 2013-06-03 20:57:17 +0000 | [diff] [blame] | 445 | """A directory of files. Must exist. |
| 446 | |
| 447 | For a Directory instance, self.size is not a cache, it's an actual value |
| 448 | that is never modified and represents the total size of the files contained |
| 449 | in this directory. It is possible that the directory is empty so that |
| 450 | size==0; this happens if there's only an invalid symlink in it. |
| 451 | """ |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 452 | def __init__(self, root, path, tainted, size, nb_files): |
| 453 | """path='.' is a valid value and must be handled appropriately.""" |
| 454 | assert not path.endswith(os.path.sep), path |
| 455 | super(Results.Directory, self).__init__( |
| 456 | root, path + os.path.sep, tainted, size, nb_files) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 457 | |
| 458 | def flatten(self): |
| 459 | out = super(Results.Directory, self).flatten() |
| 460 | out['nb_files'] = self.nb_files |
| 461 | return out |
| 462 | |
| 463 | def _clone(self, new_root, new_path, tainted): |
| 464 | """Clones itself keeping meta-data.""" |
| 465 | out = self.__class__( |
| 466 | new_root, |
| 467 | new_path.rstrip(os.path.sep), |
| 468 | tainted, |
| 469 | self.size, |
| 470 | self.nb_files) |
| 471 | out._real_path = self._real_path |
| 472 | return out |
| 473 | |
| 474 | class Process(object): |
| 475 | """A process that was traced. |
| 476 | |
| 477 | Contains references to the files accessed by this process and its children. |
| 478 | """ |
| 479 | def __init__(self, pid, files, executable, command, initial_cwd, children): |
| 480 | logging.debug('Process(%s, %d, ...)' % (pid, len(files))) |
| 481 | self.pid = pid |
| 482 | self.files = sorted(files, key=lambda x: x.path) |
| 483 | self.children = children |
| 484 | self.executable = executable |
| 485 | self.command = command |
| 486 | self.initial_cwd = initial_cwd |
| 487 | |
| 488 | # Check internal consistency. |
| 489 | assert len(set(f.path for f in self.files)) == len(self.files), sorted( |
| 490 | f.path for f in self.files) |
| 491 | assert isinstance(self.children, list) |
| 492 | assert isinstance(self.files, list) |
| 493 | |
| 494 | @property |
| 495 | def all(self): |
| 496 | for child in self.children: |
| 497 | for i in child.all: |
| 498 | yield i |
| 499 | yield self |
| 500 | |
| 501 | def flatten(self): |
| 502 | return { |
| 503 | 'children': [c.flatten() for c in self.children], |
| 504 | 'command': self.command, |
| 505 | 'executable': self.executable, |
| 506 | 'files': [f.flatten() for f in self.files], |
| 507 | 'initial_cwd': self.initial_cwd, |
| 508 | 'pid': self.pid, |
| 509 | } |
| 510 | |
| 511 | def strip_root(self, root): |
maruel@chromium.org | 561d4b2 | 2013-09-26 21:08:08 +0000 | [diff] [blame] | 512 | assert file_path.isabs(root) and root.endswith(os.path.sep), root |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 513 | # Loads the files after since they are constructed as objects. |
| 514 | out = self.__class__( |
| 515 | self.pid, |
| 516 | filter(None, (f.strip_root(root) for f in self.files)), |
| 517 | self.executable, |
| 518 | self.command, |
| 519 | self.initial_cwd, |
| 520 | [c.strip_root(root) for c in self.children]) |
| 521 | logging.debug( |
| 522 | 'strip_root(%s) %d -> %d' % (root, len(self.files), len(out.files))) |
| 523 | return out |
| 524 | |
| 525 | def __init__(self, process): |
| 526 | self.process = process |
| 527 | # Cache. |
| 528 | self._files = None |
| 529 | |
| 530 | def flatten(self): |
| 531 | return { |
| 532 | 'root': self.process.flatten(), |
| 533 | } |
| 534 | |
| 535 | @property |
| 536 | def files(self): |
| 537 | if self._files is None: |
| 538 | self._files = sorted( |
| 539 | sum((p.files for p in self.process.all), []), |
| 540 | key=lambda x: x.path) |
| 541 | return self._files |
| 542 | |
| 543 | @property |
| 544 | def existent(self): |
| 545 | return [f for f in self.files if f.existent] |
| 546 | |
| 547 | @property |
| 548 | def non_existent(self): |
| 549 | return [f for f in self.files if not f.existent] |
| 550 | |
| 551 | def strip_root(self, root): |
| 552 | """Returns a clone with all the files outside the directory |root| removed |
| 553 | and converts all the path to be relative paths. |
maruel@chromium.org | 306e0e7 | 2012-11-02 18:22:03 +0000 | [diff] [blame] | 554 | |
| 555 | It keeps files accessible through the |root| directory or that have been |
| 556 | accessed through any symlink which points to the same directory. |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 557 | """ |
| 558 | # Resolve any symlink |
maruel | 12e3001 | 2015-10-09 11:55:35 -0700 | [diff] [blame] | 559 | root = fs.realpath(root) |
maruel@chromium.org | 561d4b2 | 2013-09-26 21:08:08 +0000 | [diff] [blame] | 560 | root = ( |
| 561 | file_path.get_native_path_case(root).rstrip(os.path.sep) + os.path.sep) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 562 | logging.debug('strip_root(%s)' % root) |
| 563 | return Results(self.process.strip_root(root)) |
| 564 | |
| 565 | |
| 566 | class ApiBase(object): |
| 567 | """OS-agnostic API to trace a process and its children.""" |
| 568 | class Context(object): |
| 569 | """Processes one log line at a time and keeps the list of traced processes. |
| 570 | |
| 571 | The parsing is complicated by the fact that logs are traced out of order for |
| 572 | strace but in-order for dtrace and logman. In addition, on Windows it is |
| 573 | very frequent that processids are reused so a flat list cannot be used. But |
| 574 | at the same time, it is impossible to faithfully construct a graph when the |
| 575 | logs are processed out of order. So both a tree and a flat mapping are used, |
| 576 | the tree is the real process tree, while the flat mapping stores the last |
| 577 | valid process for the corresponding processid. For the strace case, the |
| 578 | tree's head is guessed at the last moment. |
| 579 | """ |
| 580 | class Process(object): |
| 581 | """Keeps context for one traced child process. |
| 582 | |
| 583 | Logs all the files this process touched. Ignores directories. |
| 584 | """ |
| 585 | def __init__(self, blacklist, pid, initial_cwd): |
| 586 | # Check internal consistency. |
| 587 | assert isinstance(pid, int), repr(pid) |
maruel@chromium.org | 306e0e7 | 2012-11-02 18:22:03 +0000 | [diff] [blame] | 588 | assert_is_renderable(initial_cwd) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 589 | self.pid = pid |
| 590 | # children are Process instances. |
| 591 | self.children = [] |
| 592 | self.initial_cwd = initial_cwd |
| 593 | self.cwd = None |
maruel@chromium.org | 538141b | 2013-06-03 20:57:17 +0000 | [diff] [blame] | 594 | self.files = {} |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 595 | self.executable = None |
| 596 | self.command = None |
| 597 | self._blacklist = blacklist |
| 598 | |
| 599 | def to_results_process(self): |
| 600 | """Resolves file case sensitivity and or late-bound strings.""" |
| 601 | # When resolving files, it's normal to get dupe because a file could be |
| 602 | # opened multiple times with different case. Resolve the deduplication |
| 603 | # here. |
| 604 | def fix_path(x): |
| 605 | """Returns the native file path case. |
| 606 | |
| 607 | Converts late-bound strings. |
| 608 | """ |
| 609 | if not x: |
| 610 | # Do not convert None instance to 'None'. |
| 611 | return x |
maruel@chromium.org | 306e0e7 | 2012-11-02 18:22:03 +0000 | [diff] [blame] | 612 | x = render(x) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 613 | if os.path.isabs(x): |
| 614 | # If the path is not absolute, which tends to happen occasionally on |
| 615 | # Windows, it is not possible to get the native path case so ignore |
| 616 | # that trace. It mostly happens for 'executable' value. |
maruel@chromium.org | 561d4b2 | 2013-09-26 21:08:08 +0000 | [diff] [blame] | 617 | x = file_path.get_native_path_case(x) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 618 | return x |
| 619 | |
maruel@chromium.org | 538141b | 2013-06-03 20:57:17 +0000 | [diff] [blame] | 620 | def fix_and_blacklist_path(x, m): |
| 621 | """Receives a tuple (filepath, mode) and processes filepath.""" |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 622 | x = fix_path(x) |
| 623 | if not x: |
| 624 | return |
| 625 | # The blacklist needs to be reapplied, since path casing could |
| 626 | # influence blacklisting. |
| 627 | if self._blacklist(x): |
| 628 | return |
maruel@chromium.org | 538141b | 2013-06-03 20:57:17 +0000 | [diff] [blame] | 629 | # Filters out directories. Some may have passed through. |
maruel | 12e3001 | 2015-10-09 11:55:35 -0700 | [diff] [blame] | 630 | if fs.isdir(x): |
maruel@chromium.org | 538141b | 2013-06-03 20:57:17 +0000 | [diff] [blame] | 631 | return |
| 632 | return x, m |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 633 | |
maruel@chromium.org | 538141b | 2013-06-03 20:57:17 +0000 | [diff] [blame] | 634 | # Renders all the files as strings, as some could be RelativePath |
| 635 | # instances. It is important to do it first since there could still be |
| 636 | # multiple entries with the same path but different modes. |
| 637 | rendered = ( |
| 638 | fix_and_blacklist_path(f, m) for f, m in self.files.iteritems()) |
| 639 | files = sorted( |
| 640 | (f for f in rendered if f), |
| 641 | key=lambda x: (x[0], Results.File.ACCEPTABLE_MODES.index(x[1]))) |
| 642 | # Then converting into a dict will automatically clean up lesser |
| 643 | # important values. |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 644 | files = [ |
maruel@chromium.org | 538141b | 2013-06-03 20:57:17 +0000 | [diff] [blame] | 645 | Results.File(None, f, False, None, m) |
| 646 | for f, m in dict(files).iteritems() |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 647 | ] |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 648 | return Results.Process( |
| 649 | self.pid, |
| 650 | files, |
| 651 | fix_path(self.executable), |
| 652 | self.command, |
| 653 | fix_path(self.initial_cwd), |
| 654 | [c.to_results_process() for c in self.children]) |
| 655 | |
maruel@chromium.org | 538141b | 2013-06-03 20:57:17 +0000 | [diff] [blame] | 656 | def add_file(self, filepath, mode): |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 657 | """Adds a file if it passes the blacklist.""" |
maruel@chromium.org | 306e0e7 | 2012-11-02 18:22:03 +0000 | [diff] [blame] | 658 | if self._blacklist(render(filepath)): |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 659 | return |
maruel@chromium.org | 538141b | 2013-06-03 20:57:17 +0000 | [diff] [blame] | 660 | logging.debug('add_file(%d, %s, %s)', self.pid, filepath, mode) |
| 661 | # Note that filepath and not render(filepath) is added. It is because |
| 662 | # filepath could be something else than a string, like a RelativePath |
| 663 | # instance for dtrace logs. |
| 664 | modes = Results.File.ACCEPTABLE_MODES |
| 665 | old_mode = self.files.setdefault(filepath, mode) |
| 666 | if old_mode != mode and modes.index(old_mode) < modes.index(mode): |
| 667 | # Take the highest value. |
| 668 | self.files[filepath] = mode |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 669 | |
| 670 | def __init__(self, blacklist): |
| 671 | self.blacklist = blacklist |
| 672 | # Initial process. |
| 673 | self.root_process = None |
| 674 | # dict to accelerate process lookup, to not have to lookup the whole graph |
| 675 | # each time. |
| 676 | self._process_lookup = {} |
| 677 | |
| 678 | class Tracer(object): |
| 679 | """During it's lifetime, the tracing subsystem is enabled.""" |
| 680 | def __init__(self, logname): |
| 681 | self._logname = logname |
maruel@chromium.org | 808e3f2 | 2013-05-16 16:42:54 +0000 | [diff] [blame] | 682 | self._lock = threading.RLock() |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 683 | self._traces = [] |
| 684 | self._initialized = True |
maruel@chromium.org | 4c9b4fa | 2013-05-29 19:22:14 +0000 | [diff] [blame] | 685 | self._scripts_to_cleanup = [] |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 686 | |
| 687 | def trace(self, cmd, cwd, tracename, output): |
| 688 | """Runs the OS-specific trace program on an executable. |
| 689 | |
| 690 | Arguments: |
| 691 | - cmd: The command (a list) to run. |
| 692 | - cwd: Current directory to start the child process in. |
| 693 | - tracename: Name of the trace in the logname file. |
| 694 | - output: If False, redirects output to PIPEs. |
| 695 | |
| 696 | Returns a tuple (resultcode, output) and updates the internal trace |
| 697 | entries. |
| 698 | """ |
| 699 | # The implementation adds an item to self._traces. |
| 700 | raise NotImplementedError(self.__class__.__name__) |
| 701 | |
| 702 | def close(self, _timeout=None): |
| 703 | """Saves the meta-data in the logname file. |
| 704 | |
| 705 | For kernel-based tracing, stops the tracing subsystem. |
| 706 | |
| 707 | Must not be used manually when using 'with' construct. |
| 708 | """ |
| 709 | with self._lock: |
maruel@chromium.org | 808e3f2 | 2013-05-16 16:42:54 +0000 | [diff] [blame] | 710 | if not self._initialized: |
| 711 | raise TracingFailure( |
| 712 | 'Called %s.close() on an unitialized object' % |
| 713 | self.__class__.__name__, |
| 714 | None, None, None) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 715 | try: |
maruel@chromium.org | 4c9b4fa | 2013-05-29 19:22:14 +0000 | [diff] [blame] | 716 | while self._scripts_to_cleanup: |
| 717 | try: |
maruel | 12e3001 | 2015-10-09 11:55:35 -0700 | [diff] [blame] | 718 | fs.remove(self._scripts_to_cleanup.pop()) |
maruel@chromium.org | 4c9b4fa | 2013-05-29 19:22:14 +0000 | [diff] [blame] | 719 | except OSError as e: |
| 720 | logging.error('Failed to delete a temporary script: %s', e) |
Marc-Antoine Ruel | de01180 | 2013-11-12 15:19:47 -0500 | [diff] [blame] | 721 | tools.write_json(self._logname, self._gen_logdata(), False) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 722 | finally: |
| 723 | self._initialized = False |
| 724 | |
| 725 | def post_process_log(self): |
| 726 | """Post-processes the log so it becomes faster to load afterward. |
| 727 | |
| 728 | Must not be used manually when using 'with' construct. |
| 729 | """ |
| 730 | assert not self._initialized, 'Must stop tracing first.' |
| 731 | |
maruel@chromium.org | 808e3f2 | 2013-05-16 16:42:54 +0000 | [diff] [blame] | 732 | def _gen_logdata(self): |
| 733 | """Returns the data to be saved in the trace file.""" |
| 734 | return { |
| 735 | 'traces': self._traces, |
| 736 | } |
| 737 | |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 738 | def __enter__(self): |
| 739 | """Enables 'with' statement.""" |
| 740 | return self |
| 741 | |
| 742 | def __exit__(self, exc_type, exc_value, traceback): |
| 743 | """Enables 'with' statement.""" |
| 744 | self.close() |
| 745 | # If an exception was thrown, do not process logs. |
| 746 | if not exc_type: |
| 747 | self.post_process_log() |
| 748 | |
| 749 | def get_tracer(self, logname): |
| 750 | """Returns an ApiBase.Tracer instance. |
| 751 | |
| 752 | Initializes the tracing subsystem, which is a requirement for kernel-based |
| 753 | tracers. Only one tracer instance should be live at a time! |
| 754 | |
| 755 | logname is the filepath to the json file that will contain the meta-data |
| 756 | about the logs. |
| 757 | """ |
| 758 | return self.Tracer(logname) |
| 759 | |
| 760 | @staticmethod |
| 761 | def clean_trace(logname): |
| 762 | """Deletes an old log.""" |
| 763 | raise NotImplementedError() |
| 764 | |
| 765 | @classmethod |
maruel@chromium.org | ec74ff8 | 2012-10-29 18:14:47 +0000 | [diff] [blame] | 766 | def parse_log(cls, logname, blacklist, trace_name): |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 767 | """Processes trace logs and returns the files opened and the files that do |
| 768 | not exist. |
| 769 | |
| 770 | It does not track directories. |
| 771 | |
maruel@chromium.org | ec74ff8 | 2012-10-29 18:14:47 +0000 | [diff] [blame] | 772 | Arguments: |
| 773 | - logname: must be an absolute path. |
| 774 | - blacklist: must be a lambda. |
| 775 | - trace_name: optional trace to read, defaults to reading all traces. |
maruel@chromium.org | 8b056ba | 2012-10-16 14:04:49 +0000 | [diff] [blame] | 776 | |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 777 | Most of the time, files that do not exist are temporary test files that |
| 778 | should be put in /tmp instead. See http://crbug.com/116251. |
| 779 | |
| 780 | Returns a list of dict with keys: |
| 781 | - results: A Results instance. |
| 782 | - trace: The corresponding tracename parameter provided to |
| 783 | get_tracer().trace(). |
| 784 | - output: Output gathered during execution, if get_tracer().trace(..., |
| 785 | output=False) was used. |
| 786 | """ |
| 787 | raise NotImplementedError(cls.__class__.__name__) |
| 788 | |
| 789 | |
| 790 | class Strace(ApiBase): |
| 791 | """strace implies linux.""" |
maruel@chromium.org | 306e0e7 | 2012-11-02 18:22:03 +0000 | [diff] [blame] | 792 | @staticmethod |
| 793 | def load_filename(filename): |
| 794 | """Parses a filename in a log.""" |
maruel@chromium.org | bea76be | 2013-06-03 20:59:25 +0000 | [diff] [blame] | 795 | # TODO(maruel): Be compatible with strace -x. |
maruel@chromium.org | 306e0e7 | 2012-11-02 18:22:03 +0000 | [diff] [blame] | 796 | assert isinstance(filename, str) |
| 797 | out = '' |
| 798 | i = 0 |
| 799 | while i < len(filename): |
| 800 | c = filename[i] |
| 801 | if c == '\\': |
| 802 | out += chr(int(filename[i+1:i+4], 8)) |
| 803 | i += 4 |
| 804 | else: |
| 805 | out += c |
| 806 | i += 1 |
| 807 | # TODO(maruel): That's not necessarily true that the current code page is |
| 808 | # utf-8. |
| 809 | return out.decode('utf-8') |
| 810 | |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 811 | class Context(ApiBase.Context): |
| 812 | """Processes a strace log line and keeps the list of existent and non |
| 813 | existent files accessed. |
| 814 | |
| 815 | Ignores directories. |
| 816 | |
| 817 | Uses late-binding to processes the cwd of each process. The problem is that |
| 818 | strace generates one log file per process it traced but doesn't give any |
maruel@chromium.org | 2909b7c | 2013-05-29 18:52:59 +0000 | [diff] [blame] | 819 | information about which process was started when and by who. So process the |
| 820 | logs out of order and use late binding with RelativePath to be able to |
| 821 | deduce the initial directory of each process once all the logs are parsed. |
| 822 | |
| 823 | TODO(maruel): Use the script even in the non-sudo case, so log parsing can |
| 824 | be done in two phase: first find the root process, then process the child |
| 825 | processes in order. With that, it should be possible to not use RelativePath |
| 826 | anymore. This would significantly simplify the code! |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 827 | """ |
| 828 | class Process(ApiBase.Context.Process): |
| 829 | """Represents the state of a process. |
| 830 | |
| 831 | Contains all the information retrieved from the pid-specific log. |
| 832 | """ |
| 833 | # Function names are using ([a-z_0-9]+) |
| 834 | # This is the most common format. function(args) = result |
maruel@chromium.org | 588e09d | 2013-05-21 14:38:57 +0000 | [diff] [blame] | 835 | RE_HEADER = re.compile(r'^([a-z_0-9]+)\((.*?)\)\s+= (.+)$') |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 836 | # An interrupted function call, only grab the minimal header. |
| 837 | RE_UNFINISHED = re.compile(r'^([^\(]+)(.*) \<unfinished \.\.\.\>$') |
| 838 | # A resumed function call. |
| 839 | RE_RESUMED = re.compile(r'^<\.\.\. ([^ ]+) resumed> (.+)$') |
| 840 | # A process received a signal. |
| 841 | RE_SIGNAL = re.compile(r'^--- SIG[A-Z]+ .+ ---') |
| 842 | # A process didn't handle a signal. Ignore any junk appearing before, |
| 843 | # because the process was forcibly killed so it won't open any new file. |
| 844 | RE_KILLED = re.compile( |
| 845 | r'^.*\+\+\+ killed by ([A-Z]+)( \(core dumped\))? \+\+\+$') |
| 846 | # The process has exited. |
| 847 | RE_PROCESS_EXITED = re.compile(r'^\+\+\+ exited with (\d+) \+\+\+') |
| 848 | # A call was canceled. Ignore any prefix. |
| 849 | RE_UNAVAILABLE = re.compile(r'^.*\)\s*= \? <unavailable>$') |
| 850 | # Happens when strace fails to even get the function name. |
| 851 | UNNAMED_FUNCTION = '????' |
| 852 | |
| 853 | # Corner-case in python, a class member function decorator must not be |
| 854 | # @staticmethod. |
| 855 | def parse_args(regexp, expect_zero): # pylint: disable=E0213 |
| 856 | """Automatically convert the str 'args' into a list of processed |
| 857 | arguments. |
| 858 | |
| 859 | Arguments: |
| 860 | - regexp is used to parse args. |
| 861 | - expect_zero: one of True, False or None. |
| 862 | - True: will check for result.startswith('0') first and will ignore |
| 863 | the trace line completely otherwise. This is important because for |
| 864 | many functions, the regexp will not process if the call failed. |
| 865 | - False: will check for not result.startswith(('?', '-1')) for the |
| 866 | same reason than with True. |
| 867 | - None: ignore result. |
| 868 | """ |
| 869 | def meta_hook(function): |
| 870 | assert function.__name__.startswith('handle_') |
| 871 | def hook(self, args, result): |
| 872 | if expect_zero is True and not result.startswith('0'): |
| 873 | return |
| 874 | if expect_zero is False and result.startswith(('?', '-1')): |
| 875 | return |
| 876 | match = re.match(regexp, args) |
| 877 | if not match: |
| 878 | raise TracingFailure( |
| 879 | 'Failed to parse %s(%s) = %s' % |
| 880 | (function.__name__[len('handle_'):], args, result), |
| 881 | None, None, None) |
| 882 | return function(self, match.groups(), result) |
| 883 | return hook |
| 884 | return meta_hook |
| 885 | |
| 886 | class RelativePath(object): |
| 887 | """A late-bound relative path.""" |
| 888 | def __init__(self, parent, value): |
maruel@chromium.org | 306e0e7 | 2012-11-02 18:22:03 +0000 | [diff] [blame] | 889 | assert_is_renderable(parent) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 890 | self.parent = parent |
maruel@chromium.org | bea76be | 2013-06-03 20:59:25 +0000 | [diff] [blame] | 891 | assert ( |
| 892 | value is None or |
| 893 | (isinstance(value, unicode) and not os.path.isabs(value))) |
| 894 | self.value = value |
maruel@chromium.org | 306e0e7 | 2012-11-02 18:22:03 +0000 | [diff] [blame] | 895 | if self.value: |
maruel@chromium.org | bea76be | 2013-06-03 20:59:25 +0000 | [diff] [blame] | 896 | # TODO(maruel): On POSIX, '\\' is a valid character so remove this |
| 897 | # assert. |
maruel@chromium.org | 306e0e7 | 2012-11-02 18:22:03 +0000 | [diff] [blame] | 898 | assert '\\' not in self.value, value |
| 899 | assert '\\' not in self.value, (repr(value), repr(self.value)) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 900 | |
| 901 | def render(self): |
| 902 | """Returns the current directory this instance is representing. |
| 903 | |
| 904 | This function is used to return the late-bound value. |
| 905 | """ |
maruel@chromium.org | bea76be | 2013-06-03 20:59:25 +0000 | [diff] [blame] | 906 | assert self.parent is not None |
| 907 | parent = render(self.parent) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 908 | if self.value: |
| 909 | return os.path.normpath(os.path.join(parent, self.value)) |
| 910 | return parent |
| 911 | |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 912 | def __init__(self, root, pid): |
| 913 | """Keeps enough information to be able to guess the original process |
| 914 | root. |
| 915 | |
| 916 | strace doesn't store which process was the initial process. So more |
| 917 | information needs to be kept so the graph can be reconstructed from the |
| 918 | flat map. |
| 919 | """ |
| 920 | logging.info('%s(%d)' % (self.__class__.__name__, pid)) |
| 921 | super(Strace.Context.Process, self).__init__(root.blacklist, pid, None) |
| 922 | assert isinstance(root, ApiBase.Context) |
| 923 | self._root = weakref.ref(root) |
| 924 | # The dict key is the function name of the pending call, like 'open' |
| 925 | # or 'execve'. |
| 926 | self._pending_calls = {} |
| 927 | self._line_number = 0 |
| 928 | # Current directory when the process started. |
maruel@chromium.org | bea76be | 2013-06-03 20:59:25 +0000 | [diff] [blame] | 929 | if isinstance(self._root(), unicode): |
| 930 | self.initial_cwd = self._root() |
| 931 | else: |
| 932 | self.initial_cwd = self.RelativePath(self._root(), None) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 933 | self.parentid = None |
maruel@chromium.org | 9f72e0f | 2012-10-18 14:48:49 +0000 | [diff] [blame] | 934 | self._done = False |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 935 | |
| 936 | def get_cwd(self): |
| 937 | """Returns the best known value of cwd.""" |
| 938 | return self.cwd or self.initial_cwd |
| 939 | |
| 940 | def render(self): |
| 941 | """Returns the string value of the RelativePath() object. |
| 942 | |
| 943 | Used by RelativePath. Returns the initial directory and not the |
| 944 | current one since the current directory 'cwd' validity is time-limited. |
| 945 | |
| 946 | The validity is only guaranteed once all the logs are processed. |
| 947 | """ |
| 948 | return self.initial_cwd.render() |
| 949 | |
| 950 | def on_line(self, line): |
maruel@chromium.org | 306e0e7 | 2012-11-02 18:22:03 +0000 | [diff] [blame] | 951 | assert isinstance(line, str) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 952 | self._line_number += 1 |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 953 | try: |
maruel@chromium.org | 588e09d | 2013-05-21 14:38:57 +0000 | [diff] [blame] | 954 | if self._done: |
| 955 | raise TracingFailure( |
| 956 | 'Found a trace for a terminated process or corrupted log', |
| 957 | None, None, None) |
| 958 | |
| 959 | if self.RE_SIGNAL.match(line): |
| 960 | # Ignore signals. |
| 961 | return |
| 962 | |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 963 | match = self.RE_KILLED.match(line) |
| 964 | if match: |
| 965 | # Converts a '+++ killed by Foo +++' trace into an exit_group(). |
| 966 | self.handle_exit_group(match.group(1), None) |
| 967 | return |
| 968 | |
| 969 | match = self.RE_PROCESS_EXITED.match(line) |
| 970 | if match: |
| 971 | # Converts a '+++ exited with 1 +++' trace into an exit_group() |
| 972 | self.handle_exit_group(match.group(1), None) |
| 973 | return |
| 974 | |
| 975 | match = self.RE_UNFINISHED.match(line) |
| 976 | if match: |
| 977 | if match.group(1) in self._pending_calls: |
| 978 | raise TracingFailure( |
| 979 | 'Found two unfinished calls for the same function', |
| 980 | None, None, None, |
| 981 | self._pending_calls) |
| 982 | self._pending_calls[match.group(1)] = ( |
| 983 | match.group(1) + match.group(2)) |
| 984 | return |
| 985 | |
| 986 | match = self.RE_UNAVAILABLE.match(line) |
| 987 | if match: |
| 988 | # This usually means a process was killed and a pending call was |
| 989 | # canceled. |
| 990 | # TODO(maruel): Look up the last exit_group() trace just above and |
| 991 | # make sure any self._pending_calls[anything] is properly flushed. |
| 992 | return |
| 993 | |
| 994 | match = self.RE_RESUMED.match(line) |
| 995 | if match: |
| 996 | if match.group(1) not in self._pending_calls: |
| 997 | raise TracingFailure( |
| 998 | 'Found a resumed call that was not logged as unfinished', |
| 999 | None, None, None, |
| 1000 | self._pending_calls) |
| 1001 | pending = self._pending_calls.pop(match.group(1)) |
| 1002 | # Reconstruct the line. |
| 1003 | line = pending + match.group(2) |
| 1004 | |
| 1005 | match = self.RE_HEADER.match(line) |
| 1006 | if not match: |
maruel@chromium.org | bcfc9e1 | 2013-03-19 18:47:17 +0000 | [diff] [blame] | 1007 | # The line is corrupted. It happens occasionally when a process is |
| 1008 | # killed forcibly with activity going on. Assume the process died. |
| 1009 | # No other line can be processed afterward. |
maruel@chromium.org | 588e09d | 2013-05-21 14:38:57 +0000 | [diff] [blame] | 1010 | logging.debug('%d is done: %s', self.pid, line) |
maruel@chromium.org | bcfc9e1 | 2013-03-19 18:47:17 +0000 | [diff] [blame] | 1011 | self._done = True |
| 1012 | return |
| 1013 | |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1014 | if match.group(1) == self.UNNAMED_FUNCTION: |
| 1015 | return |
| 1016 | |
| 1017 | # It's a valid line, handle it. |
| 1018 | handler = getattr(self, 'handle_%s' % match.group(1), None) |
| 1019 | if not handler: |
| 1020 | self._handle_unknown(match.group(1), match.group(2), match.group(3)) |
| 1021 | return handler(match.group(2), match.group(3)) |
| 1022 | except TracingFailure, e: |
| 1023 | # Hack in the values since the handler could be a static function. |
| 1024 | e.pid = self.pid |
| 1025 | e.line = line |
| 1026 | e.line_number = self._line_number |
| 1027 | # Re-raise the modified exception. |
| 1028 | raise |
| 1029 | except (KeyError, NotImplementedError, ValueError), e: |
| 1030 | raise TracingFailure( |
| 1031 | 'Trace generated a %s exception: %s' % ( |
| 1032 | e.__class__.__name__, str(e)), |
| 1033 | self.pid, |
| 1034 | self._line_number, |
| 1035 | line, |
| 1036 | e) |
| 1037 | |
| 1038 | @parse_args(r'^\"(.+?)\", [FKORWX_|]+$', True) |
| 1039 | def handle_access(self, args, _result): |
maruel@chromium.org | 538141b | 2013-06-03 20:57:17 +0000 | [diff] [blame] | 1040 | self._handle_file(args[0], Results.File.TOUCHED) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1041 | |
| 1042 | @parse_args(r'^\"(.+?)\"$', True) |
| 1043 | def handle_chdir(self, args, _result): |
| 1044 | """Updates cwd.""" |
maruel@chromium.org | bea76be | 2013-06-03 20:59:25 +0000 | [diff] [blame] | 1045 | self.cwd = self._mangle(args[0]) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1046 | logging.debug('handle_chdir(%d, %s)' % (self.pid, self.cwd)) |
| 1047 | |
maruel@chromium.org | 538141b | 2013-06-03 20:57:17 +0000 | [diff] [blame] | 1048 | @parse_args(r'^\"(.+?)\", (\d+), (\d+)$', False) |
| 1049 | def handle_chown(self, args, _result): |
| 1050 | # TODO(maruel): Look at result? |
| 1051 | self._handle_file(args[0], Results.File.WRITE) |
maruel@chromium.org | 5555072 | 2013-05-28 14:43:14 +0000 | [diff] [blame] | 1052 | |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1053 | def handle_clone(self, _args, result): |
maruel@chromium.org | 588e09d | 2013-05-21 14:38:57 +0000 | [diff] [blame] | 1054 | self._handling_forking('clone', result) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1055 | |
| 1056 | def handle_close(self, _args, _result): |
| 1057 | pass |
| 1058 | |
maruel@chromium.org | 538141b | 2013-06-03 20:57:17 +0000 | [diff] [blame] | 1059 | @parse_args(r'^\"(.+?)\", (\d+)$', False) |
| 1060 | def handle_chmod(self, args, _result): |
| 1061 | self._handle_file(args[0], Results.File.WRITE) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1062 | |
maruel@chromium.org | 538141b | 2013-06-03 20:57:17 +0000 | [diff] [blame] | 1063 | @parse_args(r'^\"(.+?)\", (\d+)$', False) |
| 1064 | def handle_creat(self, args, _result): |
| 1065 | self._handle_file(args[0], Results.File.WRITE) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1066 | |
| 1067 | @parse_args(r'^\"(.+?)\", \[(.+)\], \[\/\* \d+ vars? \*\/\]$', True) |
| 1068 | def handle_execve(self, args, _result): |
| 1069 | # Even if in practice execve() doesn't returns when it succeeds, strace |
| 1070 | # still prints '0' as the result. |
| 1071 | filepath = args[0] |
maruel@chromium.org | 538141b | 2013-06-03 20:57:17 +0000 | [diff] [blame] | 1072 | self._handle_file(filepath, Results.File.READ) |
maruel@chromium.org | bea76be | 2013-06-03 20:59:25 +0000 | [diff] [blame] | 1073 | self.executable = self._mangle(filepath) |
maruel@chromium.org | 588e09d | 2013-05-21 14:38:57 +0000 | [diff] [blame] | 1074 | try: |
| 1075 | self.command = strace_process_quoted_arguments(args[1]) |
| 1076 | except ValueError as e: |
| 1077 | raise TracingFailure( |
| 1078 | 'Failed to process command line argument:\n%s' % e.args[0], |
| 1079 | None, None, None) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1080 | |
| 1081 | def handle_exit_group(self, _args, _result): |
| 1082 | """Removes cwd.""" |
| 1083 | self.cwd = None |
| 1084 | |
csharp@chromium.org | 11ae067 | 2012-10-30 14:53:17 +0000 | [diff] [blame] | 1085 | @parse_args(r'^(\d+|AT_FDCWD), \"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', True) |
| 1086 | def handle_faccessat(self, args, _results): |
| 1087 | if args[0] == 'AT_FDCWD': |
maruel@chromium.org | 538141b | 2013-06-03 20:57:17 +0000 | [diff] [blame] | 1088 | self._handle_file(args[1], Results.File.TOUCHED) |
csharp@chromium.org | 11ae067 | 2012-10-30 14:53:17 +0000 | [diff] [blame] | 1089 | else: |
| 1090 | raise Exception('Relative faccess not implemented.') |
| 1091 | |
maruel@chromium.org | 0781f32 | 2013-05-28 19:45:49 +0000 | [diff] [blame] | 1092 | def handle_fallocate(self, _args, result): |
| 1093 | pass |
| 1094 | |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1095 | def handle_fork(self, args, result): |
| 1096 | self._handle_unknown('fork', args, result) |
| 1097 | |
maruel@chromium.org | 9f72e0f | 2012-10-18 14:48:49 +0000 | [diff] [blame] | 1098 | def handle_futex(self, _args, _result): |
| 1099 | pass |
| 1100 | |
Marc-Antoine Ruel | aa4fff5 | 2014-10-24 09:10:13 -0400 | [diff] [blame] | 1101 | @parse_args(r'^(\".+?\"|0x[a-f0-9]+), (\d+)$', False) |
maruel@chromium.org | bea76be | 2013-06-03 20:59:25 +0000 | [diff] [blame] | 1102 | def handle_getcwd(self, args, _result): |
Marc-Antoine Ruel | aa4fff5 | 2014-10-24 09:10:13 -0400 | [diff] [blame] | 1103 | # TODO(maruel): Resolve file handle. |
| 1104 | if not args[0].startswith('0x'): |
| 1105 | filepath = args[0][1:-1] |
| 1106 | if os.path.isabs(filepath): |
| 1107 | logging.debug('handle_chdir(%d, %s)' % (self.pid, self.cwd)) |
| 1108 | if not isinstance(self.cwd, unicode): |
| 1109 | # Take the occasion to reset the path. |
| 1110 | self.cwd = self._mangle(filepath) |
| 1111 | else: |
| 1112 | # It should always match. |
| 1113 | assert self.cwd == Strace.load_filename(filepath), ( |
| 1114 | self.cwd, filepath) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1115 | |
| 1116 | @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True) |
| 1117 | def handle_link(self, args, _result): |
maruel@chromium.org | 538141b | 2013-06-03 20:57:17 +0000 | [diff] [blame] | 1118 | self._handle_file(args[0], Results.File.READ) |
| 1119 | self._handle_file(args[1], Results.File.WRITE) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1120 | |
Marc-Antoine Ruel | aa4fff5 | 2014-10-24 09:10:13 -0400 | [diff] [blame] | 1121 | @parse_args(r'^(\".+?\"|0x[a-f0-9]+), \{(?:.+?, )?\.\.\.\}$', True) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1122 | def handle_lstat(self, args, _result): |
Marc-Antoine Ruel | aa4fff5 | 2014-10-24 09:10:13 -0400 | [diff] [blame] | 1123 | # TODO(maruel): Resolve file handle. |
| 1124 | if not args[0].startswith('0x'): |
| 1125 | self._handle_file(args[0][1:-1], Results.File.TOUCHED) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1126 | |
| 1127 | def handle_mkdir(self, _args, _result): |
maruel@chromium.org | 538141b | 2013-06-03 20:57:17 +0000 | [diff] [blame] | 1128 | # We track content, not directories. |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1129 | pass |
| 1130 | |
Marc-Antoine Ruel | aa4fff5 | 2014-10-24 09:10:13 -0400 | [diff] [blame] | 1131 | @parse_args(r'^(\".*?\"|0x[a-f0-9]+), ([A-Z\_\|]+)(|, \d+)$', False) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1132 | def handle_open(self, args, _result): |
| 1133 | if 'O_DIRECTORY' in args[1]: |
| 1134 | return |
Marc-Antoine Ruel | aa4fff5 | 2014-10-24 09:10:13 -0400 | [diff] [blame] | 1135 | # TODO(maruel): Resolve file handle. |
| 1136 | if not args[0].startswith('0x'): |
| 1137 | t = Results.File.READ if 'O_RDONLY' in args[1] else Results.File.WRITE |
| 1138 | self._handle_file(args[0][1:-1], t) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1139 | |
Marc-Antoine Ruel | aa4fff5 | 2014-10-24 09:10:13 -0400 | [diff] [blame] | 1140 | @parse_args( |
| 1141 | r'^(\d+|AT_FDCWD), (\".*?\"|0x[a-f0-9]+), ([A-Z\_\|]+)(|, \d+)$', |
| 1142 | False) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1143 | def handle_openat(self, args, _result): |
| 1144 | if 'O_DIRECTORY' in args[2]: |
| 1145 | return |
csharp@chromium.org | 11ae067 | 2012-10-30 14:53:17 +0000 | [diff] [blame] | 1146 | if args[0] == 'AT_FDCWD': |
Marc-Antoine Ruel | aa4fff5 | 2014-10-24 09:10:13 -0400 | [diff] [blame] | 1147 | # TODO(maruel): Resolve file handle. |
| 1148 | if not args[1].startswith('0x'): |
| 1149 | t = ( |
| 1150 | Results.File.READ if 'O_RDONLY' in args[2] |
maruel@chromium.org | 538141b | 2013-06-03 20:57:17 +0000 | [diff] [blame] | 1151 | else Results.File.WRITE) |
Marc-Antoine Ruel | aa4fff5 | 2014-10-24 09:10:13 -0400 | [diff] [blame] | 1152 | self._handle_file(args[1][1:-1], t) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1153 | else: |
| 1154 | # TODO(maruel): Implement relative open if necessary instead of the |
| 1155 | # AT_FDCWD flag, let's hope not since this means tracking all active |
| 1156 | # directory handles. |
maruel@chromium.org | 538141b | 2013-06-03 20:57:17 +0000 | [diff] [blame] | 1157 | raise NotImplementedError('Relative open via openat not implemented.') |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1158 | |
Marc-Antoine Ruel | aa4fff5 | 2014-10-24 09:10:13 -0400 | [diff] [blame] | 1159 | @parse_args( |
| 1160 | r'^(\".+?\"|0x[a-f0-9]+), (?:\".+?\"(?:\.\.\.)?|0x[a-f0-9]+), ' |
| 1161 | '\d+$', |
| 1162 | False) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1163 | def handle_readlink(self, args, _result): |
Marc-Antoine Ruel | aa4fff5 | 2014-10-24 09:10:13 -0400 | [diff] [blame] | 1164 | # TODO(maruel): Resolve file handle. |
| 1165 | if not args[0].startswith('0x'): |
| 1166 | self._handle_file(args[0][1:-1], Results.File.READ) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1167 | |
| 1168 | @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True) |
| 1169 | def handle_rename(self, args, _result): |
maruel@chromium.org | 538141b | 2013-06-03 20:57:17 +0000 | [diff] [blame] | 1170 | self._handle_file(args[0], Results.File.READ) |
| 1171 | self._handle_file(args[1], Results.File.WRITE) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1172 | |
| 1173 | def handle_rmdir(self, _args, _result): |
maruel@chromium.org | 538141b | 2013-06-03 20:57:17 +0000 | [diff] [blame] | 1174 | # We track content, not directories. |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1175 | pass |
| 1176 | |
| 1177 | def handle_setxattr(self, _args, _result): |
maruel@chromium.org | 538141b | 2013-06-03 20:57:17 +0000 | [diff] [blame] | 1178 | # TODO(maruel): self._handle_file(args[0], Results.File.WRITE) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1179 | pass |
| 1180 | |
Marc-Antoine Ruel | aa4fff5 | 2014-10-24 09:10:13 -0400 | [diff] [blame] | 1181 | @parse_args(r'^(\".+?\"|0x[a-f0-9]+), \{.+?\}$', True) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1182 | def handle_stat(self, args, _result): |
Marc-Antoine Ruel | aa4fff5 | 2014-10-24 09:10:13 -0400 | [diff] [blame] | 1183 | # TODO(maruel): Resolve file handle. |
| 1184 | if not args[0].startswith('0x'): |
| 1185 | self._handle_file(args[0][1:-1], Results.File.TOUCHED) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1186 | |
maruel@chromium.org | 538141b | 2013-06-03 20:57:17 +0000 | [diff] [blame] | 1187 | @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True) |
| 1188 | def handle_symlink(self, args, _result): |
| 1189 | self._handle_file(args[0], Results.File.TOUCHED) |
| 1190 | self._handle_file(args[1], Results.File.WRITE) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1191 | |
csharp@chromium.org | fa67292 | 2012-11-02 17:43:31 +0000 | [diff] [blame] | 1192 | @parse_args(r'^\"(.+?)\", \d+', True) |
| 1193 | def handle_truncate(self, args, _result): |
maruel@chromium.org | 538141b | 2013-06-03 20:57:17 +0000 | [diff] [blame] | 1194 | self._handle_file(args[0], Results.File.WRITE) |
csharp@chromium.org | fa67292 | 2012-11-02 17:43:31 +0000 | [diff] [blame] | 1195 | |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1196 | def handle_unlink(self, _args, _result): |
| 1197 | # In theory, the file had to be created anyway. |
| 1198 | pass |
| 1199 | |
csharp@chromium.org | 11ae067 | 2012-10-30 14:53:17 +0000 | [diff] [blame] | 1200 | def handle_unlinkat(self, _args, _result): |
| 1201 | # In theory, the file had to be created anyway. |
| 1202 | pass |
| 1203 | |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1204 | def handle_statfs(self, _args, _result): |
| 1205 | pass |
| 1206 | |
csharp@chromium.org | 11ae067 | 2012-10-30 14:53:17 +0000 | [diff] [blame] | 1207 | def handle_utimensat(self, _args, _result): |
| 1208 | pass |
| 1209 | |
maruel@chromium.org | 588e09d | 2013-05-21 14:38:57 +0000 | [diff] [blame] | 1210 | def handle_vfork(self, _args, result): |
| 1211 | self._handling_forking('vfork', result) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1212 | |
| 1213 | @staticmethod |
| 1214 | def _handle_unknown(function, args, result): |
| 1215 | raise TracingFailure( |
| 1216 | 'Unexpected/unimplemented trace %s(%s)= %s' % |
| 1217 | (function, args, result), |
| 1218 | None, None, None) |
| 1219 | |
maruel@chromium.org | 588e09d | 2013-05-21 14:38:57 +0000 | [diff] [blame] | 1220 | def _handling_forking(self, name, result): |
| 1221 | """Transfers cwd.""" |
| 1222 | if result.startswith(('?', '-1')): |
| 1223 | # The call failed. |
| 1224 | return |
| 1225 | # Update the other process right away. |
| 1226 | childpid = int(result) |
| 1227 | child = self._root().get_or_set_proc(childpid) |
| 1228 | if child.parentid is not None or childpid in self.children: |
| 1229 | raise TracingFailure( |
| 1230 | 'Found internal inconsitency in process lifetime detection ' |
| 1231 | 'during a %s() call' % name, |
| 1232 | None, None, None) |
| 1233 | |
| 1234 | # Copy the cwd object. |
| 1235 | child.initial_cwd = self.get_cwd() |
| 1236 | child.parentid = self.pid |
| 1237 | # It is necessary because the logs are processed out of order. |
| 1238 | self.children.append(child) |
| 1239 | |
maruel@chromium.org | 538141b | 2013-06-03 20:57:17 +0000 | [diff] [blame] | 1240 | def _handle_file(self, filepath, mode): |
maruel@chromium.org | bea76be | 2013-06-03 20:59:25 +0000 | [diff] [blame] | 1241 | filepath = self._mangle(filepath) |
maruel@chromium.org | 538141b | 2013-06-03 20:57:17 +0000 | [diff] [blame] | 1242 | self.add_file(filepath, mode) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1243 | |
maruel@chromium.org | bea76be | 2013-06-03 20:59:25 +0000 | [diff] [blame] | 1244 | def _mangle(self, filepath): |
| 1245 | """Decodes a filepath found in the log and convert it to a late-bound |
| 1246 | path if necessary. |
| 1247 | |
| 1248 | |filepath| is an strace 'encoded' string and the returned value is |
| 1249 | either an unicode string if the path was absolute or a late bound path |
| 1250 | otherwise. |
| 1251 | """ |
| 1252 | filepath = Strace.load_filename(filepath) |
| 1253 | if os.path.isabs(filepath): |
| 1254 | return filepath |
| 1255 | else: |
| 1256 | if isinstance(self.get_cwd(), unicode): |
| 1257 | return os.path.normpath(os.path.join(self.get_cwd(), filepath)) |
| 1258 | return self.RelativePath(self.get_cwd(), filepath) |
| 1259 | |
maruel@chromium.org | 2909b7c | 2013-05-29 18:52:59 +0000 | [diff] [blame] | 1260 | def __init__(self, blacklist, root_pid, initial_cwd): |
| 1261 | """|root_pid| may be None when the root process is not known. |
| 1262 | |
| 1263 | In that case, a search is done after reading all the logs to figure out |
| 1264 | the root process. |
| 1265 | """ |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1266 | super(Strace.Context, self).__init__(blacklist) |
maruel@chromium.org | 306e0e7 | 2012-11-02 18:22:03 +0000 | [diff] [blame] | 1267 | assert_is_renderable(initial_cwd) |
maruel@chromium.org | 2909b7c | 2013-05-29 18:52:59 +0000 | [diff] [blame] | 1268 | self.root_pid = root_pid |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1269 | self.initial_cwd = initial_cwd |
| 1270 | |
| 1271 | def render(self): |
| 1272 | """Returns the string value of the initial cwd of the root process. |
| 1273 | |
| 1274 | Used by RelativePath. |
| 1275 | """ |
| 1276 | return self.initial_cwd |
| 1277 | |
| 1278 | def on_line(self, pid, line): |
| 1279 | """Transfers control into the Process.on_line() function.""" |
| 1280 | self.get_or_set_proc(pid).on_line(line.strip()) |
| 1281 | |
| 1282 | def to_results(self): |
maruel@chromium.org | 2909b7c | 2013-05-29 18:52:59 +0000 | [diff] [blame] | 1283 | """If necessary, finds back the root process and verify consistency.""" |
| 1284 | if not self.root_pid: |
| 1285 | # The non-sudo case. The traced process was started by strace itself, |
| 1286 | # so the pid of the traced process is not known. |
| 1287 | root = [p for p in self._process_lookup.itervalues() if not p.parentid] |
| 1288 | if len(root) == 1: |
| 1289 | self.root_process = root[0] |
| 1290 | # Save it for later. |
| 1291 | self.root_pid = self.root_process.pid |
| 1292 | else: |
| 1293 | # The sudo case. The traced process was started manually so its pid is |
| 1294 | # known. |
| 1295 | self.root_process = self._process_lookup.get(self.root_pid) |
| 1296 | if not self.root_process: |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1297 | raise TracingFailure( |
| 1298 | 'Found internal inconsitency in process lifetime detection ' |
| 1299 | 'while finding the root process', |
| 1300 | None, |
| 1301 | None, |
| 1302 | None, |
maruel@chromium.org | 2909b7c | 2013-05-29 18:52:59 +0000 | [diff] [blame] | 1303 | self.root_pid, |
| 1304 | sorted(self._process_lookup)) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1305 | process = self.root_process.to_results_process() |
| 1306 | if sorted(self._process_lookup) != sorted(p.pid for p in process.all): |
| 1307 | raise TracingFailure( |
| 1308 | 'Found internal inconsitency in process lifetime detection ' |
| 1309 | 'while looking for len(tree) == len(list)', |
| 1310 | None, |
| 1311 | None, |
| 1312 | None, |
| 1313 | sorted(self._process_lookup), |
| 1314 | sorted(p.pid for p in process.all)) |
| 1315 | return Results(process) |
| 1316 | |
| 1317 | def get_or_set_proc(self, pid): |
| 1318 | """Returns the Context.Process instance for this pid or creates a new one. |
| 1319 | """ |
| 1320 | if not pid or not isinstance(pid, int): |
| 1321 | raise TracingFailure( |
| 1322 | 'Unpexpected value for pid: %r' % pid, |
| 1323 | pid, |
| 1324 | None, |
| 1325 | None, |
| 1326 | pid) |
| 1327 | if pid not in self._process_lookup: |
| 1328 | self._process_lookup[pid] = self.Process(self, pid) |
| 1329 | return self._process_lookup[pid] |
| 1330 | |
| 1331 | @classmethod |
| 1332 | def traces(cls): |
| 1333 | """Returns the list of all handled traces to pass this as an argument to |
| 1334 | strace. |
| 1335 | """ |
| 1336 | prefix = 'handle_' |
| 1337 | return [i[len(prefix):] for i in dir(cls.Process) if i.startswith(prefix)] |
| 1338 | |
| 1339 | class Tracer(ApiBase.Tracer): |
| 1340 | MAX_LEN = 256 |
| 1341 | |
maruel@chromium.org | f14633b | 2013-01-25 15:12:52 +0000 | [diff] [blame] | 1342 | def __init__(self, logname, use_sudo): |
| 1343 | super(Strace.Tracer, self).__init__(logname) |
| 1344 | self.use_sudo = use_sudo |
maruel@chromium.org | 2909b7c | 2013-05-29 18:52:59 +0000 | [diff] [blame] | 1345 | if use_sudo: |
| 1346 | # TODO(maruel): Use the jump script systematically to make it easy to |
| 1347 | # figure out the root process, so RelativePath is not necessary anymore. |
maruel@chromium.org | 4c9b4fa | 2013-05-29 19:22:14 +0000 | [diff] [blame] | 1348 | self._child_script = create_exec_thunk() |
| 1349 | self._scripts_to_cleanup.append(self._child_script) |
maruel@chromium.org | f14633b | 2013-01-25 15:12:52 +0000 | [diff] [blame] | 1350 | |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1351 | def trace(self, cmd, cwd, tracename, output): |
maruel@chromium.org | 2909b7c | 2013-05-29 18:52:59 +0000 | [diff] [blame] | 1352 | """Runs strace on an executable. |
| 1353 | |
| 1354 | When use_sudo=True, it is a 3-phases process: start the thunk, start |
| 1355 | sudo strace with the pid of the thunk and then have the thunk os.execve() |
| 1356 | the process to trace. |
| 1357 | """ |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1358 | logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output)) |
| 1359 | assert os.path.isabs(cmd[0]), cmd[0] |
| 1360 | assert os.path.isabs(cwd), cwd |
| 1361 | assert os.path.normpath(cwd) == cwd, cwd |
| 1362 | with self._lock: |
| 1363 | if not self._initialized: |
| 1364 | raise TracingFailure( |
| 1365 | 'Called Tracer.trace() on an unitialized object', |
| 1366 | None, None, None, tracename) |
| 1367 | assert tracename not in (i['trace'] for i in self._traces) |
| 1368 | stdout = stderr = None |
| 1369 | if output: |
| 1370 | stdout = subprocess.PIPE |
| 1371 | stderr = subprocess.STDOUT |
maruel@chromium.org | 2909b7c | 2013-05-29 18:52:59 +0000 | [diff] [blame] | 1372 | |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1373 | # Ensure all file related APIs are hooked. |
| 1374 | traces = ','.join(Strace.Context.traces() + ['file']) |
maruel@chromium.org | 2909b7c | 2013-05-29 18:52:59 +0000 | [diff] [blame] | 1375 | flags = [ |
maruel@chromium.org | a5cfdc3 | 2013-05-28 14:46:15 +0000 | [diff] [blame] | 1376 | # Each child process has its own trace file. It is necessary because |
| 1377 | # strace may generate corrupted log file if multiple processes are |
| 1378 | # heavily doing syscalls simultaneously. |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1379 | '-ff', |
maruel@chromium.org | a5cfdc3 | 2013-05-28 14:46:15 +0000 | [diff] [blame] | 1380 | # Reduce whitespace usage. |
| 1381 | '-a1', |
| 1382 | # hex encode non-ascii strings. |
| 1383 | # TODO(maruel): '-x', |
| 1384 | # TODO(maruel): '-ttt', |
| 1385 | # Signals are unnecessary noise here. Note the parser can cope with them |
| 1386 | # but reduce the unnecessary output. |
| 1387 | '-esignal=none', |
| 1388 | # Print as much data as wanted. |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1389 | '-s', '%d' % self.MAX_LEN, |
| 1390 | '-e', 'trace=%s' % traces, |
| 1391 | '-o', self._logname + '.' + tracename, |
| 1392 | ] |
maruel@chromium.org | 2909b7c | 2013-05-29 18:52:59 +0000 | [diff] [blame] | 1393 | |
| 1394 | if self.use_sudo: |
| 1395 | pipe_r, pipe_w = os.pipe() |
| 1396 | # Start the child process paused. |
maruel@chromium.org | 4c9b4fa | 2013-05-29 19:22:14 +0000 | [diff] [blame] | 1397 | target_cmd = [sys.executable, self._child_script, str(pipe_r)] + cmd |
maruel@chromium.org | 2909b7c | 2013-05-29 18:52:59 +0000 | [diff] [blame] | 1398 | logging.debug(' '.join(target_cmd)) |
| 1399 | child_proc = subprocess.Popen( |
| 1400 | target_cmd, |
| 1401 | stdin=subprocess.PIPE, |
| 1402 | stdout=stdout, |
| 1403 | stderr=stderr, |
| 1404 | cwd=cwd) |
| 1405 | |
| 1406 | # TODO(maruel): both processes must use the same UID for it to work |
| 1407 | # without sudo. Look why -p is failing at the moment without sudo. |
| 1408 | trace_cmd = [ |
| 1409 | 'sudo', |
| 1410 | 'strace', |
| 1411 | '-p', str(child_proc.pid), |
| 1412 | ] + flags |
| 1413 | logging.debug(' '.join(trace_cmd)) |
| 1414 | strace_proc = subprocess.Popen( |
| 1415 | trace_cmd, |
| 1416 | cwd=cwd, |
| 1417 | stdin=subprocess.PIPE, |
| 1418 | stdout=subprocess.PIPE, |
| 1419 | stderr=subprocess.PIPE) |
| 1420 | |
| 1421 | line = strace_proc.stderr.readline() |
| 1422 | if not re.match(r'^Process \d+ attached \- interrupt to quit$', line): |
| 1423 | # TODO(maruel): Raise an exception. |
| 1424 | assert False, line |
| 1425 | |
| 1426 | # Now fire the child process. |
| 1427 | os.write(pipe_w, 'x') |
| 1428 | |
| 1429 | out = child_proc.communicate()[0] |
| 1430 | strace_out = strace_proc.communicate()[0] |
| 1431 | |
| 1432 | # TODO(maruel): if strace_proc.returncode: Add an exception. |
| 1433 | saved_out = strace_out if strace_proc.returncode else out |
| 1434 | root_pid = child_proc.pid |
| 1435 | else: |
| 1436 | # Non-sudo case. |
| 1437 | trace_cmd = [ |
| 1438 | 'strace', |
| 1439 | ] + flags + cmd |
| 1440 | logging.debug(' '.join(trace_cmd)) |
| 1441 | child_proc = subprocess.Popen( |
| 1442 | trace_cmd, |
| 1443 | cwd=cwd, |
| 1444 | stdin=subprocess.PIPE, |
| 1445 | stdout=stdout, |
| 1446 | stderr=stderr) |
| 1447 | out = child_proc.communicate()[0] |
| 1448 | # TODO(maruel): Walk the logs and figure out the root process would |
| 1449 | # simplify parsing the logs a *lot*. |
| 1450 | saved_out = out |
| 1451 | # The trace reader will have to figure out. |
| 1452 | root_pid = None |
| 1453 | |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1454 | with self._lock: |
| 1455 | assert tracename not in (i['trace'] for i in self._traces) |
| 1456 | self._traces.append( |
| 1457 | { |
| 1458 | 'cmd': cmd, |
| 1459 | 'cwd': cwd, |
maruel@chromium.org | 2909b7c | 2013-05-29 18:52:59 +0000 | [diff] [blame] | 1460 | 'output': saved_out, |
| 1461 | 'pid': root_pid, |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1462 | 'trace': tracename, |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1463 | }) |
maruel@chromium.org | 2909b7c | 2013-05-29 18:52:59 +0000 | [diff] [blame] | 1464 | return child_proc.returncode, out |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1465 | |
maruel@chromium.org | f14633b | 2013-01-25 15:12:52 +0000 | [diff] [blame] | 1466 | def __init__(self, use_sudo=None): |
| 1467 | super(Strace, self).__init__() |
| 1468 | self.use_sudo = use_sudo |
| 1469 | |
| 1470 | def get_tracer(self, logname): |
| 1471 | return self.Tracer(logname, self.use_sudo) |
| 1472 | |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1473 | @staticmethod |
| 1474 | def clean_trace(logname): |
maruel | 12e3001 | 2015-10-09 11:55:35 -0700 | [diff] [blame] | 1475 | if fs.isfile(logname): |
| 1476 | fs.remove(logname) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1477 | # Also delete any pid specific file from previous traces. |
| 1478 | for i in glob.iglob(logname + '.*'): |
| 1479 | if i.rsplit('.', 1)[1].isdigit(): |
maruel | 12e3001 | 2015-10-09 11:55:35 -0700 | [diff] [blame] | 1480 | fs.remove(i) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1481 | |
| 1482 | @classmethod |
maruel@chromium.org | ec74ff8 | 2012-10-29 18:14:47 +0000 | [diff] [blame] | 1483 | def parse_log(cls, logname, blacklist, trace_name): |
| 1484 | logging.info('parse_log(%s, ..., %s)', logname, trace_name) |
maruel@chromium.org | 8b056ba | 2012-10-16 14:04:49 +0000 | [diff] [blame] | 1485 | assert os.path.isabs(logname) |
Marc-Antoine Ruel | de01180 | 2013-11-12 15:19:47 -0500 | [diff] [blame] | 1486 | data = tools.read_json(logname) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1487 | out = [] |
| 1488 | for item in data['traces']: |
maruel@chromium.org | ec74ff8 | 2012-10-29 18:14:47 +0000 | [diff] [blame] | 1489 | if trace_name and item['trace'] != trace_name: |
| 1490 | continue |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1491 | result = { |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1492 | 'output': item['output'], |
maruel@chromium.org | 964ddde | 2012-10-29 18:13:46 +0000 | [diff] [blame] | 1493 | 'trace': item['trace'], |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1494 | } |
| 1495 | try: |
maruel@chromium.org | 2909b7c | 2013-05-29 18:52:59 +0000 | [diff] [blame] | 1496 | context = cls.Context(blacklist, item['pid'], item['cwd']) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1497 | for pidfile in glob.iglob('%s.%s.*' % (logname, item['trace'])): |
maruel@chromium.org | 2909b7c | 2013-05-29 18:52:59 +0000 | [diff] [blame] | 1498 | logging.debug('Reading %s', pidfile) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1499 | pid = pidfile.rsplit('.', 1)[1] |
| 1500 | if pid.isdigit(): |
| 1501 | pid = int(pid) |
maruel@chromium.org | 2909b7c | 2013-05-29 18:52:59 +0000 | [diff] [blame] | 1502 | found_line = False |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1503 | for line in open(pidfile, 'rb'): |
| 1504 | context.on_line(pid, line) |
maruel@chromium.org | 2909b7c | 2013-05-29 18:52:59 +0000 | [diff] [blame] | 1505 | found_line = True |
| 1506 | if not found_line: |
| 1507 | # Ensures that a completely empty trace still creates the |
| 1508 | # corresponding Process instance by logging a dummy line. |
| 1509 | context.on_line(pid, '') |
| 1510 | else: |
| 1511 | logging.warning('Found unexpected file %s', pidfile) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1512 | result['results'] = context.to_results() |
maruel@chromium.org | 424f569 | 2012-10-15 15:42:38 +0000 | [diff] [blame] | 1513 | except TracingFailure: |
| 1514 | result['exception'] = sys.exc_info() |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1515 | out.append(result) |
| 1516 | return out |
| 1517 | |
| 1518 | |
| 1519 | class Dtrace(ApiBase): |
| 1520 | """Uses DTrace framework through dtrace. Requires root access. |
| 1521 | |
| 1522 | Implies Mac OSX. |
| 1523 | |
| 1524 | dtruss can't be used because it has compatibility issues with python. |
| 1525 | |
| 1526 | Also, the pid->cwd handling needs to be done manually since OSX has no way to |
| 1527 | get the absolute path of the 'cwd' dtrace variable from the probe. |
| 1528 | |
| 1529 | Also, OSX doesn't populate curpsinfo->pr_psargs properly, see |
| 1530 | https://discussions.apple.com/thread/1980539. So resort to handling execve() |
| 1531 | manually. |
| 1532 | |
| 1533 | errno is not printed in the log since this implementation currently only cares |
| 1534 | about files that were successfully opened. |
| 1535 | """ |
| 1536 | class Context(ApiBase.Context): |
| 1537 | # Format: index pid function(args) |
| 1538 | RE_HEADER = re.compile(r'^\d+ (\d+) ([a-zA-Z_\-]+)\((.*?)\)$') |
| 1539 | |
| 1540 | # Arguments parsing. |
| 1541 | RE_DTRACE_BEGIN = re.compile(r'^\"(.+?)\"$') |
| 1542 | RE_CHDIR = re.compile(r'^\"(.+?)\"$') |
| 1543 | RE_EXECVE = re.compile(r'^\"(.+?)\", \[(\d+), (.+)\]$') |
| 1544 | RE_OPEN = re.compile(r'^\"(.+?)\", (0x[0-9a-z]+), (0x[0-9a-z]+)$') |
| 1545 | RE_PROC_START = re.compile(r'^(\d+), \"(.+?)\", (\d+)$') |
| 1546 | RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$') |
| 1547 | |
maruel@chromium.org | ac36116 | 2013-06-04 15:54:06 +0000 | [diff] [blame] | 1548 | # O_DIRECTORY is not defined on Windows and dtrace doesn't exist on Windows. |
| 1549 | O_DIRECTORY = os.O_DIRECTORY if hasattr(os, 'O_DIRECTORY') else None |
maruel@chromium.org | 538141b | 2013-06-03 20:57:17 +0000 | [diff] [blame] | 1550 | O_RDWR = os.O_RDWR |
| 1551 | O_WRONLY = os.O_WRONLY |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1552 | |
| 1553 | class Process(ApiBase.Context.Process): |
| 1554 | def __init__(self, *args): |
| 1555 | super(Dtrace.Context.Process, self).__init__(*args) |
| 1556 | self.cwd = self.initial_cwd |
| 1557 | |
maruel@chromium.org | 19e9fcd | 2012-10-29 18:24:02 +0000 | [diff] [blame] | 1558 | def __init__(self, blacklist, thunk_pid, initial_cwd): |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1559 | logging.info( |
maruel@chromium.org | 19e9fcd | 2012-10-29 18:24:02 +0000 | [diff] [blame] | 1560 | '%s(%d, %s)' % (self.__class__.__name__, thunk_pid, initial_cwd)) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1561 | super(Dtrace.Context, self).__init__(blacklist) |
maruel@chromium.org | 306e0e7 | 2012-11-02 18:22:03 +0000 | [diff] [blame] | 1562 | assert isinstance(initial_cwd, unicode), initial_cwd |
maruel@chromium.org | 2909b7c | 2013-05-29 18:52:59 +0000 | [diff] [blame] | 1563 | # Process ID of the temporary script created by create_subprocess_thunk(). |
maruel@chromium.org | 19e9fcd | 2012-10-29 18:24:02 +0000 | [diff] [blame] | 1564 | self._thunk_pid = thunk_pid |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1565 | self._initial_cwd = initial_cwd |
| 1566 | self._line_number = 0 |
| 1567 | |
| 1568 | def on_line(self, line): |
maruel@chromium.org | 306e0e7 | 2012-11-02 18:22:03 +0000 | [diff] [blame] | 1569 | assert isinstance(line, unicode), line |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1570 | self._line_number += 1 |
| 1571 | match = self.RE_HEADER.match(line) |
| 1572 | if not match: |
| 1573 | raise TracingFailure( |
| 1574 | 'Found malformed line: %s' % line, |
| 1575 | None, |
| 1576 | self._line_number, |
| 1577 | line) |
| 1578 | fn = getattr( |
| 1579 | self, |
| 1580 | 'handle_%s' % match.group(2).replace('-', '_'), |
| 1581 | self._handle_ignored) |
| 1582 | # It is guaranteed to succeed because of the regexp. Or at least I thought |
| 1583 | # it would. |
| 1584 | pid = int(match.group(1)) |
| 1585 | try: |
| 1586 | return fn(pid, match.group(3)) |
| 1587 | except TracingFailure, e: |
| 1588 | # Hack in the values since the handler could be a static function. |
| 1589 | e.pid = pid |
| 1590 | e.line = line |
| 1591 | e.line_number = self._line_number |
| 1592 | # Re-raise the modified exception. |
| 1593 | raise |
| 1594 | except (KeyError, NotImplementedError, ValueError), e: |
| 1595 | raise TracingFailure( |
| 1596 | 'Trace generated a %s exception: %s' % ( |
| 1597 | e.__class__.__name__, str(e)), |
| 1598 | pid, |
| 1599 | self._line_number, |
| 1600 | line, |
| 1601 | e) |
| 1602 | |
| 1603 | def to_results(self): |
| 1604 | process = self.root_process.to_results_process() |
| 1605 | # Internal concistency check. |
| 1606 | if sorted(self._process_lookup) != sorted(p.pid for p in process.all): |
| 1607 | raise TracingFailure( |
| 1608 | 'Found internal inconsitency in process lifetime detection ' |
| 1609 | 'while looking for len(tree) == len(list)', |
| 1610 | None, |
| 1611 | None, |
| 1612 | None, |
| 1613 | sorted(self._process_lookup), |
| 1614 | sorted(p.pid for p in process.all)) |
| 1615 | return Results(process) |
| 1616 | |
| 1617 | def handle_dtrace_BEGIN(self, _pid, args): |
| 1618 | if not self.RE_DTRACE_BEGIN.match(args): |
| 1619 | raise TracingFailure( |
| 1620 | 'Found internal inconsitency in dtrace_BEGIN log line', |
| 1621 | None, None, None) |
| 1622 | |
| 1623 | def handle_proc_start(self, pid, args): |
| 1624 | """Transfers cwd. |
| 1625 | |
| 1626 | The dtrace script already takes care of only tracing the processes that |
| 1627 | are child of the traced processes so there is no need to verify the |
| 1628 | process hierarchy. |
| 1629 | """ |
| 1630 | if pid in self._process_lookup: |
| 1631 | raise TracingFailure( |
| 1632 | 'Found internal inconsitency in proc_start: %d started two times' % |
| 1633 | pid, |
| 1634 | None, None, None) |
| 1635 | match = self.RE_PROC_START.match(args) |
| 1636 | if not match: |
| 1637 | raise TracingFailure( |
| 1638 | 'Failed to parse arguments: %s' % args, |
| 1639 | None, None, None) |
| 1640 | ppid = int(match.group(1)) |
maruel@chromium.org | 19e9fcd | 2012-10-29 18:24:02 +0000 | [diff] [blame] | 1641 | if ppid == self._thunk_pid and not self.root_process: |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1642 | proc = self.root_process = self.Process( |
| 1643 | self.blacklist, pid, self._initial_cwd) |
| 1644 | elif ppid in self._process_lookup: |
| 1645 | proc = self.Process(self.blacklist, pid, self._process_lookup[ppid].cwd) |
| 1646 | self._process_lookup[ppid].children.append(proc) |
| 1647 | else: |
| 1648 | # Another process tree, ignore. |
| 1649 | return |
| 1650 | self._process_lookup[pid] = proc |
| 1651 | logging.debug( |
| 1652 | 'New child: %s -> %d cwd:%s' % |
maruel@chromium.org | 306e0e7 | 2012-11-02 18:22:03 +0000 | [diff] [blame] | 1653 | (ppid, pid, render(proc.initial_cwd))) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1654 | |
| 1655 | def handle_proc_exit(self, pid, _args): |
| 1656 | """Removes cwd.""" |
| 1657 | if pid in self._process_lookup: |
maruel@chromium.org | 19e9fcd | 2012-10-29 18:24:02 +0000 | [diff] [blame] | 1658 | # self._thunk_pid is not traced itself and other traces run neither. |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1659 | self._process_lookup[pid].cwd = None |
| 1660 | |
| 1661 | def handle_execve(self, pid, args): |
| 1662 | """Sets the process' executable. |
| 1663 | |
| 1664 | TODO(maruel): Read command line arguments. See |
| 1665 | https://discussions.apple.com/thread/1980539 for an example. |
| 1666 | https://gist.github.com/1242279 |
| 1667 | |
| 1668 | Will have to put the answer at http://stackoverflow.com/questions/7556249. |
| 1669 | :) |
| 1670 | """ |
| 1671 | if not pid in self._process_lookup: |
| 1672 | # Another process tree, ignore. |
| 1673 | return |
| 1674 | match = self.RE_EXECVE.match(args) |
| 1675 | if not match: |
| 1676 | raise TracingFailure( |
| 1677 | 'Failed to parse arguments: %r' % args, |
| 1678 | None, None, None) |
| 1679 | proc = self._process_lookup[pid] |
| 1680 | proc.executable = match.group(1) |
maruel@chromium.org | 538141b | 2013-06-03 20:57:17 +0000 | [diff] [blame] | 1681 | self._handle_file(pid, proc.executable, Results.File.READ) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1682 | proc.command = self.process_escaped_arguments(match.group(3)) |
| 1683 | if int(match.group(2)) != len(proc.command): |
| 1684 | raise TracingFailure( |
| 1685 | 'Failed to parse execve() arguments: %s' % args, |
| 1686 | None, None, None) |
| 1687 | |
| 1688 | def handle_chdir(self, pid, args): |
| 1689 | """Updates cwd.""" |
| 1690 | if pid not in self._process_lookup: |
| 1691 | # Another process tree, ignore. |
| 1692 | return |
| 1693 | cwd = self.RE_CHDIR.match(args).group(1) |
| 1694 | if not cwd.startswith('/'): |
| 1695 | cwd2 = os.path.join(self._process_lookup[pid].cwd, cwd) |
| 1696 | logging.debug('handle_chdir(%d, %s) -> %s' % (pid, cwd, cwd2)) |
| 1697 | else: |
| 1698 | logging.debug('handle_chdir(%d, %s)' % (pid, cwd)) |
| 1699 | cwd2 = cwd |
| 1700 | self._process_lookup[pid].cwd = cwd2 |
| 1701 | |
| 1702 | def handle_open_nocancel(self, pid, args): |
| 1703 | """Redirects to handle_open().""" |
| 1704 | return self.handle_open(pid, args) |
| 1705 | |
| 1706 | def handle_open(self, pid, args): |
| 1707 | if pid not in self._process_lookup: |
| 1708 | # Another process tree, ignore. |
| 1709 | return |
| 1710 | match = self.RE_OPEN.match(args) |
| 1711 | if not match: |
| 1712 | raise TracingFailure( |
| 1713 | 'Failed to parse arguments: %s' % args, |
| 1714 | None, None, None) |
| 1715 | flag = int(match.group(2), 16) |
| 1716 | if self.O_DIRECTORY & flag == self.O_DIRECTORY: |
| 1717 | # Ignore directories. |
| 1718 | return |
maruel@chromium.org | 538141b | 2013-06-03 20:57:17 +0000 | [diff] [blame] | 1719 | self._handle_file( |
| 1720 | pid, |
| 1721 | match.group(1), |
| 1722 | Results.File.READ if not ((self.O_RDWR | self.O_WRONLY) & flag) |
| 1723 | else Results.File.WRITE) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1724 | |
| 1725 | def handle_rename(self, pid, args): |
| 1726 | if pid not in self._process_lookup: |
| 1727 | # Another process tree, ignore. |
| 1728 | return |
| 1729 | match = self.RE_RENAME.match(args) |
| 1730 | if not match: |
| 1731 | raise TracingFailure( |
| 1732 | 'Failed to parse arguments: %s' % args, |
| 1733 | None, None, None) |
maruel@chromium.org | 538141b | 2013-06-03 20:57:17 +0000 | [diff] [blame] | 1734 | self._handle_file(pid, match.group(1), Results.File.READ) |
| 1735 | self._handle_file(pid, match.group(2), Results.File.WRITE) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1736 | |
maruel@chromium.org | 538141b | 2013-06-03 20:57:17 +0000 | [diff] [blame] | 1737 | def _handle_file(self, pid, filepath, mode): |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1738 | if not filepath.startswith('/'): |
| 1739 | filepath = os.path.join(self._process_lookup[pid].cwd, filepath) |
| 1740 | # We can get '..' in the path. |
| 1741 | filepath = os.path.normpath(filepath) |
| 1742 | # Sadly, still need to filter out directories here; |
| 1743 | # saw open_nocancel(".", 0, 0) = 0 lines. |
maruel | 12e3001 | 2015-10-09 11:55:35 -0700 | [diff] [blame] | 1744 | if fs.isdir(filepath): |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1745 | return |
maruel@chromium.org | 538141b | 2013-06-03 20:57:17 +0000 | [diff] [blame] | 1746 | self._process_lookup[pid].add_file(filepath, mode) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1747 | |
| 1748 | def handle_ftruncate(self, pid, args): |
| 1749 | """Just used as a signal to kill dtrace, ignoring.""" |
| 1750 | pass |
| 1751 | |
| 1752 | @staticmethod |
| 1753 | def _handle_ignored(pid, args): |
| 1754 | """Is called for all the event traces that are not handled.""" |
| 1755 | raise NotImplementedError('Please implement me') |
| 1756 | |
| 1757 | @staticmethod |
| 1758 | def process_escaped_arguments(text): |
| 1759 | """Extracts escaped arguments on a string and return the arguments as a |
| 1760 | list. |
| 1761 | |
| 1762 | Implemented as an automaton. |
| 1763 | |
| 1764 | Example: |
| 1765 | With text = '\\001python2.7\\001-c\\001print(\\"hi\\")\\0', the |
| 1766 | function will return ['python2.7', '-c', 'print("hi")] |
| 1767 | """ |
| 1768 | if not text.endswith('\\0'): |
| 1769 | raise ValueError('String is not null terminated: %r' % text, text) |
| 1770 | text = text[:-2] |
| 1771 | |
| 1772 | def unescape(x): |
| 1773 | """Replaces '\\' with '\' and '\?' (where ? is anything) with ?.""" |
| 1774 | out = [] |
| 1775 | escaped = False |
| 1776 | for i in x: |
| 1777 | if i == '\\' and not escaped: |
| 1778 | escaped = True |
| 1779 | continue |
| 1780 | escaped = False |
| 1781 | out.append(i) |
| 1782 | return ''.join(out) |
| 1783 | |
| 1784 | return [unescape(i) for i in text.split('\\001')] |
| 1785 | |
| 1786 | class Tracer(ApiBase.Tracer): |
| 1787 | # pylint: disable=C0301 |
| 1788 | # |
| 1789 | # To understand the following code, you'll want to take a look at: |
| 1790 | # http://developers.sun.com/solaris/articles/dtrace_quickref/dtrace_quickref.html |
| 1791 | # https://wikis.oracle.com/display/DTrace/Variables |
| 1792 | # http://docs.oracle.com/cd/E19205-01/820-4221/ |
| 1793 | # |
| 1794 | # 0. Dump all the valid probes into a text file. It is important, you |
| 1795 | # want to redirect into a file and you don't want to constantly 'sudo'. |
| 1796 | # $ sudo dtrace -l > probes.txt |
| 1797 | # |
| 1798 | # 1. Count the number of probes: |
| 1799 | # $ wc -l probes.txt |
| 1800 | # 81823 # On OSX 10.7, including 1 header line. |
| 1801 | # |
| 1802 | # 2. List providers, intentionally skipping all the 'syspolicy10925' and the |
| 1803 | # likes and skipping the header with NR>1: |
| 1804 | # $ awk 'NR>1 { print $2 }' probes.txt | sort | uniq | grep -v '[[:digit:]]' |
| 1805 | # dtrace |
| 1806 | # fbt |
| 1807 | # io |
| 1808 | # ip |
| 1809 | # lockstat |
| 1810 | # mach_trap |
| 1811 | # proc |
| 1812 | # profile |
| 1813 | # sched |
| 1814 | # syscall |
| 1815 | # tcp |
| 1816 | # vminfo |
| 1817 | # |
| 1818 | # 3. List of valid probes: |
| 1819 | # $ grep syscall probes.txt | less |
| 1820 | # or use dtrace directly: |
| 1821 | # $ sudo dtrace -l -P syscall | less |
| 1822 | # |
| 1823 | # trackedpid is an associative array where its value can be 0, 1 or 2. |
| 1824 | # 0 is for untracked processes and is the default value for items not |
| 1825 | # in the associative array. |
| 1826 | # 1 is for tracked processes. |
maruel@chromium.org | 2909b7c | 2013-05-29 18:52:59 +0000 | [diff] [blame] | 1827 | # 2 is for the script created by create_subprocess_thunk() only. It is not |
| 1828 | # tracked itself but all its decendants are. |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 1829 | # |
| 1830 | # The script will kill itself only once waiting_to_die == 1 and |
| 1831 | # current_processes == 0, so that both getlogin() was called and that |
| 1832 | # all traced processes exited. |
| 1833 | # |
| 1834 | # TODO(maruel): Use cacheable predicates. See |
| 1835 | # https://wikis.oracle.com/display/DTrace/Performance+Considerations |
| 1836 | D_CODE = """ |
| 1837 | dtrace:::BEGIN { |
| 1838 | waiting_to_die = 0; |
| 1839 | current_processes = 0; |
| 1840 | logindex = 0; |
| 1841 | printf("%d %d %s_%s(\\"%s\\")\\n", |
| 1842 | logindex, PID, probeprov, probename, SCRIPT); |
| 1843 | logindex++; |
| 1844 | } |
| 1845 | |
| 1846 | proc:::start /trackedpid[ppid]/ { |
| 1847 | trackedpid[pid] = 1; |
| 1848 | current_processes += 1; |
| 1849 | printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n", |
| 1850 | logindex, pid, probeprov, probename, |
| 1851 | ppid, |
| 1852 | execname, |
| 1853 | current_processes); |
| 1854 | logindex++; |
| 1855 | } |
| 1856 | /* Should use SCRIPT but there is no access to this variable at that |
| 1857 | * point. */ |
| 1858 | proc:::start /ppid == PID && execname == "Python"/ { |
| 1859 | trackedpid[pid] = 2; |
| 1860 | current_processes += 1; |
| 1861 | printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n", |
| 1862 | logindex, pid, probeprov, probename, |
| 1863 | ppid, |
| 1864 | execname, |
| 1865 | current_processes); |
| 1866 | logindex++; |
| 1867 | } |
| 1868 | proc:::exit /trackedpid[pid] && |
| 1869 | current_processes == 1 && |
| 1870 | waiting_to_die == 1/ { |
| 1871 | trackedpid[pid] = 0; |
| 1872 | current_processes -= 1; |
| 1873 | printf("%d %d %s_%s(%d)\\n", |
| 1874 | logindex, pid, probeprov, probename, |
| 1875 | current_processes); |
| 1876 | logindex++; |
| 1877 | exit(0); |
| 1878 | } |
| 1879 | proc:::exit /trackedpid[pid]/ { |
| 1880 | trackedpid[pid] = 0; |
| 1881 | current_processes -= 1; |
| 1882 | printf("%d %d %s_%s(%d)\\n", |
| 1883 | logindex, pid, probeprov, probename, |
| 1884 | current_processes); |
| 1885 | logindex++; |
| 1886 | } |
| 1887 | |
| 1888 | /* Use an arcane function to detect when we need to die */ |
| 1889 | syscall::ftruncate:entry /pid == PID && arg0 == FILE_ID/ { |
| 1890 | waiting_to_die = 1; |
| 1891 | printf("%d %d %s()\\n", logindex, pid, probefunc); |
| 1892 | logindex++; |
| 1893 | } |
| 1894 | syscall::ftruncate:entry / |
| 1895 | pid == PID && arg0 == FILE_ID && current_processes == 0/ { |
| 1896 | exit(0); |
| 1897 | } |
| 1898 | |
| 1899 | syscall::open*:entry /trackedpid[pid] == 1/ { |
| 1900 | self->open_arg0 = arg0; |
| 1901 | self->open_arg1 = arg1; |
| 1902 | self->open_arg2 = arg2; |
| 1903 | } |
| 1904 | syscall::open*:return /trackedpid[pid] == 1 && errno == 0/ { |
| 1905 | this->open_arg0 = copyinstr(self->open_arg0); |
| 1906 | printf("%d %d %s(\\"%s\\", 0x%x, 0x%x)\\n", |
| 1907 | logindex, pid, probefunc, |
| 1908 | this->open_arg0, |
| 1909 | self->open_arg1, |
| 1910 | self->open_arg2); |
| 1911 | logindex++; |
| 1912 | this->open_arg0 = 0; |
| 1913 | } |
| 1914 | syscall::open*:return /trackedpid[pid] == 1/ { |
| 1915 | self->open_arg0 = 0; |
| 1916 | self->open_arg1 = 0; |
| 1917 | self->open_arg2 = 0; |
| 1918 | } |
| 1919 | |
| 1920 | syscall::rename:entry /trackedpid[pid] == 1/ { |
| 1921 | self->rename_arg0 = arg0; |
| 1922 | self->rename_arg1 = arg1; |
| 1923 | } |
| 1924 | syscall::rename:return /trackedpid[pid] == 1 && errno == 0/ { |
| 1925 | this->rename_arg0 = copyinstr(self->rename_arg0); |
| 1926 | this->rename_arg1 = copyinstr(self->rename_arg1); |
| 1927 | printf("%d %d %s(\\"%s\\", \\"%s\\")\\n", |
| 1928 | logindex, pid, probefunc, |
| 1929 | this->rename_arg0, |
| 1930 | this->rename_arg1); |
| 1931 | logindex++; |
| 1932 | this->rename_arg0 = 0; |
| 1933 | this->rename_arg1 = 0; |
| 1934 | } |
| 1935 | syscall::rename:return /trackedpid[pid] == 1/ { |
| 1936 | self->rename_arg0 = 0; |
| 1937 | self->rename_arg1 = 0; |
| 1938 | } |
| 1939 | |
| 1940 | /* Track chdir, it's painful because it is only receiving relative path. |
| 1941 | */ |
| 1942 | syscall::chdir:entry /trackedpid[pid] == 1/ { |
| 1943 | self->chdir_arg0 = arg0; |
| 1944 | } |
| 1945 | syscall::chdir:return /trackedpid[pid] == 1 && errno == 0/ { |
| 1946 | this->chdir_arg0 = copyinstr(self->chdir_arg0); |
| 1947 | printf("%d %d %s(\\"%s\\")\\n", |
| 1948 | logindex, pid, probefunc, |
| 1949 | this->chdir_arg0); |
| 1950 | logindex++; |
| 1951 | this->chdir_arg0 = 0; |
| 1952 | } |
| 1953 | syscall::chdir:return /trackedpid[pid] == 1/ { |
| 1954 | self->chdir_arg0 = 0; |
| 1955 | } |
| 1956 | """ |
| 1957 | |
| 1958 | # execve-specific code, tends to throw a lot of exceptions. |
| 1959 | D_CODE_EXECVE = """ |
| 1960 | /* Finally what we care about! */ |
| 1961 | syscall::exec*:entry /trackedpid[pid]/ { |
| 1962 | self->exec_arg0 = copyinstr(arg0); |
| 1963 | /* Incrementally probe for a NULL in the argv parameter of execve() to |
| 1964 | * figure out argc. */ |
| 1965 | /* TODO(maruel): Skip the remaining copyin() when a NULL pointer was |
| 1966 | * found. */ |
| 1967 | self->exec_argc = 0; |
| 1968 | /* Probe for argc==1 */ |
| 1969 | this->exec_argv = (user_addr_t*)copyin( |
| 1970 | arg1, sizeof(user_addr_t) * (self->exec_argc + 1)); |
| 1971 | self->exec_argc = this->exec_argv[self->exec_argc] ? |
| 1972 | (self->exec_argc + 1) : self->exec_argc; |
| 1973 | |
| 1974 | /* Probe for argc==2 */ |
| 1975 | this->exec_argv = (user_addr_t*)copyin( |
| 1976 | arg1, sizeof(user_addr_t) * (self->exec_argc + 1)); |
| 1977 | self->exec_argc = this->exec_argv[self->exec_argc] ? |
| 1978 | (self->exec_argc + 1) : self->exec_argc; |
| 1979 | |
| 1980 | /* Probe for argc==3 */ |
| 1981 | this->exec_argv = (user_addr_t*)copyin( |
| 1982 | arg1, sizeof(user_addr_t) * (self->exec_argc + 1)); |
| 1983 | self->exec_argc = this->exec_argv[self->exec_argc] ? |
| 1984 | (self->exec_argc + 1) : self->exec_argc; |
| 1985 | |
| 1986 | /* Probe for argc==4 */ |
| 1987 | this->exec_argv = (user_addr_t*)copyin( |
| 1988 | arg1, sizeof(user_addr_t) * (self->exec_argc + 1)); |
| 1989 | self->exec_argc = this->exec_argv[self->exec_argc] ? |
| 1990 | (self->exec_argc + 1) : self->exec_argc; |
| 1991 | |
| 1992 | /* Copy the inputs strings since there is no guarantee they'll be |
| 1993 | * present after the call completed. */ |
| 1994 | self->exec_argv0 = (self->exec_argc > 0) ? |
| 1995 | copyinstr(this->exec_argv[0]) : ""; |
| 1996 | self->exec_argv1 = (self->exec_argc > 1) ? |
| 1997 | copyinstr(this->exec_argv[1]) : ""; |
| 1998 | self->exec_argv2 = (self->exec_argc > 2) ? |
| 1999 | copyinstr(this->exec_argv[2]) : ""; |
| 2000 | self->exec_argv3 = (self->exec_argc > 3) ? |
| 2001 | copyinstr(this->exec_argv[3]) : ""; |
| 2002 | this->exec_argv = 0; |
| 2003 | } |
| 2004 | syscall::exec*:return /trackedpid[pid] && errno == 0/ { |
| 2005 | /* We need to join strings here, as using multiple printf() would |
| 2006 | * cause tearing when multiple threads/processes are traced. |
| 2007 | * Since it is impossible to escape a string and join it to another one, |
| 2008 | * like sprintf("%s%S", previous, more), use hackery. |
| 2009 | * Each of the elements are split with a \\1. \\0 cannot be used because |
| 2010 | * it is simply ignored. This will conflict with any program putting a |
| 2011 | * \\1 in their execve() string but this should be "rare enough" */ |
| 2012 | this->args = ""; |
| 2013 | /* Process exec_argv[0] */ |
| 2014 | this->args = strjoin( |
| 2015 | this->args, (self->exec_argc > 0) ? self->exec_argv0 : ""); |
| 2016 | |
| 2017 | /* Process exec_argv[1] */ |
| 2018 | this->args = strjoin( |
| 2019 | this->args, (self->exec_argc > 1) ? "\\1" : ""); |
| 2020 | this->args = strjoin( |
| 2021 | this->args, (self->exec_argc > 1) ? self->exec_argv1 : ""); |
| 2022 | |
| 2023 | /* Process exec_argv[2] */ |
| 2024 | this->args = strjoin( |
| 2025 | this->args, (self->exec_argc > 2) ? "\\1" : ""); |
| 2026 | this->args = strjoin( |
| 2027 | this->args, (self->exec_argc > 2) ? self->exec_argv2 : ""); |
| 2028 | |
| 2029 | /* Process exec_argv[3] */ |
| 2030 | this->args = strjoin( |
| 2031 | this->args, (self->exec_argc > 3) ? "\\1" : ""); |
| 2032 | this->args = strjoin( |
| 2033 | this->args, (self->exec_argc > 3) ? self->exec_argv3 : ""); |
| 2034 | |
| 2035 | /* Prints self->exec_argc to permits verifying the internal |
| 2036 | * consistency since this code is quite fishy. */ |
| 2037 | printf("%d %d %s(\\"%s\\", [%d, %S])\\n", |
| 2038 | logindex, pid, probefunc, |
| 2039 | self->exec_arg0, |
| 2040 | self->exec_argc, |
| 2041 | this->args); |
| 2042 | logindex++; |
| 2043 | this->args = 0; |
| 2044 | } |
| 2045 | syscall::exec*:return /trackedpid[pid]/ { |
| 2046 | self->exec_arg0 = 0; |
| 2047 | self->exec_argc = 0; |
| 2048 | self->exec_argv0 = 0; |
| 2049 | self->exec_argv1 = 0; |
| 2050 | self->exec_argv2 = 0; |
| 2051 | self->exec_argv3 = 0; |
| 2052 | } |
| 2053 | """ |
| 2054 | |
| 2055 | # Code currently not used. |
| 2056 | D_EXTRANEOUS = """ |
| 2057 | /* This is a good learning experience, since it traces a lot of things |
| 2058 | * related to the process and child processes. |
| 2059 | * Warning: it generates a gigantic log. For example, tracing |
| 2060 | * "data/trace_inputs/child1.py --child" generates a 2mb log and takes |
| 2061 | * several minutes to execute. |
| 2062 | */ |
| 2063 | /* |
| 2064 | mach_trap::: /trackedpid[pid] == 1 || trackedpid[ppid]/ { |
| 2065 | printf("%d %d %s_%s() = %d\\n", |
| 2066 | logindex, pid, probeprov, probefunc, errno); |
| 2067 | logindex++; |
| 2068 | } |
| 2069 | proc::: /trackedpid[pid] == 1 || trackedpid[ppid]/ { |
| 2070 | printf("%d %d %s_%s() = %d\\n", |
| 2071 | logindex, pid, probeprov, probefunc, errno); |
| 2072 | logindex++; |
| 2073 | } |
| 2074 | sched::: /trackedpid[pid] == 1 || trackedpid[ppid]/ { |
| 2075 | printf("%d %d %s_%s() = %d\\n", |
| 2076 | logindex, pid, probeprov, probefunc, errno); |
| 2077 | logindex++; |
| 2078 | } |
| 2079 | syscall::: /trackedpid[pid] == 1 || trackedpid[ppid]/ { |
| 2080 | printf("%d %d %s_%s() = %d\\n", |
| 2081 | logindex, pid, probeprov, probefunc, errno); |
| 2082 | logindex++; |
| 2083 | } |
| 2084 | vminfo::: /trackedpid[pid] == 1 || trackedpid[ppid]/ { |
| 2085 | printf("%d %d %s_%s() = %d\\n", |
| 2086 | logindex, pid, probeprov, probefunc, errno); |
| 2087 | logindex++; |
| 2088 | } |
| 2089 | */ |
| 2090 | /* TODO(maruel): *stat* functions and friends |
| 2091 | syscall::access:return, |
| 2092 | syscall::chdir:return, |
| 2093 | syscall::chflags:return, |
| 2094 | syscall::chown:return, |
| 2095 | syscall::chroot:return, |
| 2096 | syscall::getattrlist:return, |
| 2097 | syscall::getxattr:return, |
| 2098 | syscall::lchown:return, |
| 2099 | syscall::lstat64:return, |
| 2100 | syscall::lstat:return, |
| 2101 | syscall::mkdir:return, |
| 2102 | syscall::pathconf:return, |
| 2103 | syscall::readlink:return, |
| 2104 | syscall::removexattr:return, |
| 2105 | syscall::setxattr:return, |
| 2106 | syscall::stat64:return, |
| 2107 | syscall::stat:return, |
| 2108 | syscall::truncate:return, |
| 2109 | syscall::unlink:return, |
| 2110 | syscall::utimes:return, |
| 2111 | */ |
| 2112 | """ |
| 2113 | |
maruel@chromium.org | f14633b | 2013-01-25 15:12:52 +0000 | [diff] [blame] | 2114 | def __init__(self, logname, use_sudo): |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2115 | """Starts the log collection with dtrace. |
| 2116 | |
| 2117 | Requires root access or chmod 4555 on dtrace. dtrace is asynchronous so |
| 2118 | this needs to wait for dtrace to be "warmed up". |
| 2119 | """ |
| 2120 | super(Dtrace.Tracer, self).__init__(logname) |
maruel@chromium.org | 4c9b4fa | 2013-05-29 19:22:14 +0000 | [diff] [blame] | 2121 | # This script is used as a signal to figure out the root process. |
| 2122 | self._signal_script = create_subprocess_thunk() |
| 2123 | self._scripts_to_cleanup.append(self._signal_script) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2124 | # This unique dummy temp file is used to signal the dtrace script that it |
| 2125 | # should stop as soon as all the child processes are done. A bit hackish |
| 2126 | # but works fine enough. |
| 2127 | self._dummy_file_id, self._dummy_file_name = tempfile.mkstemp( |
Marc-Antoine Ruel | 3c979cb | 2015-03-11 13:43:28 -0400 | [diff] [blame] | 2128 | prefix=u'trace_signal_file') |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2129 | |
maruel@chromium.org | 9b2112a | 2013-04-05 18:01:02 +0000 | [diff] [blame] | 2130 | dtrace_path = '/usr/sbin/dtrace' |
maruel | 12e3001 | 2015-10-09 11:55:35 -0700 | [diff] [blame] | 2131 | if not fs.isfile(dtrace_path): |
maruel@chromium.org | 9b2112a | 2013-04-05 18:01:02 +0000 | [diff] [blame] | 2132 | dtrace_path = 'dtrace' |
maruel | 12e3001 | 2015-10-09 11:55:35 -0700 | [diff] [blame] | 2133 | elif use_sudo is None and (fs.stat(dtrace_path).st_mode & stat.S_ISUID): |
maruel@chromium.org | 9b2112a | 2013-04-05 18:01:02 +0000 | [diff] [blame] | 2134 | # No need to sudo. For those following at home, don't do that. |
| 2135 | use_sudo = False |
| 2136 | |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2137 | # Note: do not use the -p flag. It's useless if the initial process quits |
| 2138 | # too fast, resulting in missing traces from the grand-children. The D |
| 2139 | # code manages the dtrace lifetime itself. |
| 2140 | trace_cmd = [ |
maruel@chromium.org | 9b2112a | 2013-04-05 18:01:02 +0000 | [diff] [blame] | 2141 | dtrace_path, |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2142 | # Use a larger buffer if getting 'out of scratch space' errors. |
| 2143 | # Ref: https://wikis.oracle.com/display/DTrace/Options+and+Tunables |
| 2144 | '-b', '10m', |
| 2145 | '-x', 'dynvarsize=10m', |
| 2146 | #'-x', 'dtrace_global_maxsize=1m', |
| 2147 | '-x', 'evaltime=exec', |
| 2148 | '-o', '/dev/stderr', |
| 2149 | '-q', |
| 2150 | '-n', self._get_dtrace_code(), |
| 2151 | ] |
maruel@chromium.org | f14633b | 2013-01-25 15:12:52 +0000 | [diff] [blame] | 2152 | if use_sudo is not False: |
| 2153 | trace_cmd.insert(0, 'sudo') |
| 2154 | |
maruel | 12e3001 | 2015-10-09 11:55:35 -0700 | [diff] [blame] | 2155 | with fs.open(self._logname + '.log', 'wb') as logfile: |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2156 | self._dtrace = subprocess.Popen( |
| 2157 | trace_cmd, stdout=logfile, stderr=subprocess.STDOUT) |
| 2158 | logging.debug('Started dtrace pid: %d' % self._dtrace.pid) |
| 2159 | |
| 2160 | # Reads until one line is printed, which signifies dtrace is up and ready. |
maruel | 12e3001 | 2015-10-09 11:55:35 -0700 | [diff] [blame] | 2161 | with fs.open(self._logname + '.log', 'rb') as logfile: |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2162 | while 'dtrace_BEGIN' not in logfile.readline(): |
| 2163 | if self._dtrace.poll() is not None: |
| 2164 | # Do a busy wait. :/ |
| 2165 | break |
| 2166 | logging.debug('dtrace started') |
| 2167 | |
| 2168 | def _get_dtrace_code(self): |
| 2169 | """Setups the D code to implement child process tracking. |
| 2170 | |
| 2171 | Injects the cookie in the script so it knows when to stop. |
| 2172 | |
| 2173 | The script will detect any instance of the script created with |
maruel@chromium.org | 2909b7c | 2013-05-29 18:52:59 +0000 | [diff] [blame] | 2174 | create_subprocess_thunk() and will start tracing it. |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2175 | """ |
maruel@chromium.org | be75f28 | 2012-11-02 19:34:37 +0000 | [diff] [blame] | 2176 | out = ( |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2177 | 'inline int PID = %d;\n' |
| 2178 | 'inline string SCRIPT = "%s";\n' |
| 2179 | 'inline int FILE_ID = %d;\n' |
| 2180 | '\n' |
| 2181 | '%s') % ( |
| 2182 | os.getpid(), |
maruel@chromium.org | 4c9b4fa | 2013-05-29 19:22:14 +0000 | [diff] [blame] | 2183 | self._signal_script, |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2184 | self._dummy_file_id, |
maruel@chromium.org | be75f28 | 2012-11-02 19:34:37 +0000 | [diff] [blame] | 2185 | self.D_CODE) |
| 2186 | if os.environ.get('TRACE_INPUTS_DTRACE_ENABLE_EXECVE') == '1': |
| 2187 | # Do not enable by default since it tends to spew dtrace: error lines |
| 2188 | # because the execve() parameters are not in valid memory at the time of |
| 2189 | # logging. |
| 2190 | # TODO(maruel): Find a way to make this reliable since it's useful but |
| 2191 | # only works in limited/trivial uses cases for now. |
| 2192 | out += self.D_CODE_EXECVE |
| 2193 | return out |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2194 | |
| 2195 | def trace(self, cmd, cwd, tracename, output): |
| 2196 | """Runs dtrace on an executable. |
| 2197 | |
| 2198 | This dtruss is broken when it starts the process itself or when tracing |
| 2199 | child processes, this code starts a wrapper process |
maruel@chromium.org | 2909b7c | 2013-05-29 18:52:59 +0000 | [diff] [blame] | 2200 | generated with create_subprocess_thunk() which starts the executable to |
| 2201 | trace. |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2202 | """ |
| 2203 | logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output)) |
| 2204 | assert os.path.isabs(cmd[0]), cmd[0] |
| 2205 | assert os.path.isabs(cwd), cwd |
| 2206 | assert os.path.normpath(cwd) == cwd, cwd |
| 2207 | with self._lock: |
| 2208 | if not self._initialized: |
| 2209 | raise TracingFailure( |
| 2210 | 'Called Tracer.trace() on an unitialized object', |
| 2211 | None, None, None, tracename) |
| 2212 | assert tracename not in (i['trace'] for i in self._traces) |
| 2213 | |
| 2214 | # Starts the script wrapper to start the child process. This signals the |
| 2215 | # dtrace script that this process is to be traced. |
| 2216 | stdout = stderr = None |
| 2217 | if output: |
| 2218 | stdout = subprocess.PIPE |
| 2219 | stderr = subprocess.STDOUT |
| 2220 | child_cmd = [ |
| 2221 | sys.executable, |
maruel@chromium.org | 4c9b4fa | 2013-05-29 19:22:14 +0000 | [diff] [blame] | 2222 | self._signal_script, |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2223 | tracename, |
| 2224 | ] |
| 2225 | # Call a dummy function so that dtrace knows I'm about to launch a process |
| 2226 | # that needs to be traced. |
| 2227 | # Yummy. |
| 2228 | child = subprocess.Popen( |
vadimsh@chromium.org | a432647 | 2013-08-24 02:05:41 +0000 | [diff] [blame] | 2229 | child_cmd + tools.fix_python_path(cmd), |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2230 | stdin=subprocess.PIPE, |
| 2231 | stdout=stdout, |
| 2232 | stderr=stderr, |
| 2233 | cwd=cwd) |
| 2234 | logging.debug('Started child pid: %d' % child.pid) |
| 2235 | |
| 2236 | out = child.communicate()[0] |
| 2237 | # This doesn't mean tracing is done, one of the grand-child process may |
| 2238 | # still be alive. It will be tracked with the dtrace script. |
| 2239 | |
| 2240 | with self._lock: |
| 2241 | assert tracename not in (i['trace'] for i in self._traces) |
| 2242 | self._traces.append( |
| 2243 | { |
| 2244 | 'cmd': cmd, |
| 2245 | 'cwd': cwd, |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2246 | 'pid': child.pid, |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2247 | 'output': out, |
maruel@chromium.org | 964ddde | 2012-10-29 18:13:46 +0000 | [diff] [blame] | 2248 | 'trace': tracename, |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2249 | }) |
| 2250 | return child.returncode, out |
| 2251 | |
| 2252 | def close(self, timeout=None): |
| 2253 | """Terminates dtrace.""" |
| 2254 | logging.debug('close(%s)' % timeout) |
| 2255 | try: |
| 2256 | try: |
| 2257 | super(Dtrace.Tracer, self).close(timeout) |
| 2258 | # Signal dtrace that it should stop now. |
maruel@chromium.org | f43e68b | 2012-10-15 20:23:10 +0000 | [diff] [blame] | 2259 | # ftruncate doesn't exist on Windows. |
| 2260 | os.ftruncate(self._dummy_file_id, 0) # pylint: disable=E1101 |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2261 | if timeout: |
| 2262 | start = time.time() |
| 2263 | # Use polling. :/ |
| 2264 | while (self._dtrace.poll() is None and |
| 2265 | (time.time() - start) < timeout): |
| 2266 | time.sleep(0.1) |
| 2267 | self._dtrace.kill() |
| 2268 | self._dtrace.wait() |
| 2269 | finally: |
| 2270 | # Make sure to kill it in any case. |
| 2271 | if self._dtrace.poll() is None: |
| 2272 | try: |
| 2273 | self._dtrace.kill() |
| 2274 | self._dtrace.wait() |
| 2275 | except OSError: |
| 2276 | pass |
| 2277 | |
| 2278 | if self._dtrace.returncode != 0: |
| 2279 | # Warn about any dtrace failure but basically ignore it. |
| 2280 | print 'dtrace failure: %s' % self._dtrace.returncode |
| 2281 | finally: |
| 2282 | os.close(self._dummy_file_id) |
maruel | 12e3001 | 2015-10-09 11:55:35 -0700 | [diff] [blame] | 2283 | fs.remove(self._dummy_file_name) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2284 | |
| 2285 | def post_process_log(self): |
| 2286 | """Sorts the log back in order when each call occured. |
| 2287 | |
| 2288 | dtrace doesn't save the buffer in strict order since it keeps one buffer |
| 2289 | per CPU. |
| 2290 | """ |
| 2291 | super(Dtrace.Tracer, self).post_process_log() |
| 2292 | logname = self._logname + '.log' |
maruel | 12e3001 | 2015-10-09 11:55:35 -0700 | [diff] [blame] | 2293 | with fs.open(logname, 'rb') as logfile: |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2294 | lines = [l for l in logfile if l.strip()] |
| 2295 | errors = [l for l in lines if l.startswith('dtrace:')] |
| 2296 | if errors: |
| 2297 | raise TracingFailure( |
| 2298 | 'Found errors in the trace: %s' % '\n'.join(errors), |
| 2299 | None, None, None, logname) |
| 2300 | try: |
| 2301 | lines = sorted(lines, key=lambda l: int(l.split(' ', 1)[0])) |
| 2302 | except ValueError: |
| 2303 | raise TracingFailure( |
| 2304 | 'Found errors in the trace: %s' % '\n'.join( |
| 2305 | l for l in lines if l.split(' ', 1)[0].isdigit()), |
| 2306 | None, None, None, logname) |
maruel | 12e3001 | 2015-10-09 11:55:35 -0700 | [diff] [blame] | 2307 | with fs.open(logname, 'wb') as logfile: |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2308 | logfile.write(''.join(lines)) |
| 2309 | |
maruel@chromium.org | f14633b | 2013-01-25 15:12:52 +0000 | [diff] [blame] | 2310 | def __init__(self, use_sudo=None): |
| 2311 | super(Dtrace, self).__init__() |
| 2312 | self.use_sudo = use_sudo |
| 2313 | |
| 2314 | def get_tracer(self, logname): |
| 2315 | return self.Tracer(logname, self.use_sudo) |
| 2316 | |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2317 | @staticmethod |
| 2318 | def clean_trace(logname): |
| 2319 | for ext in ('', '.log'): |
maruel | 12e3001 | 2015-10-09 11:55:35 -0700 | [diff] [blame] | 2320 | if fs.isfile(logname + ext): |
| 2321 | fs.remove(logname + ext) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2322 | |
| 2323 | @classmethod |
maruel@chromium.org | ec74ff8 | 2012-10-29 18:14:47 +0000 | [diff] [blame] | 2324 | def parse_log(cls, logname, blacklist, trace_name): |
| 2325 | logging.info('parse_log(%s, ..., %s)', logname, trace_name) |
maruel@chromium.org | 8b056ba | 2012-10-16 14:04:49 +0000 | [diff] [blame] | 2326 | assert os.path.isabs(logname) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2327 | |
| 2328 | def blacklist_more(filepath): |
| 2329 | # All the HFS metadata is in the form /.vol/... |
| 2330 | return blacklist(filepath) or re.match(r'^\/\.vol\/.+$', filepath) |
| 2331 | |
Marc-Antoine Ruel | de01180 | 2013-11-12 15:19:47 -0500 | [diff] [blame] | 2332 | data = tools.read_json(logname) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2333 | out = [] |
| 2334 | for item in data['traces']: |
maruel@chromium.org | ec74ff8 | 2012-10-29 18:14:47 +0000 | [diff] [blame] | 2335 | if trace_name and item['trace'] != trace_name: |
| 2336 | continue |
maruel@chromium.org | 964ddde | 2012-10-29 18:13:46 +0000 | [diff] [blame] | 2337 | result = { |
| 2338 | 'output': item['output'], |
| 2339 | 'trace': item['trace'], |
| 2340 | } |
| 2341 | try: |
| 2342 | context = cls.Context(blacklist_more, item['pid'], item['cwd']) |
maruel@chromium.org | 306e0e7 | 2012-11-02 18:22:03 +0000 | [diff] [blame] | 2343 | # It's fine to assume the file as UTF-8: OSX enforces the file names to |
| 2344 | # be valid UTF-8 and we control the log output. |
| 2345 | for line in codecs.open(logname + '.log', 'rb', encoding='utf-8'): |
maruel@chromium.org | 964ddde | 2012-10-29 18:13:46 +0000 | [diff] [blame] | 2346 | context.on_line(line) |
| 2347 | result['results'] = context.to_results() |
| 2348 | except TracingFailure: |
| 2349 | result['exception'] = sys.exc_info() |
| 2350 | out.append(result) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2351 | return out |
| 2352 | |
| 2353 | |
| 2354 | class LogmanTrace(ApiBase): |
| 2355 | """Uses the native Windows ETW based tracing functionality to trace a child |
| 2356 | process. |
| 2357 | |
| 2358 | Caveat: this implementations doesn't track cwd or initial_cwd. It is because |
| 2359 | the Windows Kernel doesn't have a concept of 'current working directory' at |
| 2360 | all. A Win32 process has a map of current directories, one per drive letter |
| 2361 | and it is managed by the user mode kernel32.dll. In kernel, a file is always |
| 2362 | opened relative to another file_object or as an absolute path. All the current |
| 2363 | working directory logic is done in user mode. |
| 2364 | """ |
| 2365 | class Context(ApiBase.Context): |
| 2366 | """Processes a ETW log line and keeps the list of existent and non |
| 2367 | existent files accessed. |
| 2368 | |
| 2369 | Ignores directories. |
| 2370 | """ |
| 2371 | # These indexes are for the stripped version in json. |
| 2372 | EVENT_NAME = 0 |
| 2373 | TYPE = 1 |
| 2374 | PID = 2 |
| 2375 | TID = 3 |
| 2376 | PROCESSOR_ID = 4 |
| 2377 | TIMESTAMP = 5 |
| 2378 | USER_DATA = 6 |
| 2379 | |
| 2380 | class Process(ApiBase.Context.Process): |
| 2381 | def __init__(self, *args): |
| 2382 | super(LogmanTrace.Context.Process, self).__init__(*args) |
| 2383 | # Handle file objects that succeeded. |
| 2384 | self.file_objects = {} |
| 2385 | |
maruel@chromium.org | 19e9fcd | 2012-10-29 18:24:02 +0000 | [diff] [blame] | 2386 | def __init__(self, blacklist, thunk_pid, trace_name, thunk_cmd): |
| 2387 | logging.info( |
| 2388 | '%s(%d, %s, %s)', self.__class__.__name__, thunk_pid, trace_name, |
| 2389 | thunk_cmd) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2390 | super(LogmanTrace.Context, self).__init__(blacklist) |
maruel@chromium.org | 561d4b2 | 2013-09-26 21:08:08 +0000 | [diff] [blame] | 2391 | self._drive_map = file_path.DosDriveMap() |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2392 | # Threads mapping to the corresponding process id. |
| 2393 | self._threads_active = {} |
maruel@chromium.org | 19e9fcd | 2012-10-29 18:24:02 +0000 | [diff] [blame] | 2394 | # Process ID of the tracer, e.g. the temporary script created by |
maruel@chromium.org | 2909b7c | 2013-05-29 18:52:59 +0000 | [diff] [blame] | 2395 | # create_subprocess_thunk(). This is tricky because the process id may |
| 2396 | # have been reused. |
maruel@chromium.org | 19e9fcd | 2012-10-29 18:24:02 +0000 | [diff] [blame] | 2397 | self._thunk_pid = thunk_pid |
| 2398 | self._thunk_cmd = thunk_cmd |
| 2399 | self._trace_name = trace_name |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2400 | self._line_number = 0 |
maruel@chromium.org | 19e9fcd | 2012-10-29 18:24:02 +0000 | [diff] [blame] | 2401 | self._thunk_process = None |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2402 | |
| 2403 | def on_line(self, line): |
| 2404 | """Processes a json Event line.""" |
| 2405 | self._line_number += 1 |
| 2406 | try: |
| 2407 | # By Opcode |
| 2408 | handler = getattr( |
| 2409 | self, |
| 2410 | 'handle_%s_%s' % (line[self.EVENT_NAME], line[self.TYPE]), |
| 2411 | None) |
| 2412 | if not handler: |
| 2413 | raise TracingFailure( |
| 2414 | 'Unexpected event %s_%s' % ( |
| 2415 | line[self.EVENT_NAME], line[self.TYPE]), |
| 2416 | None, None, None) |
| 2417 | handler(line) |
| 2418 | except TracingFailure, e: |
| 2419 | # Hack in the values since the handler could be a static function. |
| 2420 | e.pid = line[self.PID] |
| 2421 | e.line = line |
| 2422 | e.line_number = self._line_number |
| 2423 | # Re-raise the modified exception. |
| 2424 | raise |
| 2425 | except (KeyError, NotImplementedError, ValueError), e: |
| 2426 | raise TracingFailure( |
| 2427 | 'Trace generated a %s exception: %s' % ( |
| 2428 | e.__class__.__name__, str(e)), |
| 2429 | line[self.PID], |
| 2430 | self._line_number, |
| 2431 | line, |
| 2432 | e) |
| 2433 | |
| 2434 | def to_results(self): |
| 2435 | if not self.root_process: |
| 2436 | raise TracingFailure( |
maruel@chromium.org | 19e9fcd | 2012-10-29 18:24:02 +0000 | [diff] [blame] | 2437 | 'Failed to detect the initial process %d' % self._thunk_pid, |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2438 | None, None, None) |
| 2439 | process = self.root_process.to_results_process() |
| 2440 | return Results(process) |
| 2441 | |
| 2442 | def _thread_to_process(self, tid): |
| 2443 | """Finds the process from the thread id.""" |
| 2444 | tid = int(tid, 16) |
| 2445 | pid = self._threads_active.get(tid) |
| 2446 | if not pid or not self._process_lookup.get(pid): |
| 2447 | return |
| 2448 | return self._process_lookup[pid] |
| 2449 | |
| 2450 | @classmethod |
| 2451 | def handle_EventTrace_Header(cls, line): |
| 2452 | """Verifies no event was dropped, e.g. no buffer overrun occured.""" |
| 2453 | BUFFER_SIZE = cls.USER_DATA |
| 2454 | #VERSION = cls.USER_DATA + 1 |
| 2455 | #PROVIDER_VERSION = cls.USER_DATA + 2 |
| 2456 | #NUMBER_OF_PROCESSORS = cls.USER_DATA + 3 |
| 2457 | #END_TIME = cls.USER_DATA + 4 |
| 2458 | #TIMER_RESOLUTION = cls.USER_DATA + 5 |
| 2459 | #MAX_FILE_SIZE = cls.USER_DATA + 6 |
| 2460 | #LOG_FILE_MODE = cls.USER_DATA + 7 |
| 2461 | #BUFFERS_WRITTEN = cls.USER_DATA + 8 |
| 2462 | #START_BUFFERS = cls.USER_DATA + 9 |
| 2463 | #POINTER_SIZE = cls.USER_DATA + 10 |
| 2464 | EVENTS_LOST = cls.USER_DATA + 11 |
| 2465 | #CPU_SPEED = cls.USER_DATA + 12 |
| 2466 | #LOGGER_NAME = cls.USER_DATA + 13 |
| 2467 | #LOG_FILE_NAME = cls.USER_DATA + 14 |
| 2468 | #BOOT_TIME = cls.USER_DATA + 15 |
| 2469 | #PERF_FREQ = cls.USER_DATA + 16 |
| 2470 | #START_TIME = cls.USER_DATA + 17 |
| 2471 | #RESERVED_FLAGS = cls.USER_DATA + 18 |
| 2472 | #BUFFERS_LOST = cls.USER_DATA + 19 |
| 2473 | #SESSION_NAME_STRING = cls.USER_DATA + 20 |
| 2474 | #LOG_FILE_NAME_STRING = cls.USER_DATA + 21 |
| 2475 | if line[EVENTS_LOST] != '0': |
| 2476 | raise TracingFailure( |
| 2477 | ( '%s events were lost during trace, please increase the buffer ' |
| 2478 | 'size from %s') % (line[EVENTS_LOST], line[BUFFER_SIZE]), |
| 2479 | None, None, None) |
| 2480 | |
| 2481 | def handle_FileIo_Cleanup(self, line): |
| 2482 | """General wisdom: if a file is closed, it's because it was opened. |
| 2483 | |
| 2484 | Note that FileIo_Close is not used since if a file was opened properly but |
| 2485 | not closed before the process exits, only Cleanup will be logged. |
| 2486 | """ |
| 2487 | #IRP = self.USER_DATA |
| 2488 | TTID = self.USER_DATA + 1 # Thread ID, that's what we want. |
| 2489 | FILE_OBJECT = self.USER_DATA + 2 |
| 2490 | #FILE_KEY = self.USER_DATA + 3 |
| 2491 | proc = self._thread_to_process(line[TTID]) |
| 2492 | if not proc: |
| 2493 | # Not a process we care about. |
| 2494 | return |
| 2495 | file_object = line[FILE_OBJECT] |
| 2496 | if file_object in proc.file_objects: |
maruel@chromium.org | 538141b | 2013-06-03 20:57:17 +0000 | [diff] [blame] | 2497 | filepath, access_type = proc.file_objects.pop(file_object) |
| 2498 | proc.add_file(filepath, access_type) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2499 | |
| 2500 | def handle_FileIo_Create(self, line): |
| 2501 | """Handles a file open. |
| 2502 | |
| 2503 | All FileIo events are described at |
| 2504 | http://msdn.microsoft.com/library/windows/desktop/aa363884.aspx |
| 2505 | for some value of 'description'. |
| 2506 | |
| 2507 | " (..) process and thread id values of the IO events (..) are not valid " |
| 2508 | http://msdn.microsoft.com/magazine/ee358703.aspx |
| 2509 | |
| 2510 | The FileIo.Create event doesn't return if the CreateFile() call |
| 2511 | succeeded, so keep track of the file_object and check that it is |
| 2512 | eventually closed with FileIo_Cleanup. |
| 2513 | """ |
| 2514 | #IRP = self.USER_DATA |
| 2515 | TTID = self.USER_DATA + 1 # Thread ID, that's what we want. |
| 2516 | FILE_OBJECT = self.USER_DATA + 2 |
| 2517 | #CREATE_OPTIONS = self.USER_DATA + 3 |
| 2518 | #FILE_ATTRIBUTES = self.USER_DATA + 4 |
maruel@chromium.org | 538141b | 2013-06-03 20:57:17 +0000 | [diff] [blame] | 2519 | #SHARE_ACCESS = self.USER_DATA + 5 |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2520 | OPEN_PATH = self.USER_DATA + 6 |
| 2521 | |
| 2522 | proc = self._thread_to_process(line[TTID]) |
| 2523 | if not proc: |
| 2524 | # Not a process we care about. |
| 2525 | return |
| 2526 | |
maruel@chromium.org | 306e0e7 | 2012-11-02 18:22:03 +0000 | [diff] [blame] | 2527 | raw_path = line[OPEN_PATH] |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2528 | # Ignore directories and bare drive right away. |
| 2529 | if raw_path.endswith(os.path.sep): |
| 2530 | return |
| 2531 | filepath = self._drive_map.to_win32(raw_path) |
| 2532 | # Ignore bare drive right away. Some may still fall through with format |
| 2533 | # like '\\?\X:' |
| 2534 | if len(filepath) == 2: |
| 2535 | return |
| 2536 | file_object = line[FILE_OBJECT] |
maruel | 12e3001 | 2015-10-09 11:55:35 -0700 | [diff] [blame] | 2537 | if fs.isdir(filepath): |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2538 | # There is no O_DIRECTORY equivalent on Windows. The closed is |
| 2539 | # FILE_FLAG_BACKUP_SEMANTICS but it's not exactly right either. So |
| 2540 | # simply discard directories are they are found. |
| 2541 | return |
maruel@chromium.org | 538141b | 2013-06-03 20:57:17 +0000 | [diff] [blame] | 2542 | # Override any stale file object. |
| 2543 | # TODO(maruel): Figure out a way to detect if the file was opened for |
| 2544 | # reading or writting. Sadly CREATE_OPTIONS doesn't seem to be of any help |
| 2545 | # here. For now mark as None to make it clear we have no idea what it is |
| 2546 | # about. |
| 2547 | proc.file_objects[file_object] = (filepath, None) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2548 | |
| 2549 | def handle_FileIo_Rename(self, line): |
| 2550 | # TODO(maruel): Handle? |
| 2551 | pass |
| 2552 | |
| 2553 | def handle_Process_End(self, line): |
| 2554 | pid = line[self.PID] |
| 2555 | if self._process_lookup.get(pid): |
| 2556 | logging.info('Terminated: %d' % pid) |
| 2557 | self._process_lookup[pid] = None |
| 2558 | else: |
| 2559 | logging.debug('Terminated: %d' % pid) |
maruel@chromium.org | 19e9fcd | 2012-10-29 18:24:02 +0000 | [diff] [blame] | 2560 | if self._thunk_process and self._thunk_process.pid == pid: |
| 2561 | self._thunk_process = None |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2562 | |
| 2563 | def handle_Process_Start(self, line): |
| 2564 | """Handles a new child process started by PID.""" |
| 2565 | #UNIQUE_PROCESS_KEY = self.USER_DATA |
| 2566 | PROCESS_ID = self.USER_DATA + 1 |
| 2567 | #PARENT_PID = self.USER_DATA + 2 |
| 2568 | #SESSION_ID = self.USER_DATA + 3 |
| 2569 | #EXIT_STATUS = self.USER_DATA + 4 |
| 2570 | #DIRECTORY_TABLE_BASE = self.USER_DATA + 5 |
| 2571 | #USER_SID = self.USER_DATA + 6 |
| 2572 | IMAGE_FILE_NAME = self.USER_DATA + 7 |
| 2573 | COMMAND_LINE = self.USER_DATA + 8 |
| 2574 | |
| 2575 | ppid = line[self.PID] |
| 2576 | pid = int(line[PROCESS_ID], 16) |
maruel@chromium.org | 306e0e7 | 2012-11-02 18:22:03 +0000 | [diff] [blame] | 2577 | command_line = CommandLineToArgvW(line[COMMAND_LINE]) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2578 | logging.debug( |
| 2579 | 'New process %d->%d (%s) %s' % |
maruel@chromium.org | 306e0e7 | 2012-11-02 18:22:03 +0000 | [diff] [blame] | 2580 | (ppid, pid, line[IMAGE_FILE_NAME], command_line)) |
maruel@chromium.org | 19e9fcd | 2012-10-29 18:24:02 +0000 | [diff] [blame] | 2581 | |
| 2582 | if pid == self._thunk_pid: |
| 2583 | # Need to ignore processes we don't know about because the log is |
| 2584 | # system-wide. self._thunk_pid shall start only one process. |
| 2585 | # This is tricky though because Windows *loves* to reuse process id and |
| 2586 | # it happens often that the process ID of the thunk script created by |
maruel@chromium.org | 2909b7c | 2013-05-29 18:52:59 +0000 | [diff] [blame] | 2587 | # create_subprocess_thunk() is reused. So just detecting the pid here is |
| 2588 | # not sufficient, we must confirm the command line. |
maruel@chromium.org | 19e9fcd | 2012-10-29 18:24:02 +0000 | [diff] [blame] | 2589 | if command_line[:len(self._thunk_cmd)] != self._thunk_cmd: |
| 2590 | logging.info( |
| 2591 | 'Ignoring duplicate pid %d for %s: %s while searching for %s', |
| 2592 | pid, self._trace_name, command_line, self._thunk_cmd) |
| 2593 | return |
| 2594 | |
| 2595 | # TODO(maruel): The check is quite weak. Add the thunk path. |
| 2596 | if self._thunk_process: |
| 2597 | raise TracingFailure( |
| 2598 | ( 'Parent process is _thunk_pid(%d) but thunk_process(%d) is ' |
| 2599 | 'already set') % (self._thunk_pid, self._thunk_process.pid), |
| 2600 | None, None, None) |
| 2601 | proc = self.Process(self.blacklist, pid, None) |
| 2602 | self._thunk_process = proc |
| 2603 | return |
| 2604 | elif ppid == self._thunk_pid and self._thunk_process: |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2605 | proc = self.Process(self.blacklist, pid, None) |
| 2606 | self.root_process = proc |
| 2607 | ppid = None |
| 2608 | elif self._process_lookup.get(ppid): |
| 2609 | proc = self.Process(self.blacklist, pid, None) |
| 2610 | self._process_lookup[ppid].children.append(proc) |
| 2611 | else: |
| 2612 | # Ignore |
| 2613 | return |
| 2614 | self._process_lookup[pid] = proc |
| 2615 | |
maruel@chromium.org | 306e0e7 | 2012-11-02 18:22:03 +0000 | [diff] [blame] | 2616 | proc.command = command_line |
| 2617 | proc.executable = line[IMAGE_FILE_NAME] |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2618 | # proc.command[0] may be the absolute path of 'executable' but it may be |
| 2619 | # anything else too. If it happens that command[0] ends with executable, |
| 2620 | # use it, otherwise defaults to the base name. |
| 2621 | cmd0 = proc.command[0].lower() |
| 2622 | if not cmd0.endswith('.exe'): |
| 2623 | # TODO(maruel): That's not strictly true either. |
| 2624 | cmd0 += '.exe' |
maruel | 12e3001 | 2015-10-09 11:55:35 -0700 | [diff] [blame] | 2625 | if cmd0.endswith(proc.executable) and fs.isfile(cmd0): |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2626 | # Fix the path. |
| 2627 | cmd0 = cmd0.replace('/', os.path.sep) |
| 2628 | cmd0 = os.path.normpath(cmd0) |
maruel@chromium.org | 561d4b2 | 2013-09-26 21:08:08 +0000 | [diff] [blame] | 2629 | proc.executable = file_path.get_native_path_case(cmd0) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2630 | logging.info( |
| 2631 | 'New child: %s -> %d %s' % (ppid, pid, proc.executable)) |
| 2632 | |
| 2633 | def handle_Thread_End(self, line): |
| 2634 | """Has the same parameters as Thread_Start.""" |
| 2635 | tid = int(line[self.TID], 16) |
| 2636 | self._threads_active.pop(tid, None) |
| 2637 | |
| 2638 | def handle_Thread_Start(self, line): |
| 2639 | """Handles a new thread created. |
| 2640 | |
| 2641 | Do not use self.PID here since a process' initial thread is created by |
| 2642 | the parent process. |
| 2643 | """ |
| 2644 | PROCESS_ID = self.USER_DATA |
| 2645 | TTHREAD_ID = self.USER_DATA + 1 |
| 2646 | #STACK_BASE = self.USER_DATA + 2 |
| 2647 | #STACK_LIMIT = self.USER_DATA + 3 |
| 2648 | #USER_STACK_BASE = self.USER_DATA + 4 |
| 2649 | #USER_STACK_LIMIT = self.USER_DATA + 5 |
| 2650 | #AFFINITY = self.USER_DATA + 6 |
| 2651 | #WIN32_START_ADDR = self.USER_DATA + 7 |
| 2652 | #TEB_BASE = self.USER_DATA + 8 |
| 2653 | #SUB_PROCESS_TAG = self.USER_DATA + 9 |
| 2654 | #BASE_PRIORITY = self.USER_DATA + 10 |
| 2655 | #PAGE_PRIORITY = self.USER_DATA + 11 |
| 2656 | #IO_PRIORITY = self.USER_DATA + 12 |
| 2657 | #THREAD_FLAGS = self.USER_DATA + 13 |
| 2658 | # Do not use self.PID here since a process' initial thread is created by |
| 2659 | # the parent process. |
| 2660 | pid = int(line[PROCESS_ID], 16) |
| 2661 | tid = int(line[TTHREAD_ID], 16) |
| 2662 | logging.debug('New thread pid:%d, tid:%d' % (pid, tid)) |
| 2663 | self._threads_active[tid] = pid |
| 2664 | |
| 2665 | @classmethod |
| 2666 | def supported_events(cls): |
| 2667 | """Returns all the procesed events.""" |
| 2668 | out = [] |
| 2669 | for member in dir(cls): |
| 2670 | match = re.match(r'^handle_([A-Za-z]+)_([A-Za-z]+)$', member) |
| 2671 | if match: |
| 2672 | out.append(match.groups()) |
| 2673 | return out |
| 2674 | |
| 2675 | class Tracer(ApiBase.Tracer): |
| 2676 | # The basic headers. |
| 2677 | EXPECTED_HEADER = [ |
| 2678 | u'Event Name', |
| 2679 | u'Type', |
| 2680 | u'Event ID', |
| 2681 | u'Version', |
| 2682 | u'Channel', |
| 2683 | u'Level', # 5 |
| 2684 | u'Opcode', |
| 2685 | u'Task', |
| 2686 | u'Keyword', |
| 2687 | u'PID', |
| 2688 | u'TID', # 10 |
| 2689 | u'Processor Number', |
| 2690 | u'Instance ID', |
| 2691 | u'Parent Instance ID', |
| 2692 | u'Activity ID', |
| 2693 | u'Related Activity ID', # 15 |
| 2694 | u'Clock-Time', |
| 2695 | u'Kernel(ms)', # Both have a resolution of ~15ms which makes them |
| 2696 | u'User(ms)', # pretty much useless. |
| 2697 | u'User Data', # Extra arguments that are event-specific. |
| 2698 | ] |
| 2699 | # Only the useful headers common to all entries are listed there. Any column |
| 2700 | # at 19 or higher is dependent on the specific event. |
| 2701 | EVENT_NAME = 0 |
| 2702 | TYPE = 1 |
| 2703 | PID = 9 |
| 2704 | TID = 10 |
| 2705 | PROCESSOR_ID = 11 |
| 2706 | TIMESTAMP = 16 |
| 2707 | NULL_GUID = '{00000000-0000-0000-0000-000000000000}' |
| 2708 | USER_DATA = 19 |
| 2709 | |
maruel@chromium.org | 306e0e7 | 2012-11-02 18:22:03 +0000 | [diff] [blame] | 2710 | class CsvReader(object): |
| 2711 | """CSV reader that reads files generated by tracerpt.exe. |
| 2712 | |
| 2713 | csv.reader() fails to read them properly, it mangles file names quoted |
| 2714 | with "" with a comma in it. |
| 2715 | """ |
| 2716 | # 0. Had a ',' or one of the following ' ' after a comma, next should |
| 2717 | # be ' ', '"' or string or ',' for an empty field. |
| 2718 | ( HAD_DELIMITER, |
| 2719 | # 1. Processing an unquoted field up to ','. |
| 2720 | IN_STR, |
| 2721 | # 2. Processing a new field starting with '"'. |
| 2722 | STARTING_STR_QUOTED, |
| 2723 | # 3. Second quote in a row at the start of a field. It could be either |
| 2724 | # '""foo""' or '""'. Who the hell thought it was a great idea to use |
| 2725 | # the same character for delimiting and escaping? |
| 2726 | STARTING_SECOND_QUOTE, |
| 2727 | # 4. A quote inside a quoted string where the previous character was |
| 2728 | # not a quote, so the string is not empty. Can be either: end of a |
| 2729 | # quoted string (a delimiter) or a quote escape. The next char must be |
| 2730 | # either '"' or ','. |
| 2731 | HAD_QUOTE_IN_QUOTED, |
| 2732 | # 5. Second quote inside a quoted string. |
| 2733 | HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED, |
| 2734 | # 6. Processing a field that started with '"'. |
| 2735 | IN_STR_QUOTED) = range(7) |
| 2736 | |
| 2737 | def __init__(self, f): |
| 2738 | self.f = f |
| 2739 | |
| 2740 | def __iter__(self): |
| 2741 | return self |
| 2742 | |
| 2743 | def next(self): |
| 2744 | """Splits the line in fields.""" |
| 2745 | line = self.f.readline() |
| 2746 | if not line: |
| 2747 | raise StopIteration() |
| 2748 | line = line.strip() |
| 2749 | fields = [] |
| 2750 | state = self.HAD_DELIMITER |
| 2751 | for i, c in enumerate(line): |
| 2752 | if state == self.HAD_DELIMITER: |
| 2753 | if c == ',': |
| 2754 | # Empty field. |
| 2755 | fields.append('') |
| 2756 | elif c == ' ': |
| 2757 | # Ignore initial whitespaces |
| 2758 | pass |
| 2759 | elif c == '"': |
| 2760 | state = self.STARTING_STR_QUOTED |
| 2761 | fields.append('') |
| 2762 | else: |
| 2763 | # Start of a new field. |
| 2764 | state = self.IN_STR |
| 2765 | fields.append(c) |
| 2766 | |
| 2767 | elif state == self.IN_STR: |
| 2768 | # Do not accept quote inside unquoted field. |
| 2769 | assert c != '"', (i, c, line, fields) |
| 2770 | if c == ',': |
| 2771 | fields[-1] = fields[-1].strip() |
| 2772 | state = self.HAD_DELIMITER |
| 2773 | else: |
| 2774 | fields[-1] = fields[-1] + c |
| 2775 | |
| 2776 | elif state == self.STARTING_STR_QUOTED: |
| 2777 | if c == '"': |
| 2778 | # Do not store the character yet. |
| 2779 | state = self.STARTING_SECOND_QUOTE |
| 2780 | else: |
| 2781 | state = self.IN_STR_QUOTED |
| 2782 | fields[-1] = fields[-1] + c |
| 2783 | |
| 2784 | elif state == self.STARTING_SECOND_QUOTE: |
| 2785 | if c == ',': |
| 2786 | # It was an empty field. '""' == ''. |
| 2787 | state = self.HAD_DELIMITER |
| 2788 | else: |
| 2789 | fields[-1] = fields[-1] + '"' + c |
| 2790 | state = self.IN_STR_QUOTED |
| 2791 | |
| 2792 | elif state == self.HAD_QUOTE_IN_QUOTED: |
| 2793 | if c == ',': |
| 2794 | # End of the string. |
| 2795 | state = self.HAD_DELIMITER |
| 2796 | elif c == '"': |
| 2797 | state = self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED |
| 2798 | else: |
| 2799 | # The previous double-quote was just an unescaped quote. |
| 2800 | fields[-1] = fields[-1] + '"' + c |
| 2801 | state = self.IN_STR_QUOTED |
| 2802 | |
| 2803 | elif state == self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED: |
| 2804 | if c == ',': |
| 2805 | # End of the string. |
| 2806 | state = self.HAD_DELIMITER |
| 2807 | fields[-1] = fields[-1] + '"' |
| 2808 | else: |
maruel@chromium.org | 261d135 | 2013-02-25 19:23:20 +0000 | [diff] [blame] | 2809 | # That's just how the logger rolls. Revert back to appending the |
| 2810 | # char and "guess" it was a quote in a double-quoted string. |
| 2811 | state = self.IN_STR_QUOTED |
| 2812 | fields[-1] = fields[-1] + '"' + c |
maruel@chromium.org | 306e0e7 | 2012-11-02 18:22:03 +0000 | [diff] [blame] | 2813 | |
| 2814 | elif state == self.IN_STR_QUOTED: |
| 2815 | if c == '"': |
| 2816 | # Could be a delimiter or an escape. |
| 2817 | state = self.HAD_QUOTE_IN_QUOTED |
| 2818 | else: |
| 2819 | fields[-1] = fields[-1] + c |
| 2820 | |
| 2821 | if state == self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED: |
| 2822 | fields[-1] = fields[-1] + '"' |
| 2823 | else: |
| 2824 | assert state in ( |
| 2825 | # Terminated with a normal field. |
| 2826 | self.IN_STR, |
| 2827 | # Terminated with an empty field. |
| 2828 | self.STARTING_SECOND_QUOTE, |
| 2829 | # Terminated with a normal quoted field. |
| 2830 | self.HAD_QUOTE_IN_QUOTED), ( |
| 2831 | line, state, fields) |
| 2832 | return fields |
| 2833 | |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2834 | def __init__(self, logname): |
| 2835 | """Starts the log collection. |
| 2836 | |
| 2837 | Requires administrative access. logman.exe is synchronous so no need for a |
| 2838 | "warmup" call. 'Windows Kernel Trace' is *localized* so use its GUID |
| 2839 | instead. The GUID constant name is SystemTraceControlGuid. Lovely. |
| 2840 | |
| 2841 | One can get the list of potentially interesting providers with: |
| 2842 | "logman query providers | findstr /i file" |
| 2843 | """ |
| 2844 | super(LogmanTrace.Tracer, self).__init__(logname) |
maruel@chromium.org | 4c9b4fa | 2013-05-29 19:22:14 +0000 | [diff] [blame] | 2845 | self._signal_script = create_subprocess_thunk() |
| 2846 | self._scripts_to_cleanup.append(self._signal_script) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2847 | cmd_start = [ |
| 2848 | 'logman.exe', |
| 2849 | 'start', |
| 2850 | 'NT Kernel Logger', |
| 2851 | '-p', '{9e814aad-3204-11d2-9a82-006008a86939}', |
| 2852 | # splitio,fileiocompletion,syscall,file,cswitch,img |
| 2853 | '(process,fileio,thread)', |
| 2854 | '-o', self._logname + '.etl', |
| 2855 | '-ets', # Send directly to kernel |
| 2856 | # Values extracted out of thin air. |
| 2857 | # Event Trace Session buffer size in kb. |
| 2858 | '-bs', '10240', |
| 2859 | # Number of Event Trace Session buffers. |
| 2860 | '-nb', '16', '256', |
| 2861 | ] |
| 2862 | logging.debug('Running: %s' % cmd_start) |
| 2863 | try: |
| 2864 | subprocess.check_call( |
| 2865 | cmd_start, |
| 2866 | stdin=subprocess.PIPE, |
| 2867 | stdout=subprocess.PIPE, |
| 2868 | stderr=subprocess.STDOUT) |
| 2869 | except subprocess.CalledProcessError, e: |
| 2870 | if e.returncode == -2147024891: |
| 2871 | print >> sys.stderr, 'Please restart with an elevated admin prompt' |
| 2872 | elif e.returncode == -2144337737: |
| 2873 | print >> sys.stderr, ( |
| 2874 | 'A kernel trace was already running, stop it and try again') |
| 2875 | raise |
| 2876 | |
| 2877 | def trace(self, cmd, cwd, tracename, output): |
| 2878 | logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output)) |
| 2879 | assert os.path.isabs(cmd[0]), cmd[0] |
| 2880 | assert os.path.isabs(cwd), cwd |
| 2881 | assert os.path.normpath(cwd) == cwd, cwd |
| 2882 | with self._lock: |
| 2883 | if not self._initialized: |
| 2884 | raise TracingFailure( |
| 2885 | 'Called Tracer.trace() on an unitialized object', |
| 2886 | None, None, None, tracename) |
| 2887 | assert tracename not in (i['trace'] for i in self._traces) |
| 2888 | |
| 2889 | # Use "logman -?" for help. |
| 2890 | |
| 2891 | stdout = stderr = None |
| 2892 | if output: |
| 2893 | stdout = subprocess.PIPE |
| 2894 | stderr = subprocess.STDOUT |
| 2895 | |
| 2896 | # Run the child process. |
| 2897 | logging.debug('Running: %s' % cmd) |
maruel@chromium.org | 2909b7c | 2013-05-29 18:52:59 +0000 | [diff] [blame] | 2898 | # Use the temporary script generated with create_subprocess_thunk() so we |
| 2899 | # have a clear pid owner. Since trace_inputs.py can be used as a library |
| 2900 | # and could trace multiple processes simultaneously, it makes it more |
| 2901 | # complex if the executable to be traced is executed directly here. It |
| 2902 | # also solves issues related to logman.exe that needs to be executed to |
| 2903 | # control the kernel trace. |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2904 | child_cmd = [ |
| 2905 | sys.executable, |
maruel@chromium.org | 4c9b4fa | 2013-05-29 19:22:14 +0000 | [diff] [blame] | 2906 | self._signal_script, |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2907 | tracename, |
| 2908 | ] |
| 2909 | child = subprocess.Popen( |
vadimsh@chromium.org | a432647 | 2013-08-24 02:05:41 +0000 | [diff] [blame] | 2910 | child_cmd + tools.fix_python_path(cmd), |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2911 | cwd=cwd, |
| 2912 | stdin=subprocess.PIPE, |
| 2913 | stdout=stdout, |
| 2914 | stderr=stderr) |
| 2915 | logging.debug('Started child pid: %d' % child.pid) |
| 2916 | out = child.communicate()[0] |
| 2917 | # This doesn't mean all the grand-children are done. Sadly, we don't have |
| 2918 | # a good way to determine that. |
| 2919 | |
| 2920 | with self._lock: |
| 2921 | assert tracename not in (i['trace'] for i in self._traces) |
| 2922 | self._traces.append({ |
maruel@chromium.org | f43e68b | 2012-10-15 20:23:10 +0000 | [diff] [blame] | 2923 | 'cmd': cmd, |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2924 | 'cwd': cwd, |
maruel@chromium.org | 964ddde | 2012-10-29 18:13:46 +0000 | [diff] [blame] | 2925 | 'output': out, |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2926 | 'pid': child.pid, |
maruel@chromium.org | 19e9fcd | 2012-10-29 18:24:02 +0000 | [diff] [blame] | 2927 | # Used to figure out the real process when process ids are reused. |
| 2928 | 'thunk_cmd': child_cmd, |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2929 | 'trace': tracename, |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2930 | }) |
| 2931 | |
| 2932 | return child.returncode, out |
| 2933 | |
| 2934 | def close(self, _timeout=None): |
| 2935 | """Stops the kernel log collection and converts the traces to text |
| 2936 | representation. |
| 2937 | """ |
| 2938 | with self._lock: |
maruel@chromium.org | 808e3f2 | 2013-05-16 16:42:54 +0000 | [diff] [blame] | 2939 | try: |
| 2940 | super(LogmanTrace.Tracer, self).close() |
| 2941 | finally: |
| 2942 | cmd_stop = [ |
| 2943 | 'logman.exe', |
| 2944 | 'stop', |
| 2945 | 'NT Kernel Logger', |
| 2946 | '-ets', # Sends the command directly to the kernel. |
| 2947 | ] |
| 2948 | logging.debug('Running: %s' % cmd_stop) |
| 2949 | subprocess.check_call( |
| 2950 | cmd_stop, |
| 2951 | stdin=subprocess.PIPE, |
| 2952 | stdout=subprocess.PIPE, |
| 2953 | stderr=subprocess.STDOUT) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2954 | |
| 2955 | def post_process_log(self): |
| 2956 | """Converts the .etl file into .csv then into .json.""" |
| 2957 | super(LogmanTrace.Tracer, self).post_process_log() |
| 2958 | logformat = 'csv' |
| 2959 | self._convert_log(logformat) |
maruel@chromium.org | eaedeb4 | 2012-10-31 14:38:22 +0000 | [diff] [blame] | 2960 | self._trim_log(logformat) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2961 | |
maruel@chromium.org | 808e3f2 | 2013-05-16 16:42:54 +0000 | [diff] [blame] | 2962 | def _gen_logdata(self): |
| 2963 | return { |
| 2964 | 'format': 'csv', |
| 2965 | 'traces': self._traces, |
| 2966 | } |
| 2967 | |
maruel@chromium.org | eaedeb4 | 2012-10-31 14:38:22 +0000 | [diff] [blame] | 2968 | def _trim_log(self, logformat): |
| 2969 | """Reduces the amount of data in original log by generating a 'reduced' |
| 2970 | log. |
| 2971 | """ |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2972 | if logformat == 'csv_utf16': |
maruel@chromium.org | 306e0e7 | 2012-11-02 18:22:03 +0000 | [diff] [blame] | 2973 | file_handle = codecs.open( |
| 2974 | self._logname + '.' + logformat, 'r', encoding='utf-16') |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2975 | |
| 2976 | elif logformat == 'csv': |
maruel@chromium.org | 306e0e7 | 2012-11-02 18:22:03 +0000 | [diff] [blame] | 2977 | assert sys.getfilesystemencoding() == 'mbcs' |
| 2978 | file_handle = codecs.open( |
| 2979 | self._logname + '.' + logformat, 'r', |
| 2980 | encoding=get_current_encoding()) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2981 | |
| 2982 | supported_events = LogmanTrace.Context.supported_events() |
| 2983 | |
| 2984 | def trim(generator): |
maruel@chromium.org | eaedeb4 | 2012-10-31 14:38:22 +0000 | [diff] [blame] | 2985 | """Loads items from the generator and returns the interesting data. |
| 2986 | |
| 2987 | It filters out any uninteresting line and reduce the amount of data in |
| 2988 | the trace. |
| 2989 | """ |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2990 | for index, line in enumerate(generator): |
| 2991 | if not index: |
| 2992 | if line != self.EXPECTED_HEADER: |
| 2993 | raise TracingFailure( |
maruel@chromium.org | 306e0e7 | 2012-11-02 18:22:03 +0000 | [diff] [blame] | 2994 | 'Found malformed header: %s' % line, |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 2995 | None, None, None) |
| 2996 | continue |
| 2997 | # As you can see, the CSV is full of useful non-redundant information: |
| 2998 | if (line[2] != '0' or # Event ID |
| 2999 | line[3] not in ('2', '3') or # Version |
| 3000 | line[4] != '0' or # Channel |
| 3001 | line[5] != '0' or # Level |
| 3002 | line[7] != '0' or # Task |
| 3003 | line[8] != '0x0000000000000000' or # Keyword |
| 3004 | line[12] != '' or # Instance ID |
| 3005 | line[13] != '' or # Parent Instance ID |
| 3006 | line[14] != self.NULL_GUID or # Activity ID |
| 3007 | line[15] != ''): # Related Activity ID |
| 3008 | raise TracingFailure( |
| 3009 | 'Found unexpected values in line: %s' % ' '.join(line), |
| 3010 | None, None, None) |
| 3011 | |
| 3012 | if (line[self.EVENT_NAME], line[self.TYPE]) not in supported_events: |
| 3013 | continue |
| 3014 | |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 3015 | yield [ |
| 3016 | line[self.EVENT_NAME], |
| 3017 | line[self.TYPE], |
| 3018 | line[self.PID], |
| 3019 | line[self.TID], |
| 3020 | line[self.PROCESSOR_ID], |
| 3021 | line[self.TIMESTAMP], |
| 3022 | ] + line[self.USER_DATA:] |
| 3023 | |
maruel@chromium.org | eaedeb4 | 2012-10-31 14:38:22 +0000 | [diff] [blame] | 3024 | # must not convert the trim() call into a list, since it will use too much |
| 3025 | # memory for large trace. use a csv file as a workaround since the json |
| 3026 | # parser requires a complete in-memory file. |
maruel | 12e3001 | 2015-10-09 11:55:35 -0700 | [diff] [blame] | 3027 | path = os.path.join( |
| 3028 | unicode(os.getcwd()), u'%s.preprocessed' % self._logname) |
| 3029 | with fs.open(path, 'wb') as f: |
maruel@chromium.org | eaedeb4 | 2012-10-31 14:38:22 +0000 | [diff] [blame] | 3030 | # $ and * can't be used in file name on windows, reducing the likelihood |
| 3031 | # of having to escape a string. |
| 3032 | out = csv.writer( |
| 3033 | f, delimiter='$', quotechar='*', quoting=csv.QUOTE_MINIMAL) |
maruel@chromium.org | 306e0e7 | 2012-11-02 18:22:03 +0000 | [diff] [blame] | 3034 | for line in trim(self.CsvReader(file_handle)): |
maruel@chromium.org | eaedeb4 | 2012-10-31 14:38:22 +0000 | [diff] [blame] | 3035 | out.writerow([s.encode('utf-8') for s in line]) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 3036 | |
| 3037 | def _convert_log(self, logformat): |
| 3038 | """Converts the ETL trace to text representation. |
| 3039 | |
| 3040 | Normally, 'csv' is sufficient. If complex scripts are used (like eastern |
| 3041 | languages), use 'csv_utf16'. If localization gets in the way, use 'xml'. |
| 3042 | |
| 3043 | Arguments: |
| 3044 | - logformat: Text format to be generated, csv, csv_utf16 or xml. |
| 3045 | |
| 3046 | Use "tracerpt -?" for help. |
| 3047 | """ |
| 3048 | LOCALE_INVARIANT = 0x7F |
| 3049 | windll.kernel32.SetThreadLocale(LOCALE_INVARIANT) |
| 3050 | cmd_convert = [ |
| 3051 | 'tracerpt.exe', |
| 3052 | '-l', self._logname + '.etl', |
| 3053 | '-o', self._logname + '.' + logformat, |
| 3054 | '-gmt', # Use UTC |
| 3055 | '-y', # No prompt |
| 3056 | # Use -of XML to get the header of each items after column 19, e.g. all |
| 3057 | # the actual headers of 'User Data'. |
| 3058 | ] |
| 3059 | |
| 3060 | if logformat == 'csv': |
| 3061 | # tracerpt localizes the 'Type' column, for major brainfuck |
| 3062 | # entertainment. I can't imagine any sane reason to do that. |
| 3063 | cmd_convert.extend(['-of', 'CSV']) |
| 3064 | elif logformat == 'csv_utf16': |
| 3065 | # This causes it to use UTF-16, which doubles the log size but ensures |
| 3066 | # the log is readable for non-ASCII characters. |
| 3067 | cmd_convert.extend(['-of', 'CSV', '-en', 'Unicode']) |
| 3068 | elif logformat == 'xml': |
| 3069 | cmd_convert.extend(['-of', 'XML']) |
| 3070 | else: |
| 3071 | raise ValueError('Unexpected log format \'%s\'' % logformat) |
| 3072 | logging.debug('Running: %s' % cmd_convert) |
| 3073 | # This can takes tens of minutes for large logs. |
| 3074 | # Redirects all output to stderr. |
| 3075 | subprocess.check_call( |
| 3076 | cmd_convert, |
| 3077 | stdin=subprocess.PIPE, |
| 3078 | stdout=sys.stderr, |
| 3079 | stderr=sys.stderr) |
| 3080 | |
maruel@chromium.org | f14633b | 2013-01-25 15:12:52 +0000 | [diff] [blame] | 3081 | def __init__(self, use_sudo=False): # pylint: disable=W0613 |
| 3082 | super(LogmanTrace, self).__init__() |
| 3083 | # Ignore use_sudo. It's irrelevant on Windows but kept to simplify the API. |
| 3084 | |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 3085 | @staticmethod |
| 3086 | def clean_trace(logname): |
maruel@chromium.org | eaedeb4 | 2012-10-31 14:38:22 +0000 | [diff] [blame] | 3087 | for ext in ('', '.csv', '.etl', '.json', '.xml', '.preprocessed'): |
maruel | 12e3001 | 2015-10-09 11:55:35 -0700 | [diff] [blame] | 3088 | if fs.isfile(logname + ext): |
| 3089 | fs.remove(logname + ext) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 3090 | |
| 3091 | @classmethod |
maruel@chromium.org | ec74ff8 | 2012-10-29 18:14:47 +0000 | [diff] [blame] | 3092 | def parse_log(cls, logname, blacklist, trace_name): |
| 3093 | logging.info('parse_log(%s, ..., %s)', logname, trace_name) |
maruel@chromium.org | 8b056ba | 2012-10-16 14:04:49 +0000 | [diff] [blame] | 3094 | assert os.path.isabs(logname) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 3095 | |
| 3096 | def blacklist_more(filepath): |
| 3097 | # All the NTFS metadata is in the form x:\$EXTEND or stuff like that. |
| 3098 | return blacklist(filepath) or re.match(r'[A-Z]\:\\\$EXTEND', filepath) |
| 3099 | |
maruel@chromium.org | eaedeb4 | 2012-10-31 14:38:22 +0000 | [diff] [blame] | 3100 | # Create a list of (Context, result_dict) tuples. This is necessary because |
| 3101 | # the csv file may be larger than the amount of available memory. |
| 3102 | contexes = [ |
| 3103 | ( |
| 3104 | cls.Context( |
| 3105 | blacklist_more, item['pid'], item['trace'], item['thunk_cmd']), |
| 3106 | { |
| 3107 | 'output': item['output'], |
| 3108 | 'trace': item['trace'], |
| 3109 | }, |
| 3110 | ) |
Marc-Antoine Ruel | de01180 | 2013-11-12 15:19:47 -0500 | [diff] [blame] | 3111 | for item in tools.read_json(logname)['traces'] |
maruel@chromium.org | eaedeb4 | 2012-10-31 14:38:22 +0000 | [diff] [blame] | 3112 | if not trace_name or item['trace'] == trace_name |
| 3113 | ] |
| 3114 | |
| 3115 | # The log may be too large to fit in memory and it is not efficient to read |
| 3116 | # it multiple times, so multiplex the contexes instead, which is slightly |
| 3117 | # more awkward. |
maruel | 12e3001 | 2015-10-09 11:55:35 -0700 | [diff] [blame] | 3118 | path = os.path.join(unicode(os.getcwd()), u'%s.preprocessed' % logname) |
| 3119 | with fs.open(path, 'rb') as f: |
maruel@chromium.org | eaedeb4 | 2012-10-31 14:38:22 +0000 | [diff] [blame] | 3120 | lines = csv.reader( |
| 3121 | f, delimiter='$', quotechar='*', quoting=csv.QUOTE_MINIMAL) |
| 3122 | for encoded in lines: |
| 3123 | line = [s.decode('utf-8') for s in encoded] |
| 3124 | # Convert the PID in-place from hex. |
| 3125 | line[cls.Context.PID] = int(line[cls.Context.PID], 16) |
| 3126 | for context in contexes: |
| 3127 | if 'exception' in context[1]: |
| 3128 | continue |
| 3129 | try: |
| 3130 | context[0].on_line(line) |
| 3131 | except TracingFailure: |
| 3132 | context[1]['exception'] = sys.exc_info() |
| 3133 | |
| 3134 | for context in contexes: |
| 3135 | if 'exception' in context[1]: |
maruel@chromium.org | ec74ff8 | 2012-10-29 18:14:47 +0000 | [diff] [blame] | 3136 | continue |
maruel@chromium.org | eaedeb4 | 2012-10-31 14:38:22 +0000 | [diff] [blame] | 3137 | context[1]['results'] = context[0].to_results() |
| 3138 | |
| 3139 | return [context[1] for context in contexes] |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 3140 | |
| 3141 | |
maruel@chromium.org | f14633b | 2013-01-25 15:12:52 +0000 | [diff] [blame] | 3142 | def get_api(**kwargs): |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 3143 | """Returns the correct implementation for the current OS.""" |
| 3144 | if sys.platform == 'cygwin': |
| 3145 | raise NotImplementedError( |
| 3146 | 'Not implemented for cygwin, start the script from Win32 python') |
| 3147 | flavors = { |
| 3148 | 'win32': LogmanTrace, |
| 3149 | 'darwin': Dtrace, |
| 3150 | 'sunos5': Dtrace, |
| 3151 | 'freebsd7': Dtrace, |
| 3152 | 'freebsd8': Dtrace, |
| 3153 | } |
| 3154 | # Defaults to strace. |
maruel@chromium.org | f14633b | 2013-01-25 15:12:52 +0000 | [diff] [blame] | 3155 | return flavors.get(sys.platform, Strace)(**kwargs) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 3156 | |
| 3157 | |
| 3158 | def extract_directories(root_dir, files, blacklist): |
| 3159 | """Detects if all the files in a directory are in |files| and if so, replace |
| 3160 | the individual files by a Results.Directory instance. |
| 3161 | |
| 3162 | Takes a list of Results.File instances and returns a shorter list of |
| 3163 | Results.File and Results.Directory instances. |
| 3164 | |
| 3165 | Arguments: |
| 3166 | - root_dir: Optional base directory that shouldn't be search further. |
| 3167 | - files: list of Results.File instances. |
maruel@chromium.org | 3683afe | 2013-07-27 00:09:27 +0000 | [diff] [blame] | 3168 | - blacklist: lambda to reject unneeded files, for example r'.+\.pyc'. |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 3169 | """ |
| 3170 | logging.info( |
| 3171 | 'extract_directories(%s, %d files, ...)' % (root_dir, len(files))) |
| 3172 | assert not (root_dir or '').endswith(os.path.sep), root_dir |
maruel@chromium.org | 306e0e7 | 2012-11-02 18:22:03 +0000 | [diff] [blame] | 3173 | # It is important for root_dir to not be a symlinked path, make sure to call |
| 3174 | # os.path.realpath() as needed. |
| 3175 | assert not root_dir or ( |
maruel | 12e3001 | 2015-10-09 11:55:35 -0700 | [diff] [blame] | 3176 | fs.realpath(file_path.get_native_path_case(root_dir)) == root_dir) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 3177 | assert not any(isinstance(f, Results.Directory) for f in files) |
| 3178 | # Remove non existent files. |
| 3179 | files = [f for f in files if f.existent] |
| 3180 | if not files: |
| 3181 | return files |
| 3182 | # All files must share the same root, which can be None. |
| 3183 | assert len(set(f.root for f in files)) == 1, set(f.root for f in files) |
| 3184 | |
| 3185 | # Creates a {directory: {filename: File}} mapping, up to root. |
| 3186 | buckets = {} |
| 3187 | if root_dir: |
| 3188 | buckets[root_dir] = {} |
| 3189 | for fileobj in files: |
| 3190 | path = fileobj.full_path |
| 3191 | directory = os.path.dirname(path) |
| 3192 | assert directory |
| 3193 | # Do not use os.path.basename() so trailing os.path.sep is kept. |
| 3194 | basename = path[len(directory)+1:] |
| 3195 | files_in_directory = buckets.setdefault(directory, {}) |
| 3196 | files_in_directory[basename] = fileobj |
| 3197 | # Add all the directories recursively up to root. |
| 3198 | while True: |
| 3199 | old_d = directory |
| 3200 | directory = os.path.dirname(directory) |
| 3201 | if directory + os.path.sep == root_dir or directory == old_d: |
| 3202 | break |
| 3203 | buckets.setdefault(directory, {}) |
| 3204 | |
| 3205 | root_prefix = len(root_dir) + 1 if root_dir else 0 |
| 3206 | for directory in sorted(buckets, reverse=True): |
maruel | 12e3001 | 2015-10-09 11:55:35 -0700 | [diff] [blame] | 3207 | if not fs.isdir(directory): |
maruel@chromium.org | b1678e6 | 2013-03-19 15:55:17 +0000 | [diff] [blame] | 3208 | logging.debug( |
| 3209 | '%s was a directory but doesn\'t exist anymore; ignoring', directory) |
| 3210 | continue |
maruel | 12e3001 | 2015-10-09 11:55:35 -0700 | [diff] [blame] | 3211 | actual = set(f for f in fs.listdir(directory) if not blacklist(f)) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 3212 | expected = set(buckets[directory]) |
| 3213 | if not (actual - expected): |
| 3214 | parent = os.path.dirname(directory) |
| 3215 | buckets[parent][os.path.basename(directory)] = Results.Directory( |
| 3216 | root_dir, |
| 3217 | directory[root_prefix:], |
| 3218 | False, |
| 3219 | sum(f.size for f in buckets[directory].itervalues()), |
| 3220 | sum(f.nb_files for f in buckets[directory].itervalues())) |
| 3221 | # Remove the whole bucket. |
| 3222 | del buckets[directory] |
| 3223 | |
| 3224 | # Reverse the mapping with what remains. The original instances are returned, |
| 3225 | # so the cached meta data is kept. |
| 3226 | files = sum((x.values() for x in buckets.itervalues()), []) |
| 3227 | return sorted(files, key=lambda x: x.path) |
| 3228 | |
| 3229 | |
| 3230 | def trace(logfile, cmd, cwd, api, output): |
| 3231 | """Traces an executable. Returns (returncode, output) from api. |
| 3232 | |
| 3233 | Arguments: |
| 3234 | - logfile: file to write to. |
| 3235 | - cmd: command to run. |
| 3236 | - cwd: current directory to start the process in. |
| 3237 | - api: a tracing api instance. |
| 3238 | - output: if True, returns output, otherwise prints it at the console. |
| 3239 | """ |
vadimsh@chromium.org | a432647 | 2013-08-24 02:05:41 +0000 | [diff] [blame] | 3240 | cmd = tools.fix_python_path(cmd) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 3241 | api.clean_trace(logfile) |
| 3242 | with api.get_tracer(logfile) as tracer: |
| 3243 | return tracer.trace(cmd, cwd, 'default', output) |
| 3244 | |
| 3245 | |
maruel@chromium.org | e532251 | 2013-08-19 20:17:57 +0000 | [diff] [blame] | 3246 | def CMDclean(parser, args): |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 3247 | """Cleans up traces.""" |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 3248 | options, args = parser.parse_args(args) |
| 3249 | api = get_api() |
| 3250 | api.clean_trace(options.log) |
| 3251 | return 0 |
| 3252 | |
| 3253 | |
maruel@chromium.org | e532251 | 2013-08-19 20:17:57 +0000 | [diff] [blame] | 3254 | def CMDtrace(parser, args): |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 3255 | """Traces an executable.""" |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 3256 | parser.allow_interspersed_args = False |
| 3257 | parser.add_option( |
| 3258 | '-q', '--quiet', action='store_true', |
| 3259 | help='Redirects traced executable output to /dev/null') |
maruel@chromium.org | f14633b | 2013-01-25 15:12:52 +0000 | [diff] [blame] | 3260 | parser.add_option( |
| 3261 | '-s', '--sudo', action='store_true', |
| 3262 | help='Use sudo when shelling out the tracer tool (ignored on Windows)') |
| 3263 | parser.add_option( |
| 3264 | '-n', '--no-sudo', action='store_false', |
| 3265 | help='Don\'t use sudo') |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 3266 | options, args = parser.parse_args(args) |
| 3267 | |
| 3268 | if not args: |
| 3269 | parser.error('Please provide a command to run') |
| 3270 | |
Marc-Antoine Ruel | 5906fcd | 2013-11-08 21:02:24 -0500 | [diff] [blame] | 3271 | if not can_trace(): |
| 3272 | parser.error('Please rerun this program with admin privileges') |
| 3273 | |
maruel | 12e3001 | 2015-10-09 11:55:35 -0700 | [diff] [blame] | 3274 | if not os.path.isabs(args[0]) and fs.access(args[0], os.X_OK): |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 3275 | args[0] = os.path.abspath(args[0]) |
| 3276 | |
maruel@chromium.org | f14633b | 2013-01-25 15:12:52 +0000 | [diff] [blame] | 3277 | # options.sudo default value is None, which is to do whatever tracer defaults |
| 3278 | # do. |
| 3279 | api = get_api(use_sudo=options.sudo) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 3280 | return trace(options.log, args, os.getcwd(), api, options.quiet)[0] |
| 3281 | |
| 3282 | |
maruel@chromium.org | e532251 | 2013-08-19 20:17:57 +0000 | [diff] [blame] | 3283 | def CMDread(parser, args): |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 3284 | """Reads the logs and prints the result.""" |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 3285 | parser.add_option( |
| 3286 | '-V', '--variable', |
| 3287 | nargs=2, |
| 3288 | action='append', |
| 3289 | dest='variables', |
| 3290 | metavar='VAR_NAME directory', |
| 3291 | default=[], |
| 3292 | help=('Variables to replace relative directories against. Example: ' |
| 3293 | '"-v \'$HOME\' \'/home/%s\'" will replace all occurence of your ' |
| 3294 | 'home dir with $HOME') % getpass.getuser()) |
| 3295 | parser.add_option( |
| 3296 | '--root-dir', |
| 3297 | help='Root directory to base everything off it. Anything outside of this ' |
| 3298 | 'this directory will not be reported') |
| 3299 | parser.add_option( |
maruel@chromium.org | ec74ff8 | 2012-10-29 18:14:47 +0000 | [diff] [blame] | 3300 | '--trace-name', |
| 3301 | help='Only reads one of the trace. Defaults to reading all traces') |
| 3302 | parser.add_option( |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 3303 | '-j', '--json', action='store_true', |
| 3304 | help='Outputs raw result data as json') |
| 3305 | parser.add_option( |
maruel@chromium.org | 3683afe | 2013-07-27 00:09:27 +0000 | [diff] [blame] | 3306 | '--trace-blacklist', action='append', default=[], |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 3307 | help='List of regexp to use as blacklist filter') |
| 3308 | options, args = parser.parse_args(args) |
| 3309 | |
| 3310 | if options.root_dir: |
maruel@chromium.org | 561d4b2 | 2013-09-26 21:08:08 +0000 | [diff] [blame] | 3311 | options.root_dir = file_path.get_native_path_case( |
maruel@chromium.org | 7dc17b3 | 2013-04-02 22:27:05 +0000 | [diff] [blame] | 3312 | unicode(os.path.abspath(options.root_dir))) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 3313 | |
| 3314 | variables = dict(options.variables) |
| 3315 | api = get_api() |
Marc-Antoine Ruel | ac54cb4 | 2013-11-18 14:05:35 -0500 | [diff] [blame] | 3316 | blacklist = tools.gen_blacklist(options.trace_blacklist) |
maruel@chromium.org | ec74ff8 | 2012-10-29 18:14:47 +0000 | [diff] [blame] | 3317 | data = api.parse_log(options.log, blacklist, options.trace_name) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 3318 | # Process each trace. |
| 3319 | output_as_json = [] |
maruel@chromium.org | 9e56b59 | 2013-05-21 16:56:05 +0000 | [diff] [blame] | 3320 | try: |
| 3321 | for item in data: |
| 3322 | if 'exception' in item: |
| 3323 | # Do not abort the other traces. |
| 3324 | print >> sys.stderr, ( |
| 3325 | 'Trace %s: Got an exception: %s' % ( |
| 3326 | item['trace'], item['exception'][1])) |
| 3327 | continue |
| 3328 | results = item['results'] |
| 3329 | if options.root_dir: |
| 3330 | results = results.strip_root(options.root_dir) |
| 3331 | |
| 3332 | if options.json: |
| 3333 | output_as_json.append(results.flatten()) |
| 3334 | else: |
| 3335 | simplified = extract_directories( |
| 3336 | options.root_dir, results.files, blacklist) |
| 3337 | simplified = [f.replace_variables(variables) for f in simplified] |
| 3338 | if len(data) > 1: |
| 3339 | print('Trace: %s' % item['trace']) |
| 3340 | print('Total: %d' % len(results.files)) |
| 3341 | print('Non existent: %d' % len(results.non_existent)) |
| 3342 | for f in results.non_existent: |
| 3343 | print(' %s' % f.path) |
| 3344 | print( |
| 3345 | 'Interesting: %d reduced to %d' % ( |
| 3346 | len(results.existent), len(simplified))) |
| 3347 | for f in simplified: |
| 3348 | print(' %s' % f.path) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 3349 | |
| 3350 | if options.json: |
Marc-Antoine Ruel | de01180 | 2013-11-12 15:19:47 -0500 | [diff] [blame] | 3351 | tools.write_json(sys.stdout, output_as_json, False) |
maruel@chromium.org | 9e56b59 | 2013-05-21 16:56:05 +0000 | [diff] [blame] | 3352 | except KeyboardInterrupt: |
| 3353 | return 1 |
| 3354 | except IOError as e: |
| 3355 | if e.errno == errno.EPIPE: |
| 3356 | # Do not print a stack trace when the output is piped to less and the user |
| 3357 | # quits before the whole output was written. |
| 3358 | return 1 |
| 3359 | raise |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 3360 | return 0 |
| 3361 | |
| 3362 | |
Marc-Antoine Ruel | f74cffe | 2015-07-15 15:21:34 -0400 | [diff] [blame] | 3363 | class OptionParserTraceInputs(logging_utils.OptionParserWithLogging): |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 3364 | """Adds automatic --log handling.""" |
vadimsh@chromium.org | a432647 | 2013-08-24 02:05:41 +0000 | [diff] [blame] | 3365 | |
| 3366 | # Disable --log-file options since both --log and --log-file options are |
| 3367 | # confusing. |
| 3368 | # TODO(vadimsh): Rename --log-file or --log to something else. |
| 3369 | enable_log_file = False |
| 3370 | |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 3371 | def __init__(self, **kwargs): |
Marc-Antoine Ruel | f74cffe | 2015-07-15 15:21:34 -0400 | [diff] [blame] | 3372 | logging_utils.OptionParserWithLogging.__init__(self, **kwargs) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 3373 | self.add_option( |
| 3374 | '-l', '--log', help='Log file to generate or read, required') |
| 3375 | |
| 3376 | def parse_args(self, *args, **kwargs): |
| 3377 | """Makes sure the paths make sense. |
| 3378 | |
| 3379 | On Windows, / and \ are often mixed together in a path. |
| 3380 | """ |
Marc-Antoine Ruel | f74cffe | 2015-07-15 15:21:34 -0400 | [diff] [blame] | 3381 | options, args = logging_utils.OptionParserWithLogging.parse_args( |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 3382 | self, *args, **kwargs) |
| 3383 | if not options.log: |
| 3384 | self.error('Must supply a log file with -l') |
maruel | 12e3001 | 2015-10-09 11:55:35 -0700 | [diff] [blame] | 3385 | options.log = unicode(os.path.abspath(options.log)) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 3386 | return options, args |
| 3387 | |
| 3388 | |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 3389 | def main(argv): |
maruel@chromium.org | e532251 | 2013-08-19 20:17:57 +0000 | [diff] [blame] | 3390 | dispatcher = subcommand.CommandDispatcher(__name__) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 3391 | try: |
maruel@chromium.org | 3d67199 | 2013-08-20 00:38:27 +0000 | [diff] [blame] | 3392 | return dispatcher.execute( |
| 3393 | OptionParserTraceInputs(version=__version__), argv) |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 3394 | except TracingFailure, e: |
| 3395 | sys.stderr.write('\nError: ') |
| 3396 | sys.stderr.write(str(e)) |
| 3397 | sys.stderr.write('\n') |
| 3398 | return 1 |
| 3399 | |
| 3400 | |
| 3401 | if __name__ == '__main__': |
maruel | 8e4e40c | 2016-05-30 06:21:07 -0700 | [diff] [blame] | 3402 | subprocess42.inhibit_os_error_reporting() |
maruel@chromium.org | e532251 | 2013-08-19 20:17:57 +0000 | [diff] [blame] | 3403 | fix_encoding.fix_encoding() |
vadimsh@chromium.org | a432647 | 2013-08-24 02:05:41 +0000 | [diff] [blame] | 3404 | tools.disable_buffering() |
maruel@chromium.org | e532251 | 2013-08-19 20:17:57 +0000 | [diff] [blame] | 3405 | colorama.init() |
maruel@chromium.org | 8fb47fe | 2012-10-03 20:13:15 +0000 | [diff] [blame] | 3406 | sys.exit(main(sys.argv[1:])) |