Blame - server/server_job.py - chromium.googlesource.com/chromiumos/platform/tauto

2009-03-19 20:25:24 +0000

[diff] [blame]

9

import getpass, os, sys, re, stat, tempfile, time, select, subprocess

mbligh

2010-01-06 18:37:22 +0000

[diff] [blame]

10

import traceback, shutil, warnings, fcntl, pickle, logging, itertools, errno

showard

75cdfee

2009-06-10 17:40:41 +0000

[diff] [blame]

11

from autotest_lib.client.bin import sysinfo

mbligh

2009-11-06 03:15:03 +0000

[diff] [blame]

12

from autotest_lib.client.common_lib import base_job

mbligh

0910844

2008-10-15 16:27:38 +0000

[diff] [blame]

13

from autotest_lib.client.common_lib import error, log, utils, packages

showard

75cdfee

2009-06-10 17:40:41 +0000

[diff] [blame]

14

from autotest_lib.client.common_lib import logging_manager

jadmanski

043e113

2008-11-19 17:10:32 +0000

[diff] [blame]

15

from autotest_lib.server import test, subcommand, profilers

jadmanski

2008-08-13 14:05:21 +0000

[diff] [blame]

16

from autotest_lib.tko import db as tko_db, status_lib, utils as tko_utils

jadmanski

2008-08-13 14:05:21 +0000

[diff] [blame]

17

18

mbligh

2008-10-18 14:02:45 +0000

[diff] [blame]

19

def _control_segment_path(name):
    """Return the absolute path of the control segment file called `name`.

    Control segments are looked up in the "control_segments" directory
    that sits next to this module.
    """
    this_module = os.path.abspath(__file__)
    segments_dir = os.path.join(os.path.dirname(this_module),
                                "control_segments")
    return os.path.join(segments_dir, name)

jadmanski

2008-08-13 14:05:21 +0000

[diff] [blame]

23

24

mbligh

2008-10-18 14:02:45 +0000

[diff] [blame]

25

# Base names of the control files written into the control file directory,
# plus the name of the machines file.
CLIENT_CONTROL_FILENAME = 'control'
SERVER_CONTROL_FILENAME = 'control.srv'
MACHINES_FILENAME = '.machines'

# Control segments executed around the main control file.
CLIENT_WRAPPER_CONTROL_FILE = _control_segment_path('client_wrapper')
CRASHDUMPS_CONTROL_FILE = _control_segment_path('crashdumps')
CRASHINFO_CONTROL_FILE = _control_segment_path('crashinfo')
INSTALL_CONTROL_FILE = _control_segment_path('install')
CLEANUP_CONTROL_FILE = _control_segment_path('cleanup')

# Control segments executed by the stand-alone verify and repair operations.
VERIFY_CONTROL_FILE = _control_segment_path('verify')
REPAIR_CONTROL_FILE = _control_segment_path('repair')

jadmanski

2008-08-13 14:05:21 +0000

[diff] [blame]

37

38

mbligh

062ed15

2009-01-13 00:57:14 +0000

[diff] [blame]

39

# Default site hook: contributes no site-specific job data.
def _get_site_job_data_dummy(job):
    """Return a new empty dict; the `job` argument is ignored."""
    return dict()

jadmanski

2008-08-13 14:05:21 +0000

[diff] [blame]

44

# Look up the site-specific "get_site_job_data" hook in
# autotest_lib.server.site_server_job, handing the no-op stub to the loader.
# NOTE(review): presumably the stub is what gets returned when no site module
# is installed -- confirm against utils.import_site_function.
get_site_job_data = utils.import_site_function(
    __file__,
    "autotest_lib.server.site_server_job",
    "get_site_job_data",
    _get_site_job_data_dummy)

jadmanski

2008-08-13 14:05:21 +0000

[diff] [blame]

48

49

mbligh

2009-11-06 03:15:03 +0000

[diff] [blame]

50

class base_server_job(base_job.base_job):

51

"""The server-side concrete implementation of base_job.

jadmanski

2008-08-13 14:05:21 +0000

[diff] [blame]

52

mbligh

2009-11-06 03:15:03 +0000

[diff] [blame]

53

Optional properties provided by this implementation:

serverdir

conmuxdir

num_tests_run

num_tests_failed

warning_manager

warning_loggers

jadmanski

2008-08-13 14:05:21 +0000

[diff] [blame]

62

"""

63

mbligh

2009-11-06 03:15:03 +0000

[diff] [blame]

64

_STATUS_VERSION = 1

jadmanski

2008-08-13 14:05:21 +0000

[diff] [blame]

65

66

def __init__(self, control, args, resultdir, label, user, machines,
             client=False, parse_job='',
             ssh_user='root', ssh_port=22, ssh_pass='',
             group_name='', tag=''):
    """
    Create a server side job object.

    Besides storing its arguments this creates the 'debug' directory
    under the result directory, sets up the logging manager, sysinfo,
    profilers and package manager, writes the initial job keyvals (only
    for the first job in a result directory) and registers the
    subcommand fork/join hooks.

    @param control: The pathname of the control file.
    @param args: Passed to the control file.
    @param resultdir: Where to throw the results.
    @param label: Description of the job.
    @param user: Username for the job (email address). When empty,
            self.user falls back to the login name of the current user.
    @param machines: Sequence of machine names the job runs against;
            joined with commas into the 'hostname' keyval.
    @param client: True if this is a client-side control file.
    @param parse_job: string, if supplied it is the job execution tag that
            the results will be passed through to the TKO parser with.
    @param ssh_user: The SSH username.  [root]
    @param ssh_port: The SSH port number.  [22]
    @param ssh_pass: The SSH passphrase, if needed.
    @param group_name: If supplied, this will be written out as
            host_group_name in the keyvals file for the parser.
    @param tag: The job execution tag from the scheduler.  [optional]
    """
    super(base_server_job, self).__init__(resultdir=resultdir)

    self.control = control
    self._uncollected_log_file = os.path.join(self.resultdir,
                                              'uncollected_logs')
    debugdir = os.path.join(self.resultdir, 'debug')
    if not os.path.exists(debugdir):
        os.mkdir(debugdir)

    # fall back to the current login name when no user was given
    self.user = user or getpass.getuser()

    self._args = args
    self.machines = machines
    self._client = client
    self._record_prefix = ''
    self.warning_loggers = set()
    self.warning_manager = warning_manager()
    self._ssh_user = ssh_user
    self._ssh_port = ssh_port
    self._ssh_pass = ssh_pass
    self.tag = tag
    self.last_boot_tag = None
    self.hosts = set()
    self.drop_caches = False
    self.drop_caches_between_iterations = False

    self.logging = logging_manager.get_logging_manager(
            manage_stdout_and_stderr=True, redirect_fds=True)
    # share the logging manager with the subcommand module
    subcommand.logging_manager_object = self.logging

    self.sysinfo = sysinfo.sysinfo(self.resultdir)
    self.profilers = profilers.profilers(self)

    # NOTE: the keyval records the `user` argument exactly as passed in,
    # not the resolved self.user.
    job_data = {'label' : label, 'user' : user,
                'hostname' : ','.join(machines),
                'status_version' : str(self._STATUS_VERSION),
                'job_started' : str(int(time.time()))}
    if group_name:
        job_data['host_group_name'] = group_name

    # only write these keyvals out on the first job in a resultdir
    if 'job_started' not in utils.read_keyval(self.resultdir):
        job_data.update(get_site_job_data(self))
        utils.write_keyval(self.resultdir, job_data)

    self._parse_job = parse_job
    # continuous parsing is only enabled directly for at most one machine
    self._using_parser = (self._parse_job and len(machines) <= 1)
    self.pkgmgr = packages.PackageManager(
        self.autodir, run_function_dargs={'timeout':600})
    self.num_tests_run = 0
    self.num_tests_failed = 0

    self._register_subcommand_hooks()

    # these components aren't usable on the server
    self.bootloader = None
    self.harness = None

@classmethod

def _find_base_directories(cls):

153

"""

154

Determine locations of autodir, clientdir and serverdir. Assumes

155

that this file is located within serverdir and uses __file__ along

156

with relative paths to resolve the location.

157

"""

158

serverdir = os.path.abspath(os.path.dirname(__file__))

159

autodir = os.path.normpath(os.path.join(serverdir, '..'))

160

clientdir = os.path.join(autodir, 'client')

161

return autodir, clientdir, serverdir

162

163

164

def _find_resultdir(self, resultdir):

165

"""

166

Determine the location of resultdir. For server jobs we expect one to

167

always be explicitly passed in to __init__, so just return that.

168

"""

169

if resultdir:

170

return os.path.normpath(resultdir)

else:

return None

jadmanski

2008-11-20 16:32:08 +0000

[diff] [blame]

174

jadmanski

2009-01-30 15:04:51 +0000

[diff] [blame]

175

@staticmethod

176

def _load_control_file(path):

177

f = open(path)

178

try:

179

control_file = f.read()

180

finally:

181

f.close()

182

return re.sub('\r', '', control_file)

183

184

jadmanski

550fdc2

2008-11-20 16:32:08 +0000

[diff] [blame]

185

def _register_subcommand_hooks(self):
    """
    Install fork/join hooks on the subcommand class so that hosts added
    to self.hosts after a fork are closed again at join time.
    """
    def on_fork(cmd):
        # snapshot the hosts that already existed when the fork happened
        self._existing_hosts_on_fork = set(self.hosts)

    def on_join(cmd):
        # close only the hosts that appeared since the snapshot
        for host in self.hosts - self._existing_hosts_on_fork:
            host.close()

    hooked = subcommand.subcommand
    hooked.register_fork_hook(on_fork)
    hooked.register_join_hook(on_join)

198

jadmanski

2008-08-13 14:05:21 +0000

[diff] [blame]

199

mbligh

2010-01-05 18:22:35 +0000

[diff] [blame]

200

def init_parser(self):

mbligh

2008-11-22 13:25:32 +0000

[diff] [blame]

201

"""

mbligh

2010-01-05 18:22:35 +0000

[diff] [blame]

202

Start the continuous parsing of self.resultdir. This sets up

jadmanski

2008-08-13 14:05:21 +0000

[diff] [blame]

203

the database connection and inserts the basic job object into

mbligh

2008-11-22 13:25:32 +0000

[diff] [blame]

204

the database if necessary.

205

"""

mbligh

2010-01-05 18:22:35 +0000

[diff] [blame]

206

if not self._using_parser:

207

return

jadmanski

2008-08-13 14:05:21 +0000

[diff] [blame]

208

# redirect parser debugging to .parse.log

mbligh

2010-01-05 18:22:35 +0000

[diff] [blame]

209

parse_log = os.path.join(self.resultdir, '.parse.log')

jadmanski

2008-08-13 14:05:21 +0000

[diff] [blame]

210

parse_log = open(parse_log, 'w', 0)

211

tko_utils.redirect_parser_debugging(parse_log)

212

# create a job model object and set up the db

213

self.results_db = tko_db.db(autocommit=True)

mbligh

2009-11-06 03:15:03 +0000

[diff] [blame]

214

self.parser = status_lib.parser(self._STATUS_VERSION)

mbligh

2010-01-05 18:22:35 +0000

[diff] [blame]

215

self.job_model = self.parser.make_job(self.resultdir)

jadmanski

2008-08-13 14:05:21 +0000

[diff] [blame]

216

self.parser.start(self.job_model)

217

# check if a job already exists in the db and insert it if

218

# it does not

mbligh

2009-11-06 03:15:03 +0000

[diff] [blame]

219

job_idx = self.results_db.find_job(self._parse_job)

jadmanski

2008-08-13 14:05:21 +0000

[diff] [blame]

220

if job_idx is None:

mbligh

2009-11-06 03:15:03 +0000

[diff] [blame]

221

self.results_db.insert_job(self._parse_job, self.job_model)

jadmanski

2008-08-13 14:05:21 +0000

[diff] [blame]

222

else:

mbligh

2008-11-22 13:25:32 +0000

[diff] [blame]

223

machine_idx = self.results_db.lookup_machine(self.job_model.machine)

jadmanski

2008-08-13 14:05:21 +0000

[diff] [blame]

224

self.job_model.index = job_idx

225

self.job_model.machine_idx = machine_idx

226

227

228

def cleanup_parser(self):

mbligh

2008-11-22 13:25:32 +0000

[diff] [blame]

229

"""

230

This should be called after the server job is finished

jadmanski

2008-08-13 14:05:21 +0000

[diff] [blame]

231

to carry out any remaining cleanup (e.g. flushing any

mbligh

2008-11-22 13:25:32 +0000

[diff] [blame]

232

remaining test results to the results db)

233

"""

mbligh

2009-11-06 03:15:03 +0000

[diff] [blame]

234

if not self._using_parser:

jadmanski

2008-08-13 14:05:21 +0000

[diff] [blame]

235

return

236

final_tests = self.parser.end()

237

for test in final_tests:

238

self.__insert_test(test)

mbligh

2009-11-06 03:15:03 +0000

[diff] [blame]

239

self._using_parser = False

jadmanski

2008-08-13 14:05:21 +0000

[diff] [blame]

def verify(self):
    """
    Execute the verify control segment for self.machines.

    Changes into self.resultdir first when one is set. If the control
    segment raises, an ABORT status entry carrying the error text and
    traceback is recorded and the exception is re-raised.

    @raises error.AutoservError: If no machines were specified.
    """
    if not self.machines:
        raise error.AutoservError('No machines specified to verify')
    if self.resultdir:
        os.chdir(self.resultdir)
    try:
        namespace = {
            'machines' : self.machines,
            'job' : self,
            'ssh_user' : self._ssh_user,
            'ssh_port' : self._ssh_port,
            'ssh_pass' : self._ssh_pass,
        }
        self._execute_code(VERIFY_CONTROL_FILE, namespace, protect=False)
    except Exception:
        # fetch the active exception in a way that works on every
        # Python version
        failure = sys.exc_info()[1]
        details = traceback.format_exc()
        msg = 'Verify failed\n' + str(failure) + '\n' + details
        self.record('ABORT', None, None, msg)
        raise

def repair(self, host_protection):
    """
    Execute the repair control segment for self.machines.

    Changes into self.resultdir first when one is set.

    @param host_protection: Handed to the control segment under the
            name 'protection_level'.

    @raises error.AutoservError: If no machines were specified.
    """
    if not self.machines:
        raise error.AutoservError('No machines specified to repair')
    if self.resultdir:
        os.chdir(self.resultdir)

    namespace = {
        'machines': self.machines,
        'job': self,
        'ssh_user': self._ssh_user,
        'ssh_port': self._ssh_port,
        'ssh_pass': self._ssh_pass,
        'protection_level': host_protection,
    }
    self._execute_code(REPAIR_CONTROL_FILE, namespace, protect=False)

jadmanski

2008-08-13 14:05:21 +0000

[diff] [blame]

def precheck(self):
    """
    Hook for additional checks in derived classes; does nothing here.
    """
    return None

def enable_external_logging(self):
    """
    Start or restart the external logging mechanism; does nothing in
    this implementation.
    """
    return None

def disable_external_logging(self):
    """
    Pause or stop the external logging mechanism; does nothing in this
    implementation.
    """
    return None

def use_external_logging(self):
    """
    Tell whether external logging should be used.

    @returns Always False in this implementation.
    """
    should_use = False
    return should_use

mbligh

2009-06-15 21:53:34 +0000

[diff] [blame]

300

def _make_parallel_wrapper(self, function, machines, log):

301

"""Wrap function as appropriate for calling by parallel_simple."""

mbligh

2008-11-22 13:25:32 +0000

[diff] [blame]

302

is_forking = not (len(machines) == 1 and self.machines == machines)

mbligh

2009-11-06 03:15:03 +0000

[diff] [blame]

303

if self._parse_job and is_forking and log:

jadmanski

2008-08-13 14:05:21 +0000

[diff] [blame]

304

def wrapper(machine):

mbligh

2009-11-06 03:15:03 +0000

[diff] [blame]

305

self._parse_job += "/" + machine

306

self._using_parser = True

jadmanski

2008-08-13 14:05:21 +0000

[diff] [blame]

307

self.machines = [machine]

mbligh

2009-11-06 03:15:03 +0000

[diff] [blame]

308

self.push_execution_context(machine)

jadmanski

609a5f4

2008-08-26 20:52:42 +0000

[diff] [blame]

309

os.chdir(self.resultdir)

showard

2bab8f4

2008-11-12 18:15:22 +0000

[diff] [blame]

310

utils.write_keyval(self.resultdir, {"hostname": machine})

mbligh

2010-01-05 18:22:35 +0000

[diff] [blame]

311

self.init_parser()

jadmanski

2008-08-13 14:05:21 +0000

[diff] [blame]

312

result = function(machine)

313

self.cleanup_parser()

314

return result

jadmanski

4dd1a00

2008-09-05 20:27:30 +0000

[diff] [blame]

315

elif len(machines) > 1 and log:

jadmanski

2008-08-13 14:05:21 +0000

[diff] [blame]

316

def wrapper(machine):

mbligh

2009-11-06 03:15:03 +0000

[diff] [blame]

317

self.push_execution_context(machine)

jadmanski

609a5f4

2008-08-26 20:52:42 +0000

[diff] [blame]

318

os.chdir(self.resultdir)

mbligh

838d82d

2009-03-11 17:14:31 +0000

[diff] [blame]

319

machine_data = {'hostname' : machine,

mbligh

2009-11-06 03:15:03 +0000

[diff] [blame]

320

'status_version' : str(self._STATUS_VERSION)}

mbligh

838d82d

2009-03-11 17:14:31 +0000

[diff] [blame]

321

utils.write_keyval(self.resultdir, machine_data)

jadmanski

2008-08-13 14:05:21 +0000

[diff] [blame]

322

result = function(machine)

323

return result

324

else:

325

wrapper = function

mbligh

415dc21

2009-06-15 21:53:34 +0000

[diff] [blame]

return wrapper

def parallel_simple(self, function, machines, log=True, timeout=None,
                    return_results=False):
    """
    Run 'function' using parallel_simple, with an extra wrapper to handle
    the necessary setup for continuous parsing, if possible.  If continuous
    parsing is already properly initialized then this should just work.

    @param function: A callable to run in parallel given each machine.
    @param machines: A list of machine names to be passed one per subcommand
            invocation of function.
    @param log: If True, output will be written to output in a subdirectory
            named after each machine.
    @param timeout: Seconds after which the function call should timeout.
    @param return_results: If True instead of an AutoServError being raised
            on any error a list of the results|exceptions from the function
            called on each arg is returned.  [default: False]

    @returns Whatever subcommand.parallel_simple returns.

    @raises error.AutotestError: If any of the functions failed.
    """
    per_machine = self._make_parallel_wrapper(function, machines, log)
    return subcommand.parallel_simple(per_machine, machines, log=log,
                                      timeout=timeout,
                                      return_results=return_results)

352

353

354

def parallel_on_machines(self, function, machines, timeout=None):

355

"""

showard

cd5fac4

2009-07-06 20:19:43 +0000

[diff] [blame]

356

@param function: Called in parallel with one machine as its argument.

mbligh

415dc21

2009-06-15 21:53:34 +0000

[diff] [blame]

357

@param machines: A list of machines to call function(machine) on.

358

@param timeout: Seconds after which the function call should timeout.

359

360

@returns A list of machines on which function(machine) returned

361

without raising an exception.

362

"""

showard

cd5fac4

2009-07-06 20:19:43 +0000

[diff] [blame]

363

results = self.parallel_simple(function, machines, timeout=timeout,

mbligh

415dc21

2009-06-15 21:53:34 +0000

[diff] [blame]

364

return_results=True)

365

success_machines = []

366

for result, machine in itertools.izip(results, machines):

367

if not isinstance(result, Exception):

368

success_machines.append(machine)

369

return success_machines

jadmanski

2008-08-13 14:05:21 +0000

[diff] [blame]

370

371

mbligh

2009-11-06 03:15:03 +0000

[diff] [blame]

372

_USE_TEMP_DIR = object()

mbligh

2008-11-22 13:25:32 +0000

[diff] [blame]

373

def run(self, cleanup=False, install_before=False, install_after=False,
        collect_crashdumps=True, namespace={}, control=None,
        control_file_dir=None, only_collect_crashinfo=False):
    """
    Execute the job's control file, surrounded by the optional install,
    crash collection and cleanup control segments.

    @param cleanup: If True (and there are machines), run the cleanup
            control segment once the control file has finished.
    @param install_before: If True (and there are machines), run the
            install control segment before the control file.
    @param install_after: If True (and there are machines), run the
            install control segment as the very last step.
    @param collect_crashdumps: If True, run the crashdumps control
            segment after a control file that finished without error.
            After a failure the crashinfo segment is run regardless.
    @param namespace: Extra names made available to the executed control
            files. The dictionary is copied, never modified.
    @param control: Text of the control file to run. Defaults to the
            contents of self.control, or '' when self.control is None.
    @param control_file_dir: Directory the control files are written to.
            Defaults to self.resultdir; an empty value or
            self._USE_TEMP_DIR selects a temporary directory that is
            removed afterwards.
    @param only_collect_crashinfo: If True, skip the control file and
            only perform crashinfo collection. Returns immediately when
            no uncollected logs file exists yet.
    """
    # for a normal job, make sure the uncollected logs file exists
    # for a crashinfo-only run it should already exist, bail out otherwise
    if self.resultdir and not os.path.exists(self._uncollected_log_file):
        if only_collect_crashinfo:
            # if this is a crashinfo-only run, and there were no existing
            # uncollected logs, just bail out early
            logging.info("No existing uncollected logs, "
                         "skipping crashinfo collection")
            return
        else:
            log_file = open(self._uncollected_log_file, "w")
            try:
                pickle.dump([], log_file)
            finally:
                # close the handle even if pickling fails
                log_file.close()

    # use a copy so changes don't affect the original dictionary (this
    # also keeps the shared default argument from ever being mutated)
    namespace = namespace.copy()
    machines = self.machines
    if control is None:
        if self.control is None:
            control = ''
        else:
            control = self._load_control_file(self.control)
    if control_file_dir is None:
        control_file_dir = self.resultdir

    self.aborted = False
    namespace['machines'] = machines
    namespace['args'] = self._args
    namespace['job'] = self
    namespace['ssh_user'] = self._ssh_user
    namespace['ssh_port'] = self._ssh_port
    namespace['ssh_pass'] = self._ssh_pass
    test_start_time = int(time.time())

    if self.resultdir:
        os.chdir(self.resultdir)
        # touch status.log so that the parser knows a job is running here
        open(self.get_status_log_path(), 'a').close()
        self.enable_external_logging()

    # assume the worst until the control file has completed
    collect_crashinfo = True
    temp_control_file_dir = None
    try:
        try:
            if install_before and machines:
                self._execute_code(INSTALL_CONTROL_FILE, namespace)

            if only_collect_crashinfo:
                # the finally clause below performs the collection
                return

            # determine the dir to write the control files to
            cfd_specified = (control_file_dir
                             and control_file_dir is not self._USE_TEMP_DIR)
            if cfd_specified:
                temp_control_file_dir = None
            else:
                temp_control_file_dir = tempfile.mkdtemp(
                    suffix='temp_control_file_dir')
                control_file_dir = temp_control_file_dir
            server_control_file = os.path.join(control_file_dir,
                                               SERVER_CONTROL_FILENAME)
            client_control_file = os.path.join(control_file_dir,
                                               CLIENT_CONTROL_FILENAME)
            if self._client:
                # client-side control file: it is run through the client
                # wrapper, which acts as the server control file
                namespace['control'] = control
                utils.open_write_close(client_control_file, control)
                shutil.copyfile(CLIENT_WRAPPER_CONTROL_FILE,
                                server_control_file)
            else:
                utils.open_write_close(server_control_file, control)
            logging.info("Processing control file")
            self._execute_code(server_control_file, namespace)
            logging.info("Finished processing control file")

            # no error occurred, so we don't need to collect crashinfo
            collect_crashinfo = False
        except:
            try:
                logging.exception(
                        'Exception escaped control file, job aborting:')
            except:
                pass # don't let logging exceptions here interfere
            raise
    finally:
        if temp_control_file_dir:
            # Clean up temp directory used for copies of the control files
            try:
                shutil.rmtree(temp_control_file_dir)
            except Exception:
                logging.warn('Could not remove temp directory %s: %s',
                             temp_control_file_dir, sys.exc_info()[1])

        if machines and (collect_crashdumps or collect_crashinfo):
            namespace['test_start_time'] = test_start_time
            if collect_crashinfo:
                # includes crashdumps
                self._execute_code(CRASHINFO_CONTROL_FILE, namespace)
            else:
                self._execute_code(CRASHDUMPS_CONTROL_FILE, namespace)
        if self._uncollected_log_file:
            try:
                os.remove(self._uncollected_log_file)
            except OSError:
                # The file may already be gone, for instance removed by a
                # nested run() started through run_control(). That must
                # not abort the rest of the teardown or hide the original
                # exception; any other failure is still reported.
                if sys.exc_info()[1].errno != errno.ENOENT:
                    raise
        self.disable_external_logging()
        if cleanup and machines:
            self._execute_code(CLEANUP_CONTROL_FILE, namespace)
        if install_after and machines:
            self._execute_code(INSTALL_CONTROL_FILE, namespace)

jadmanski

2008-08-13 14:05:21 +0000

[diff] [blame]

482

483

484

def run_test(self, url, *args, **dargs):

mbligh

2008-11-22 13:25:32 +0000

[diff] [blame]

485

"""

486

Summon a test object and run it.

jadmanski

2008-08-13 14:05:21 +0000

[diff] [blame]

487

488

tag

489

tag to add to testname

490

url

491

url of the test to run

492

"""

mbligh

2010-01-06 18:37:22 +0000

[diff] [blame]

493

group, testname = self.pkgmgr.get_package_name(url, 'test')

494

testname, subdir, tag = self._build_tagged_test_name(testname, dargs)

495

outputdir = self._make_test_outputdir(subdir)

jadmanski

2008-08-13 14:05:21 +0000

[diff] [blame]

def group_func():

try:

test.runtest(self, url, tag, args, dargs)

500

except error.TestBaseException, e:

501

self.record(e.exit_status, subdir, testname, str(e))

502

raise

503

except Exception, e:

504

info = str(e) + "\n" + traceback.format_exc()

505

self.record('FAIL', subdir, testname, info)

506

raise

507

else:

mbligh

2008-11-22 13:25:32 +0000

[diff] [blame]

508

self.record('GOOD', subdir, testname, 'completed successfully')

jadmanski

2008-08-26 20:51:14 +0000

[diff] [blame]

509

510

result, exc_info = self._run_group(testname, subdir, group_func)

511

if exc_info and isinstance(exc_info[1], error.TestBaseException):

512

return False

513

elif exc_info:

514

raise exc_info[0], exc_info[1], exc_info[2]

515

else:

516

return True

jadmanski

2008-08-13 14:05:21 +0000

[diff] [blame]

517

518

519

def _run_group(self, name, subdir, function, *args, **dargs):

520

"""\

521

Underlying method for running something inside of a group.

522

"""

jadmanski

2008-08-26 20:51:14 +0000

[diff] [blame]

523

result, exc_info = None, None

mbligh

2009-11-06 03:15:03 +0000

[diff] [blame]

524

old_record_prefix = self._record_prefix

jadmanski

2008-08-13 14:05:21 +0000

[diff] [blame]

525

try:

526

self.record('START', subdir, name)

mbligh

2009-11-06 03:15:03 +0000

[diff] [blame]

527

self._record_prefix += '\t'

jadmanski

2008-08-13 14:05:21 +0000

[diff] [blame]

528

try:

529

result = function(*args, **dargs)

530

finally:

mbligh

2009-11-06 03:15:03 +0000

[diff] [blame]

531

self._record_prefix = old_record_prefix

jadmanski

2008-08-13 14:05:21 +0000

[diff] [blame]

532

except error.TestBaseException, e:

jadmanski

b88d6dc

2009-01-10 00:33:18 +0000

[diff] [blame]

533

self.record("END %s" % e.exit_status, subdir, name)

jadmanski

2008-08-26 20:51:14 +0000

[diff] [blame]

534

exc_info = sys.exc_info()

jadmanski

2008-08-13 14:05:21 +0000

[diff] [blame]

535

except Exception, e:

536

err_msg = str(e) + '\n'

537

err_msg += traceback.format_exc()

538

self.record('END ABORT', subdir, name, err_msg)

539

raise error.JobError(name + ' failed\n' + traceback.format_exc())

540

else:

541

self.record('END GOOD', subdir, name)

542

jadmanski

2008-08-26 20:51:14 +0000

[diff] [blame]

543

return result, exc_info

jadmanski

2008-08-13 14:05:21 +0000

[diff] [blame]

544

545

546

def run_group(self, function, *args, **dargs):

"""\

function:

subroutine to run

*args:

arguments for the function

552

"""

553

554

name = function.__name__

555

556

# Allow the tag for the group to be specified.

557

tag = dargs.pop('tag', None)

if tag:

name = tag

jadmanski

2008-08-26 20:51:14 +0000

[diff] [blame]

561

return self._run_group(name, None, function, *args, **dargs)[0]

jadmanski

2008-08-13 14:05:21 +0000

[diff] [blame]

562

563

564

def run_reboot(self, reboot_func, get_kernel_func):
    """\
    A specialization of run_group meant specifically for handling
    a reboot. Includes support for capturing the kernel version
    after the reboot.

    reboot_func: a function that carries out the reboot

    get_kernel_func: a function that returns a string
                     representing the kernel version.

    A START record is written, then reboot_func() runs with the record
    prefix indented by one tab. If reboot_func raises an Exception, an
    END FAIL record with the error text and traceback is written and
    the exception is re-raised. Otherwise get_kernel_func() is called
    and an END GOOD record carrying the kernel version is written.

    The record prefix is always restored before this method exits, even
    when get_kernel_func() raises or a non-Exception error escapes.
    """
    old_record_prefix = self._record_prefix
    try:
        try:
            self.record('START', None, 'reboot')
            self._record_prefix += '\t'
            reboot_func()
        except Exception:
            # END records are written at the outer indentation level
            self._record_prefix = old_record_prefix
            err_msg = (str(sys.exc_info()[1]) + '\n' +
                       traceback.format_exc())
            self.record('END FAIL', None, 'reboot', err_msg)
            raise
        kernel = get_kernel_func()
    finally:
        # never leave the prefix indented, whatever happened above
        self._record_prefix = old_record_prefix
    self.record('END GOOD', None, 'reboot',
                optional_fields={"kernel": kernel})

591

592

jadmanski

2009-01-30 15:04:51 +0000

[diff] [blame]

593

def run_control(self, path):
    """Execute a control file from inside another control file.

    Not intended for running the top-level job control file.

    @param path: location of the control file, relative to the
            autotest path (self.autodir).
    """
    full_path = os.path.join(self.autodir, path)
    contents = self._load_control_file(full_path)
    self.run(control=contents, control_file_dir=self._USE_TEMP_DIR)

jadmanski

2009-01-30 15:04:51 +0000

[diff] [blame]

600

601

jadmanski

c09fc15

2008-10-15 17:56:59 +0000

[diff] [blame]

602

def add_sysinfo_command(self, command, logfile=None, on_every_test=False):
    """Wrap a command in a sysinfo.command loggable and register it.

    @param command: passed to sysinfo.command().
    @param logfile: passed to sysinfo.command() as its logf argument.
    @param on_every_test: forwarded to _add_sysinfo_loggable().
    """
    loggable = sysinfo.command(command, logf=logfile)
    self._add_sysinfo_loggable(loggable, on_every_test)

def add_sysinfo_logfile(self, file, on_every_test=False):
    """Wrap a file in a sysinfo.logfile loggable and register it.

    @param file: passed to sysinfo.logfile().
    @param on_every_test: forwarded to _add_sysinfo_loggable().
    """
    loggable = sysinfo.logfile(file)
    self._add_sysinfo_loggable(loggable, on_every_test)

609

610

611

def _add_sysinfo_loggable(self, loggable, on_every_test):
    """Add a loggable to one of the two sysinfo loggable sets.

    @param loggable: the object to register.
    @param on_every_test: if true the loggable goes into
            self.sysinfo.test_loggables, otherwise into
            self.sysinfo.boot_loggables.
    """
    if on_every_test:
        target = self.sysinfo.test_loggables
    else:
        target = self.sysinfo.boot_loggables
    target.add(loggable)

616

617

jadmanski

2008-08-13 14:05:21 +0000

[diff] [blame]

618

def record(self, status_code, subdir, operation, status='',
           optional_fields=None):
    """
    Record job-level status.

    The status log is meant to be both machine parseable and human
    readable. Each record is a tab-separated line of the form
        <status code> <subdir> <operation> <status>

    status code: see common_lib.log.is_valid_status()
                 for valid status definition

    subdir: MUST be a relevant subdirectory in the results,
            or None, which will be represented as '----'

    operation: description of what you ran (e.g. "dbench", or
               "mkfs -t foobar /dev/sda9")

    status: error message or "completed sucessfully"

    optional_fields: passed through to _record() unchanged

    ------------------------------------------------------------

    Initial tabs indicate indent levels for grouping, and are
    governed by self._record_prefix.

    Multiline messages have secondary lines prefaced by a double
    space.

    Calling this method also writes out, as WARN records, every new
    warning collected from the console loggers since the last call.
    """
    # poll all our warning loggers for new warnings
    pending = self._read_warnings()
    saved_prefix = self._record_prefix
    try:
        # For an END record the WARN lines are written one level deeper
        # than the END line itself.
        if status_code.startswith("END "):
            self._record_prefix = saved_prefix + "\t"
        for when, text in pending:
            self._record("WARN", None, None, text, when)
    finally:
        self._record_prefix = saved_prefix

    # write out the actual status log line
    self._record(status_code, subdir, operation, status,
                 optional_fields=optional_fields)

665

666

667

def _read_warnings(self):
    """Collect every new warning currently available from the loggers.

    Warnings whose type is reported as invalid by
    self.warning_manager.is_valid() are dropped and cannot be
    retrieved later. Loggers that return an empty read are removed
    from self.warning_loggers.

    Returns a list of (timestamp, message) tuples sorted by timestamp,
    where timestamp is an integer epoch timestamp.
    """
    collected = []
    while True:
        # A zero timeout makes select() a pure poll: only loggers that
        # already have output waiting are returned.
        ready, _, _ = select.select(self.warning_loggers, [], [], 0)
        finished = set()
        for source in ready:
            line = source.readline()
            if not line:
                # an empty read marks a broken pipe
                finished.add(source)
                continue
            # each line is "<timestamp>\t<type>\t<message>"
            stamp, kind, text = line.split('\t', 2)
            stamp = int(stamp)
            if self.warning_manager.is_valid(stamp, kind):
                collected.append((stamp, text.strip()))

        # stop listening to loggers that are closed
        self.warning_loggers -= finished

        # stop once no logger had anything left to read
        if not ready:
            break

    return sorted(collected)

showard

2010-01-25 21:20:41 +0000

[diff] [blame]

706

def _unique_subdirectory(self, base_subdirectory_name):
    """Pick a results subdirectory name that is not in use yet.

    The base name is tried first; if that path already exists under
    self.resultdir, '.1', '.2', ... are appended in turn until a name
    with no existing path is found.

    @param base_subdirectory_name: the preferred subdirectory name.
    @returns The first candidate name that does not exist.
    """
    candidate = base_subdirectory_name
    for suffix in itertools.count(1):
        if not os.path.exists(os.path.join(self.resultdir, candidate)):
            return candidate
        candidate = base_subdirectory_name + '.' + str(suffix)

def record_summary(self, status_code, test_name, reason='', attributes=None,
                   distinguishing_attributes=(), child_test_ids=None):
    """Record a summary test result.

    A fresh results subdirectory is created for the summary, a status
    record flagged with is_summary is written, and the optional
    attributes and child test ids are stored as keyvals.

    @param status_code: status code string, see
            common_lib.log.is_valid_status()
    @param test_name: name of the test
    @param reason: (optional) string providing detailed reason for test
            outcome
    @param attributes: (optional) dict of string keyvals to associate with
            this result
    @param distinguishing_attributes: (optional) list of attribute names
            that should be used to distinguish identically-named test
            results. These attributes must be present in the attributes
            parameter (checked with assert). Their values are appended
            to the test name, joined by '.', to build the subdirectory
            name.
    @param child_test_ids: (optional) list of test indices for test results
            used in generating this result.
    """
    name_parts = [test_name]
    for key in distinguishing_attributes:
        assert attributes
        assert key in attributes, '%s not in %s' % (key, attributes)
        name_parts.append(attributes[key])

    subdirectory = self._unique_subdirectory('.'.join(name_parts))
    results_path = os.path.join(self.resultdir, subdirectory)
    os.mkdir(results_path)

    self.record(status_code, subdirectory, test_name,
                status=reason, optional_fields={'is_summary': True})

    if attributes:
        utils.write_keyval(results_path, attributes)

    if child_test_ids:
        joined_ids = ','.join([str(test_id) for test_id in child_test_ids])
        utils.write_keyval(os.path.join(results_path, 'summary_data'),
                           {'child_test_ids': joined_ids})

jadmanski

2009-04-01 18:19:53 +0000

[diff] [blame]

764

def disable_warnings(self, warning_type):
    """Disable a warning type and note the change in the status log.

    @param warning_type: forwarded to
            self.warning_manager.disable_warnings() and stored in the
            INFO record under the "warnings.disable" field.
    """
    self.warning_manager.disable_warnings(warning_type)
    message = "disabling %s warnings" % warning_type
    self.record("INFO", None, None, message,
                {"warnings.disable": warning_type})

jadmanski

2009-02-11 00:03:26 +0000

[diff] [blame]

769

770

jadmanski

2009-04-01 18:19:53 +0000

[diff] [blame]

771

def enable_warnings(self, warning_type):
    """Enable a warning type and note the change in the status log.

    @param warning_type: forwarded to
            self.warning_manager.enable_warnings() and stored in the
            INFO record under the "warnings.enable" field.
    """
    self.warning_manager.enable_warnings(warning_type)
    message = "enabling %s warnings" % warning_type
    self.record("INFO", None, None, message,
                {"warnings.enable": warning_type})

jadmanski

2009-02-11 00:03:26 +0000

[diff] [blame]

776

777

jadmanski

2009-03-19 17:33:33 +0000

[diff] [blame]

778

def get_status_log_path(self, subdir=None):
    """Return the path to the job status log.

    @param subdir - Optional parameter indicating that you want the path
        to a subdirectory status log.

    @returns The path where the status log should be, or None when
        self.resultdir is not set.
    """
    if not self.resultdir:
        return None
    parts = [self.resultdir]
    if subdir:
        parts.append(subdir)
    parts.append("status.log")
    return os.path.join(*parts)

jadmanski

2009-03-19 17:33:33 +0000

[diff] [blame]

793

794

jadmanski

2009-03-19 20:25:24 +0000

[diff] [blame]

795

def _update_uncollected_logs_list(self, update_func):
    """Updates the uncollected logs list in a multi-process safe manner.

    The pickled list stored in self._uncollected_log_file is loaded,
    handed to update_func for in-place modification, and written back,
    all while holding an exclusive flock() on the file. Nothing happens
    when self._uncollected_log_file is not set.

    @param update_func - a function that updates the list of uncollected
        logs. Should take one parameter, the list to be updated.
    """
    if not self._uncollected_log_file:
        return
    log_file = open(self._uncollected_log_file, "r+")
    try:
        fcntl.flock(log_file, fcntl.LOCK_EX)
        try:
            uncollected_logs = pickle.load(log_file)
            update_func(uncollected_logs)
            # rewrite the file from the start with the updated list
            log_file.seek(0)
            log_file.truncate()
            pickle.dump(uncollected_logs, log_file)
            # push the data out before the lock is released
            log_file.flush()
        finally:
            fcntl.flock(log_file, fcntl.LOCK_UN)
    finally:
        # close even if taking or releasing the lock failed
        log_file.close()

def add_client_log(self, hostname, remote_path, local_path):
    """Adds a new set of client logs to the list of uncollected logs,
    to allow for future log recovery.

    @param hostname - the hostname of the machine holding the logs
    @param remote_path - the directory on the remote machine holding logs
    @param local_path - the local directory to copy the logs into
    """
    entry = (hostname, remote_path, local_path)
    self._update_uncollected_logs_list(lambda logs: logs.append(entry))

827

828

829

def remove_client_log(self, hostname, remote_path, local_path):
    """Removes a set of client logs from the list of uncollected logs.

    The (hostname, remote_path, local_path) entry is removed with
    list.remove(), so a ValueError is raised by the update if no such
    entry is in the list.

    @param hostname - the hostname of the machine holding the logs
    @param remote_path - the directory on the remote machine holding logs
    @param local_path - the local directory to copy the logs into
    """
    entry = (hostname, remote_path, local_path)
    self._update_uncollected_logs_list(lambda logs: logs.remove(entry))

840

841

mbligh

2009-11-06 03:15:03 +0000

[diff] [blame]

842

def get_client_logs(self):
    """Retrieves the list of uncollected logs, if it exists.

    @returns A list of (host, remote_path, local_path) tuples. Returns
        an empty list if no uncollected logs file exists.
    """
    path = self._uncollected_log_file
    if not (path and os.path.exists(path)):
        return []
    log_file = open(path)
    try:
        return pickle.load(log_file)
    finally:
        # close explicitly instead of relying on garbage collection
        log_file.close()

jadmanski

2008-08-13 14:05:21 +0000

[diff] [blame]

856

def _render_record(self, status_code, subdir, operation, status='',
                   epoch_time=None, record_prefix=None,
                   optional_fields=None):
    """
    Internal Function to generate a record to be written into a
    status log. For use by server_job.* classes only.

    The rendered line is the record prefix followed by the
    tab-separated fields: status code, subdir, operation, every
    optional field as key=value, and finally the status text.

    @param status_code: must satisfy log.is_valid_status().
    @param subdir: results subdirectory; a false value renders as '----'.
    @param operation: what was run; a false value renders as '----'.
    @param status: free-form message. Tabs are replaced by a space and
            every newline is followed by self._record_prefix plus a
            double space to mark the continuation line.
    @param epoch_time: integer timestamp for the record; the current
            time is used when None.
    @param record_prefix: indentation to put in front of the line;
            self._record_prefix is used when None.
    @param optional_fields: extra key/value pairs for the record. The
            dict passed in is not modified; "timestamp" and "localtime"
            entries are always added to the rendered record.

    @returns The rendered record, terminated by a newline.
    @raises ValueError: if subdir or operation contains a tab or
            newline, or if status_code is not a valid status.
    """
    if subdir:
        # search, not match: a tab or newline anywhere in the field
        # would corrupt the tab-separated record
        if re.search(r'[\n\t]', subdir):
            raise ValueError('Invalid character in subdir string')
        substr = subdir
    else:
        substr = '----'

    if not log.is_valid_status(status_code):
        raise ValueError('Invalid status code supplied: %s' % status_code)
    if not operation:
        operation = '----'
    if re.search(r'[\n\t]', operation):
        raise ValueError('Invalid character in operation string')
    operation = operation.rstrip()
    status = status.rstrip()
    status = re.sub(r"\t", " ", status)
    # Ensure any continuation lines are marked so we can
    # detect them in the status file to ensure it is parsable.
    # The marker is the double space described in record()'s docstring.
    status = re.sub(r"\n", "\n" + self._record_prefix + "  ", status)

    # work on a copy so the caller's dict is left untouched
    rendered_fields = dict(optional_fields or {})

    # Generate timestamps for inclusion in the logs
    if epoch_time is None:
        epoch_time = int(time.time())
    local_time = time.localtime(epoch_time)
    rendered_fields["timestamp"] = str(epoch_time)
    rendered_fields["localtime"] = time.strftime("%b %d %H:%M:%S",
                                                 local_time)

    fields = [status_code, substr, operation]
    fields += ["%s=%s" % pair for pair in rendered_fields.items()]
    fields.append(status)

    if record_prefix is None:
        record_prefix = self._record_prefix

    msg = '\t'.join([str(field) for field in fields])
    return record_prefix + msg + '\n'

903

904

905

def _record_prerendered(self, msg):
    """
    Record a pre-rendered msg into the status logs. The only
    change this makes to the message is to add on the local
    indentation. Should not be called outside of server_job.*
    classes. Unlike _record, this does not write the message
    to standard output.

    Each line of msg is prefixed with self._record_prefix, appended to
    the job status log (when the job has one) and then handed to the
    status parser.
    """
    lines = [self._record_prefix + line + '\n'
             for line in msg.splitlines()]
    status_file = self.get_status_log_path()
    # get_status_log_path() returns None without a results directory;
    # skip the file in that case, as _record() does
    if status_file:
        status_log = open(status_file, 'a')
        try:
            status_log.writelines(lines)
        finally:
            # close even when a write fails
            status_log.close()
    self.__parse_status(lines)

922

923

mbligh

2008-10-18 14:02:45 +0000

[diff] [blame]

924

def _fill_server_control_namespace(self, namespace, protect=True):
    """
    Prepare a namespace to be used when executing server control files.

    This sets up the control file API by importing modules and making them
    available under the appropriate names within namespace.

    For use by _execute_code().

    Args:
      namespace: The namespace dictionary to fill in.
      protect: Boolean.  If True (the default) any operation that would
          clobber an existing entry in namespace will cause an error.
    Raises:
      error.AutoservError: When a name would be clobbered by import.
    """
    def _import_names(module_name, names=()):
        """
        Import a module and assign named attributes into namespace.

        Args:
            module_name: The string module name.
            names: A limiting list of names to import from module_name.  If
                empty (the default), all names are imported from the module
                similar to a "from foo.bar import *" statement.
        Raises:
            error.AutoservError: When a name being imported would clobber
                a name already in namespace.
        """
        module = __import__(module_name, {}, {}, names)

        if not names:
            # With no names requested, walk down the dotted path to the
            # lowest level module and take everything it exports.
            for part in module_name.split('.')[1:]:
                module = getattr(module, part)
            if hasattr(module, '__all__'):
                names = getattr(module, '__all__')
            else:
                names = dir(module)

        # Install each name into namespace, checking to make sure it
        # doesn't override anything that already exists.
        for name in names:
            value = getattr(module, name)
            if protect and name in namespace:
                # A different object under the same name is a conflict.
                if namespace[name] is not value:
                    raise error.AutoservError(
                            'importing name %s from %s %r would override %r'
                            % (name, module_name, value, namespace[name]))
                # Same object imported again: allowed, but warn to
                # encourage cleanliness and the use of __all__.
                warnings.warn('%s (%r) being imported from %s for use '
                              'in server control files is not the '
                              'first occurrance of that import.' %
                              (name, namespace[name], module_name))

            namespace[name] = value

    # This is the equivalent of prepending a bunch of import statements to
    # the front of the control script.
    namespace.update(os=os, sys=sys, logging=logging)
    _import_names('autotest_lib.server',
                  ('hosts', 'autotest', 'kvm', 'git', 'standalone_profiler',
                   'source_kernel', 'rpm_kernel', 'deb_kernel', 'git_kernel'))
    _import_names('autotest_lib.server.subcommand',
                  ('parallel', 'parallel_simple', 'subcommand'))
    _import_names('autotest_lib.server.utils',
                  ('run', 'get_tmp_dir', 'sh_escape', 'parse_machine'))
    _import_names('autotest_lib.client.common_lib.error')
    _import_names('autotest_lib.client.common_lib.barrier', ('barrier',))

    # Inject ourself as the job object into other classes within the API.
    # (Yuck, this injection is a gross thing be part of a public API. -gps)
    #
    # XXX Base & SiteAutotest do not appear to use .job.  Who does?
    namespace['autotest'].Autotest.job = self
    # server.hosts.base_classes.Host uses .job.
    namespace['hosts'].Host.job = self

1006

1007

1008

def _execute_code(self, code_file, namespace, protect=True):
    """
    Execute code using a copy of namespace as a server control script.

    Unless protect is explicitly set to False, the dict passed in will
    not be modified.

    When the job has more than one machine, the machines file
    (MACHINES_FILENAME) is brought up to date before the code runs.

    Args:
      code_file: The filename of the control file to execute.
      namespace: A dict containing names to make available during execution.
      protect: Boolean.  If True (the default) a copy of the namespace dict
          is used during execution to prevent the code from modifying its
          contents outside of this function.  If False the raw dict is
          passed in and modifications will be allowed.
    """
    def _read_machines_file():
        """Return the current machines file text, or None if unreadable."""
        try:
            machines_f = open(MACHINES_FILENAME, 'r')
            text = machines_f.read()
            machines_f.close()
        except EnvironmentError:
            return None
        return text

    if protect:
        namespace = namespace.copy()
    self._fill_server_control_namespace(namespace, protect=protect)
    # TODO: Simplify and get rid of the special cases for only 1 machine.
    if len(self.machines) > 1:
        wanted_text = '\n'.join(self.machines) + '\n'
        # Only rewrite the file if it does not match our machine list.
        if wanted_text != _read_machines_file():
            utils.open_write_close(MACHINES_FILENAME, wanted_text)
    execfile(code_file, namespace, namespace)

jadmanski

2008-08-13 14:05:21 +0000

[diff] [blame]

1039

1040

1041

def _record(self, status_code, subdir, operation, status='',
            epoch_time=None, optional_fields=None):
    """
    Actual function for recording a single line into the status
    logs. Should never be called directly, only by job.record as
    this would bypass the console monitor logging.

    The rendered record is written to standard output, appended to the
    job status log and, when subdir is given, to that subdirectory's
    status log as well. A log whose path is not available (no results
    directory) is skipped. Finally the record is passed to the status
    parser.

    The parameters are the ones accepted by _render_record().
    """
    msg = self._render_record(status_code, subdir, operation, status,
                              epoch_time, optional_fields=optional_fields)

    log_paths = [self.get_status_log_path()]
    sys.stdout.write(msg)
    if subdir:
        log_paths.append(self.get_status_log_path(subdir))

    for path in log_paths:
        # get_status_log_path() returns None without a results directory
        if not path:
            continue
        status_log = open(path, "a")
        try:
            status_log.write(msg)
        finally:
            # close explicitly rather than waiting for garbage collection
            status_log.close()

    self.__parse_status(msg.splitlines())

1060

1061

1062

def __parse_status(self, new_lines):
    """Feed new status log lines to the parser and insert the results.

    Does nothing unless self._using_parser is set. Every test returned
    by self.parser.process_lines() is passed to __insert_test().

    @param new_lines: list of status log lines to process.
    """
    if self._using_parser:
        for parsed_test in self.parser.process_lines(new_lines):
            self.__insert_test(parsed_test)

1068

1069

1070

def __insert_test(self, test):
    """
    Insert a new test result into the results database and update the
    run/failed counters.

    Never raises because of the insert itself: any Exception from the
    database is reported on standard error together with its traceback
    and then ignored, so that unexpected database issues do not fail a
    test.
    """
    self.num_tests_run += 1
    if status_lib.is_worse_than_or_equal_to(test.status, 'FAIL'):
        self.num_tests_failed += 1
    try:
        self.results_db.insert_test(self.job_model, test)
    except Exception:
        warning = ("WARNING: An unexpected error occured while "
                   "inserting test results into the database. "
                   "Ignoring error.\n" + traceback.format_exc())
        sys.stderr.write(warning + '\n')

1086

mbligh

caa62c2

2008-04-07 21:51:17 +0000

[diff] [blame]

1087

mbligh

2010-01-06 18:37:22 +0000

[diff] [blame]

1088

def preprocess_client_state(self):
    """
    Produce a state file for initializing the state of a client job.

    Stores the serialized sysinfo under ('client', 'sysinfo') in the
    job state, then writes the whole state into a newly created
    temporary file inside self.tmpdir.

    @returns The path of the file the state was written into.
    """
    # initialize the sysinfo state
    self._state.set('client', 'sysinfo', self.sysinfo.serialize())

    # mkstemp() creates the file; only its path is needed here
    handle, state_path = tempfile.mkstemp(dir=self.tmpdir)
    os.close(handle)

    # write_to_file doesn't need locking, we exclusively own state_path
    self._state.write_to_file(state_path)
    return state_path

def postprocess_client_state(self, state_path):
    """
    Update the state of this job with the state from a client job.

    Reads the client's final state file into the job state, removes the
    file, restores sysinfo from the ('client', 'sysinfo') entry when
    one is present, and finally discards the whole 'client' namespace.

    @param state_path A path to the state file from the client.
    """
    # update the on-disk state
    self._state.read_from_file(state_path)
    try:
        os.remove(state_path)
    except OSError:
        # only a file that is already gone is tolerated
        if sys.exc_info()[1].errno != errno.ENOENT:
            raise

    # update the sysinfo state
    if self._state.has('client', 'sysinfo'):
        client_sysinfo = self._state.get('client', 'sysinfo')
        self.sysinfo.deserialize(client_sysinfo)

    # drop all the client-specific state
    self._state.discard_namespace('client')

1135

1136

mbligh

a700772

2009-01-13 00:37:11 +0000

[diff] [blame]

1137

# Base class for server_job, looked up through utils.import_site_class().
# NOTE(review): presumably this yields a site-specific subclass from
# autotest_lib.server.site_server_job when that module exists and
# base_server_job otherwise -- confirm against utils.import_site_class.
site_server_job = utils.import_site_class(
    __file__, "autotest_lib.server.site_server_job", "site_server_job",
    base_server_job)


# The public job class; it adds nothing to the class resolved above.
class server_job(site_server_job):
    pass

jadmanski

2009-02-11 00:03:26 +0000

[diff] [blame]

1143

1144

1145

class warning_manager(object):

1146

"""Class for controlling warning logs. Manages the enabling and disabling

1147

of warnings."""

1148

def __init__(self):

1149

# a map of warning types to a list of disabled time intervals

1150

self.disabled_warnings = {}

1151

1152

1153

def is_valid(self, timestamp, warning_type):

1154

"""Indicates if a warning (based on the time it occured and its type)

1155

is a valid warning. A warning is considered "invalid" if this type of

1156

warning was marked as "disabled" at the time the warning occured."""

1157

disabled_intervals = self.disabled_warnings.get(warning_type, [])

1158

for start, end in disabled_intervals:

1159

if timestamp >= start and (end is None or timestamp < end):

return False

return True

def disable_warnings(self, warning_type, current_time_func=time.time):

1165

"""As of now, disables all further warnings of this type."""

1166

intervals = self.disabled_warnings.setdefault(warning_type, [])

1167

if not intervals or intervals[-1][1] is not None:

jadmanski

2009-04-01 18:19:53 +0000

[diff] [blame]

1168

intervals.append((int(current_time_func()), None))

jadmanski

2009-02-11 00:03:26 +0000

[diff] [blame]

1169

1170

1171

def enable_warnings(self, warning_type, current_time_func=time.time):

1172

"""As of now, enables all further warnings of this type."""

1173

intervals = self.disabled_warnings.get(warning_type, [])

1174

if intervals and intervals[-1][1] is None:

jadmanski