blob: 6b98dd28d658508b2d957205108275371a9dcee9 [file] [log] [blame]
mbligh57e78662008-06-17 19:53:49 +00001"""
2The main job wrapper for the server side.
3
4This is the core infrastructure. Derived from the client side job.py
5
6Copyright Martin J. Bligh, Andy Whitcroft 2007
7"""
8
Eric Li861b2d52011-02-04 14:50:35 -08009import getpass, os, sys, re, stat, tempfile, time, select, subprocess, platform
Paul Pendleburyff1076d2011-03-31 14:45:32 -070010import Queue, threading
mblighfc3da5b2010-01-06 18:37:22 +000011import traceback, shutil, warnings, fcntl, pickle, logging, itertools, errno
showard75cdfee2009-06-10 17:40:41 +000012from autotest_lib.client.bin import sysinfo
mbligh0d0f67d2009-11-06 03:15:03 +000013from autotest_lib.client.common_lib import base_job
mbligh09108442008-10-15 16:27:38 +000014from autotest_lib.client.common_lib import error, log, utils, packages
showard75cdfee2009-06-10 17:40:41 +000015from autotest_lib.client.common_lib import logging_manager
Paul Pendleburyf807c182011-04-05 11:24:34 -070016from autotest_lib.server import test, subcommand, profilers, server_job_utils
mbligh0a883702010-04-21 01:58:34 +000017from autotest_lib.server.hosts import abstract_ssh
jadmanski10646442008-08-13 14:05:21 +000018from autotest_lib.tko import db as tko_db, status_lib, utils as tko_utils
jadmanski10646442008-08-13 14:05:21 +000019
20
mbligh084bc172008-10-18 14:02:45 +000021def _control_segment_path(name):
22 """Get the pathname of the named control segment file."""
jadmanski10646442008-08-13 14:05:21 +000023 server_dir = os.path.dirname(os.path.abspath(__file__))
mbligh084bc172008-10-18 14:02:45 +000024 return os.path.join(server_dir, "control_segments", name)
jadmanski10646442008-08-13 14:05:21 +000025
26
# Filenames for the control files a job writes into its control-file /
# results directory (see run() below).
CLIENT_CONTROL_FILENAME = 'control'
SERVER_CONTROL_FILENAME = 'control.srv'
# NOTE(review): not referenced in this chunk; presumably the per-job
# machine list file -- confirm against the rest of the file.
MACHINES_FILENAME = '.machines'

# Canned control segments implementing the standard job phases; each
# resolves to a file under server/control_segments/.
CLIENT_WRAPPER_CONTROL_FILE = _control_segment_path('client_wrapper')
CRASHDUMPS_CONTROL_FILE = _control_segment_path('crashdumps')
CRASHINFO_CONTROL_FILE = _control_segment_path('crashinfo')
INSTALL_CONTROL_FILE = _control_segment_path('install')
CLEANUP_CONTROL_FILE = _control_segment_path('cleanup')

VERIFY_CONTROL_FILE = _control_segment_path('verify')
REPAIR_CONTROL_FILE = _control_segment_path('repair')
jadmanski10646442008-08-13 14:05:21 +000039
40
mbligh062ed152009-01-13 00:57:14 +000041# by default provide a stub that generates no site data
42def _get_site_job_data_dummy(job):
43 return {}
44
45
# Load site-specific code for generating site-specific job data, falling
# back to the dummy hook above when no site implementation exists.
get_site_job_data = utils.import_site_function(__file__,
    "autotest_lib.server.site_server_job", "get_site_job_data",
    _get_site_job_data_dummy)
jadmanski10646442008-08-13 14:05:21 +000050
51
class status_indenter(base_job.status_indenter):
    """A minimal status indenter backed by a plain integer counter."""
    def __init__(self):
        self._indent = 0


    @property
    def indent(self):
        """The current indentation level."""
        return self._indent


    def increment(self):
        """Raise the indentation level by one."""
        self._indent += 1


    def decrement(self):
        """Lower the indentation level by one."""
        self._indent -= 1


    def get_context(self):
        """Return a snapshot object whose restore() rewinds the indent.

        Used by job.get_record_context.
        """
        class context(object):
            def __init__(self, indenter, level):
                self._indenter = indenter
                self._level = level
            def restore(self):
                self._indenter._indent = self._level
        return context(self, self._indent)
80
81
class server_job_record_hook(object):
    """The job.record hook for the server job.

    Injects WARN messages from the console/vlm warning loggers whenever a
    new status log is written, and echoes every rendered status line to
    INFO-level logging. Implemented as a class so that instance state can
    block recursive invocations -- the hook itself calls job.record to
    emit the WARN lines.

    Depends on job._read_warnings and job._logger.
    """
    def __init__(self, job):
        self._job = job
        self._in_hook = False


    def __call__(self, entry):
        """Re-entrancy guard around the 'real' hook, _hook.

        Makes no attempt to be threadsafe; it exists only to break the
        potential job.record -> _hook -> job.record -> ... infinite
        recursion outright.
        """
        if self._in_hook:
            return
        self._in_hook = True
        try:
            self._hook(self._job, entry)
        finally:
            self._in_hook = False


    @staticmethod
    def _hook(job, entry):
        """The core hook, which can safely call job.record."""
        # drain all warning loggers and record each new warning first
        pending = []
        for timestamp, msg in job._read_warnings():
            warning = base_job.status_log_entry(
                'WARN', None, None, msg, {}, timestamp=timestamp)
            pending.append(warning)
            job.record_entry(warning)
        pending.append(entry)
        # echo a rendered version of every entry to INFO and the parser
        for log_entry in pending:
            rendered = job._logger.render_entry(log_entry)
            logging.info(rendered)
            job._parse_status(rendered)
jadmanski2a89dac2010-06-11 14:32:58 +0000126
127
mbligh0d0f67d2009-11-06 03:15:03 +0000128class base_server_job(base_job.base_job):
129 """The server-side concrete implementation of base_job.
jadmanski10646442008-08-13 14:05:21 +0000130
mbligh0d0f67d2009-11-06 03:15:03 +0000131 Optional properties provided by this implementation:
132 serverdir
133 conmuxdir
134
135 num_tests_run
136 num_tests_failed
137
138 warning_manager
139 warning_loggers
jadmanski10646442008-08-13 14:05:21 +0000140 """
141
mbligh0d0f67d2009-11-06 03:15:03 +0000142 _STATUS_VERSION = 1
jadmanski10646442008-08-13 14:05:21 +0000143
    def __init__(self, control, args, resultdir, label, user, machines,
                 client=False, parse_job='',
                 ssh_user='root', ssh_port=22, ssh_pass='',
                 group_name='', tag='',
                 control_filename=SERVER_CONTROL_FILENAME):
        """
        Create a server side job object.

        @param control: The pathname of the control file.
        @param args: Passed to the control file.
        @param resultdir: Where to throw the results.
        @param label: Description of the job.
        @param user: Username for the job (email address).
        @param machines: A list of hostnames the job will run against.
        @param client: True if this is a client-side control file.
        @param parse_job: string, if supplied it is the job execution tag that
                the results will be passed through to the TKO parser with.
        @param ssh_user: The SSH username.  [root]
        @param ssh_port: The SSH port number.  [22]
        @param ssh_pass: The SSH passphrase, if needed.
        @param group_name: If supplied, this will be written out as
                host_group_name in the keyvals file for the parser.
        @param tag: The job execution tag from the scheduler.  [optional]
        @param control_filename: The filename where the server control file
                should be written in the results directory.
        """
        # base_job.__init__ resolves resultdir via _find_resultdir below
        super(base_server_job, self).__init__(resultdir=resultdir)

        # NOTE(review): 'path' appears unused in this method.
        path = os.path.dirname(__file__)
        self.control = control
        self._uncollected_log_file = os.path.join(self.resultdir,
                                                  'uncollected_logs')
        debugdir = os.path.join(self.resultdir, 'debug')
        if not os.path.exists(debugdir):
            os.mkdir(debugdir)

        # fall back to the local username when no job user was supplied
        if user:
            self.user = user
        else:
            self.user = getpass.getuser()

        self.args = args
        self.machines = machines
        self._client = client
        self.warning_loggers = set()
        self.warning_manager = warning_manager()
        self._ssh_user = ssh_user
        self._ssh_port = ssh_port
        self._ssh_pass = ssh_pass
        self.tag = tag
        self.last_boot_tag = None
        self.hosts = set()
        self.drop_caches = False
        self.drop_caches_between_iterations = False
        self._control_filename = control_filename

        # take over stdout/stderr management for the whole process
        self.logging = logging_manager.get_logging_manager(
                manage_stdout_and_stderr=True, redirect_fds=True)
        subcommand.logging_manager_object = self.logging

        self.sysinfo = sysinfo.sysinfo(self.resultdir)
        self.profilers = profilers.profilers(self)

        # basic keyvals describing this job run
        job_data = {'label' : label, 'user' : user,
                    'hostname' : ','.join(machines),
                    'drone' : platform.node(),
                    'status_version' : str(self._STATUS_VERSION),
                    'job_started' : str(int(time.time()))}
        if group_name:
            job_data['host_group_name'] = group_name

        # only write these keyvals out on the first job in a resultdir
        if 'job_started' not in utils.read_keyval(self.resultdir):
            job_data.update(get_site_job_data(self))
            utils.write_keyval(self.resultdir, job_data)

        # continuous parsing only works for single-machine jobs
        self._parse_job = parse_job
        self._using_parser = (self._parse_job and len(machines) <= 1)
        self.pkgmgr = packages.PackageManager(
            self.autodir, run_function_dargs={'timeout':600})
        self.num_tests_run = 0
        self.num_tests_failed = 0

        # ensure hosts created in forked subprocesses get closed on join
        self._register_subcommand_hooks()

        # these components aren't usable on the server
        self.bootloader = None
        self.harness = None

        # set up the status logger
        self._indenter = status_indenter()
        self._logger = base_job.status_logger(
            self, self._indenter, 'status.log', 'status.log',
            record_hook=server_job_record_hook(self))
237
mbligh0d0f67d2009-11-06 03:15:03 +0000238
239 @classmethod
240 def _find_base_directories(cls):
241 """
242 Determine locations of autodir, clientdir and serverdir. Assumes
243 that this file is located within serverdir and uses __file__ along
244 with relative paths to resolve the location.
245 """
246 serverdir = os.path.abspath(os.path.dirname(__file__))
247 autodir = os.path.normpath(os.path.join(serverdir, '..'))
248 clientdir = os.path.join(autodir, 'client')
249 return autodir, clientdir, serverdir
250
251
252 def _find_resultdir(self, resultdir):
253 """
254 Determine the location of resultdir. For server jobs we expect one to
255 always be explicitly passed in to __init__, so just return that.
256 """
257 if resultdir:
258 return os.path.normpath(resultdir)
259 else:
260 return None
261
jadmanski550fdc22008-11-20 16:32:08 +0000262
jadmanski2a89dac2010-06-11 14:32:58 +0000263 def _get_status_logger(self):
264 """Return a reference to the status logger."""
265 return self._logger
266
267
jadmanskie432dd22009-01-30 15:04:51 +0000268 @staticmethod
269 def _load_control_file(path):
270 f = open(path)
271 try:
272 control_file = f.read()
273 finally:
274 f.close()
275 return re.sub('\r', '', control_file)
276
277
jadmanski550fdc22008-11-20 16:32:08 +0000278 def _register_subcommand_hooks(self):
mbligh2b92b862008-11-22 13:25:32 +0000279 """
280 Register some hooks into the subcommand modules that allow us
281 to properly clean up self.hosts created in forked subprocesses.
282 """
jadmanski550fdc22008-11-20 16:32:08 +0000283 def on_fork(cmd):
284 self._existing_hosts_on_fork = set(self.hosts)
285 def on_join(cmd):
286 new_hosts = self.hosts - self._existing_hosts_on_fork
287 for host in new_hosts:
288 host.close()
289 subcommand.subcommand.register_fork_hook(on_fork)
290 subcommand.subcommand.register_join_hook(on_join)
291
jadmanski10646442008-08-13 14:05:21 +0000292
    def init_parser(self):
        """
        Start the continuous parsing of self.resultdir. This sets up
        the database connection and inserts the basic job object into
        the database if necessary.

        No-op unless continuous parsing was enabled in __init__
        (self._using_parser).
        """
        if not self._using_parser:
            return
        # redirect parser debugging to .parse.log (opened unbuffered,
        # hence the third argument of 0)
        parse_log = os.path.join(self.resultdir, '.parse.log')
        parse_log = open(parse_log, 'w', 0)
        tko_utils.redirect_parser_debugging(parse_log)
        # create a job model object and set up the db
        self.results_db = tko_db.db(autocommit=True)
        self.parser = status_lib.parser(self._STATUS_VERSION)
        self.job_model = self.parser.make_job(self.resultdir)
        self.parser.start(self.job_model)
        # check if a job already exists in the db and insert it if
        # it does not
        job_idx = self.results_db.find_job(self._parse_job)
        if job_idx is None:
            self.results_db.insert_job(self._parse_job, self.job_model)
        else:
            # job already present: point the model at the existing rows
            machine_idx = self.results_db.lookup_machine(self.job_model.machine)
            self.job_model.index = job_idx
            self.job_model.machine_idx = machine_idx
319
320
321 def cleanup_parser(self):
mbligh2b92b862008-11-22 13:25:32 +0000322 """
323 This should be called after the server job is finished
jadmanski10646442008-08-13 14:05:21 +0000324 to carry out any remaining cleanup (e.g. flushing any
mbligh2b92b862008-11-22 13:25:32 +0000325 remaining test results to the results db)
326 """
mbligh0d0f67d2009-11-06 03:15:03 +0000327 if not self._using_parser:
jadmanski10646442008-08-13 14:05:21 +0000328 return
329 final_tests = self.parser.end()
330 for test in final_tests:
331 self.__insert_test(test)
mbligh0d0f67d2009-11-06 03:15:03 +0000332 self._using_parser = False
jadmanski10646442008-08-13 14:05:21 +0000333
334
335 def verify(self):
336 if not self.machines:
mbligh084bc172008-10-18 14:02:45 +0000337 raise error.AutoservError('No machines specified to verify')
mbligh0fce4112008-11-27 00:37:17 +0000338 if self.resultdir:
339 os.chdir(self.resultdir)
jadmanski10646442008-08-13 14:05:21 +0000340 try:
jadmanskicdd0c402008-09-19 21:21:31 +0000341 namespace = {'machines' : self.machines, 'job' : self,
mbligh0d0f67d2009-11-06 03:15:03 +0000342 'ssh_user' : self._ssh_user,
343 'ssh_port' : self._ssh_port,
344 'ssh_pass' : self._ssh_pass}
mbligh084bc172008-10-18 14:02:45 +0000345 self._execute_code(VERIFY_CONTROL_FILE, namespace, protect=False)
jadmanski10646442008-08-13 14:05:21 +0000346 except Exception, e:
mbligh2b92b862008-11-22 13:25:32 +0000347 msg = ('Verify failed\n' + str(e) + '\n' + traceback.format_exc())
jadmanski10646442008-08-13 14:05:21 +0000348 self.record('ABORT', None, None, msg)
349 raise
350
351
352 def repair(self, host_protection):
353 if not self.machines:
354 raise error.AutoservError('No machines specified to repair')
mbligh0fce4112008-11-27 00:37:17 +0000355 if self.resultdir:
356 os.chdir(self.resultdir)
jadmanski10646442008-08-13 14:05:21 +0000357 namespace = {'machines': self.machines, 'job': self,
mbligh0d0f67d2009-11-06 03:15:03 +0000358 'ssh_user': self._ssh_user, 'ssh_port': self._ssh_port,
359 'ssh_pass': self._ssh_pass,
jadmanski10646442008-08-13 14:05:21 +0000360 'protection_level': host_protection}
mbligh25c0b8c2009-01-24 01:44:17 +0000361
mbligh0931b0a2009-04-08 17:44:48 +0000362 self._execute_code(REPAIR_CONTROL_FILE, namespace, protect=False)
jadmanski10646442008-08-13 14:05:21 +0000363
364
365 def precheck(self):
366 """
367 perform any additional checks in derived classes.
368 """
369 pass
370
371
372 def enable_external_logging(self):
mbligh2b92b862008-11-22 13:25:32 +0000373 """
374 Start or restart external logging mechanism.
jadmanski10646442008-08-13 14:05:21 +0000375 """
376 pass
377
378
379 def disable_external_logging(self):
mbligh2b92b862008-11-22 13:25:32 +0000380 """
381 Pause or stop external logging mechanism.
jadmanski10646442008-08-13 14:05:21 +0000382 """
383 pass
384
385
386 def use_external_logging(self):
mbligh2b92b862008-11-22 13:25:32 +0000387 """
388 Return True if external logging should be used.
jadmanski10646442008-08-13 14:05:21 +0000389 """
390 return False
391
392
    def _make_parallel_wrapper(self, function, machines, log):
        """Wrap function as appropriate for calling by parallel_simple.

        Three cases:
          * parsing enabled + forking + logging: per-machine wrapper that
            rescopes this job object to a single machine and runs the
            continuous parser around the call;
          * multiple machines + logging: per-machine wrapper that only sets
            up the execution context and machine keyvals;
          * otherwise: function is returned unwrapped.

        NOTE(review): the wrappers mutate self (_parse_job, machines,
        _using_parser) -- presumably safe because parallel_simple runs them
        in forked subprocesses; confirm against subcommand.parallel_simple.
        """
        # forking happens unless this is a single-machine job running on
        # exactly its own machine list
        is_forking = not (len(machines) == 1 and self.machines == machines)
        if self._parse_job and is_forking and log:
            def wrapper(machine):
                # rescope the parse tag and machine list to this machine
                self._parse_job += "/" + machine
                self._using_parser = True
                self.machines = [machine]
                self.push_execution_context(machine)
                os.chdir(self.resultdir)
                utils.write_keyval(self.resultdir, {"hostname": machine})
                self.init_parser()
                result = function(machine)
                self.cleanup_parser()
                return result
        elif len(machines) > 1 and log:
            def wrapper(machine):
                self.push_execution_context(machine)
                os.chdir(self.resultdir)
                machine_data = {'hostname' : machine,
                                'status_version' : str(self._STATUS_VERSION)}
                utils.write_keyval(self.resultdir, machine_data)
                result = function(machine)
                return result
        else:
            wrapper = function
        return wrapper
420
421
422 def parallel_simple(self, function, machines, log=True, timeout=None,
423 return_results=False):
424 """
425 Run 'function' using parallel_simple, with an extra wrapper to handle
426 the necessary setup for continuous parsing, if possible. If continuous
427 parsing is already properly initialized then this should just work.
428
429 @param function: A callable to run in parallel given each machine.
430 @param machines: A list of machine names to be passed one per subcommand
431 invocation of function.
432 @param log: If True, output will be written to output in a subdirectory
433 named after each machine.
434 @param timeout: Seconds after which the function call should timeout.
435 @param return_results: If True instead of an AutoServError being raised
436 on any error a list of the results|exceptions from the function
437 called on each arg is returned. [default: False]
438
439 @raises error.AutotestError: If any of the functions failed.
440 """
441 wrapper = self._make_parallel_wrapper(function, machines, log)
442 return subcommand.parallel_simple(wrapper, machines,
443 log=log, timeout=timeout,
444 return_results=return_results)
445
446
447 def parallel_on_machines(self, function, machines, timeout=None):
448 """
showardcd5fac42009-07-06 20:19:43 +0000449 @param function: Called in parallel with one machine as its argument.
mbligh415dc212009-06-15 21:53:34 +0000450 @param machines: A list of machines to call function(machine) on.
451 @param timeout: Seconds after which the function call should timeout.
452
453 @returns A list of machines on which function(machine) returned
454 without raising an exception.
455 """
showardcd5fac42009-07-06 20:19:43 +0000456 results = self.parallel_simple(function, machines, timeout=timeout,
mbligh415dc212009-06-15 21:53:34 +0000457 return_results=True)
458 success_machines = []
459 for result, machine in itertools.izip(results, machines):
460 if not isinstance(result, Exception):
461 success_machines.append(machine)
462 return success_machines
jadmanski10646442008-08-13 14:05:21 +0000463
464
Paul Pendleburyff1076d2011-03-31 14:45:32 -0700465 def distribute_across_machines(self, tests, machines):
466 """Run each test in tests once using machines.
467
468 Instead of running each test on each machine like parallel_on_machines,
469 run each test once across all machines. Put another way, the total
470 number of tests run by parallel_on_machines is len(tests) *
471 len(machines). The number of tests run by distribute_across_machines is
472 len(tests).
473
474 Args:
475 tests: List of tests to run.
476 machines: list of machines to use.
477 """
478 # The Queue is thread safe, but since a machine may have to search
479 # through the queue to find a valid test the lock provides exclusive
480 # queue access for more than just the get call.
481 test_queue = Queue.Queue()
482 test_queue_lock = threading.Lock()
483
Paul Pendlebury1f6f3e72011-04-13 11:16:44 -0700484 machine_workers = [server_job_utils.machine_worker(self,
485 machine,
Paul Pendleburyf807c182011-04-05 11:24:34 -0700486 self.resultdir,
487 test_queue,
488 test_queue_lock)
Paul Pendleburyff1076d2011-03-31 14:45:32 -0700489 for machine in machines]
490
491 # To (potentially) speed up searching for valid tests create a list of
492 # unique attribute sets present in the machines for this job. If sets
493 # were hashable we could just use a dictionary for fast verification.
494 # This at least reduces the search space from the number of machines to
495 # the number of unique machines.
496 unique_machine_attributes = []
497 for mw in machine_workers:
498 if not mw.attribute_set in unique_machine_attributes:
499 unique_machine_attributes.append(mw.attribute_set)
500
501 # Only queue tests which are valid on at least one machine. Record
502 # skipped tests in the status.log file using record_skipped_test().
503 for test_entry in tests:
Paul Pendleburyf807c182011-04-05 11:24:34 -0700504 ti = server_job_utils.test_item(*test_entry)
Paul Pendleburyff1076d2011-03-31 14:45:32 -0700505 machine_found = False
506 for ma in unique_machine_attributes:
507 if ti.validate(ma):
508 test_queue.put(ti)
509 machine_found = True
510 break
511 if not machine_found:
512 self.record_skipped_test(ti)
513
514 # Run valid tests and wait for completion.
515 for worker in machine_workers:
516 worker.start()
517 test_queue.join()
518
519
520 def record_skipped_test(self, skipped_test, message=None):
521 """Insert a failure record into status.log for this test."""
522 msg = message
523 if msg is None:
524 msg = 'No valid machines found for test %s.' % skipped_test
525 logging.info(msg)
526 self.record('START', None, skipped_test.test_name)
527 self.record('INFO', None, skipped_test.test_name, msg)
528 self.record('END TEST_NA', None, skipped_test.test_name, msg)
529
530
    # sentinel for control_file_dir meaning "use a fresh temp directory"
    _USE_TEMP_DIR = object()
    def run(self, cleanup=False, install_before=False, install_after=False,
            collect_crashdumps=True, namespace={}, control=None,
            control_file_dir=None, only_collect_crashinfo=False):
        """Execute the job's control file and the standard phase segments.

        @param cleanup: run the cleanup control segment afterwards.
        @param install_before/install_after: run the install control segment
                before/after the control file.
        @param collect_crashdumps: collect crashdumps in the finally phase
                even when no exception escaped.
        @param namespace: extra names made visible to the control file.
                NOTE(review): mutable default, but it is copied below before
                being mutated, so the default dict is never modified.
        @param control: control file text; defaults to the contents of
                self.control (or '' when that is None).
        @param control_file_dir: where to write the control files; defaults
                to self.resultdir, or a temp dir for _USE_TEMP_DIR.
        @param only_collect_crashinfo: skip control file execution and only
                run crashinfo collection.
        """
        # for a normal job, make sure the uncollected logs file exists
        # for a crashinfo-only run it should already exist, bail out otherwise
        created_uncollected_logs = False
        if self.resultdir and not os.path.exists(self._uncollected_log_file):
            if only_collect_crashinfo:
                # if this is a crashinfo-only run, and there were no existing
                # uncollected logs, just bail out early
                logging.info("No existing uncollected logs, "
                             "skipping crashinfo collection")
                return
            else:
                log_file = open(self._uncollected_log_file, "w")
                pickle.dump([], log_file)
                log_file.close()
                created_uncollected_logs = True

        # use a copy so changes don't affect the original dictionary
        namespace = namespace.copy()
        machines = self.machines
        if control is None:
            if self.control is None:
                control = ''
            else:
                control = self._load_control_file(self.control)
        if control_file_dir is None:
            control_file_dir = self.resultdir

        self.aborted = False
        # expose standard job state to the control file's namespace
        namespace['machines'] = machines
        namespace['args'] = self.args
        namespace['job'] = self
        namespace['ssh_user'] = self._ssh_user
        namespace['ssh_port'] = self._ssh_port
        namespace['ssh_pass'] = self._ssh_pass
        test_start_time = int(time.time())

        if self.resultdir:
            os.chdir(self.resultdir)
            # touch status.log so that the parser knows a job is running here
            open(self.get_status_log_path(), 'a').close()
            self.enable_external_logging()

        # assume crashinfo collection is needed until the control file
        # finishes without an escaping exception
        collect_crashinfo = True
        temp_control_file_dir = None
        try:
            try:
                if install_before and machines:
                    self._execute_code(INSTALL_CONTROL_FILE, namespace)

                if only_collect_crashinfo:
                    return

                # determine the dir to write the control files to
                cfd_specified = (control_file_dir
                                 and control_file_dir is not self._USE_TEMP_DIR)
                if cfd_specified:
                    temp_control_file_dir = None
                else:
                    temp_control_file_dir = tempfile.mkdtemp(
                        suffix='temp_control_file_dir')
                    control_file_dir = temp_control_file_dir
                server_control_file = os.path.join(control_file_dir,
                                                   self._control_filename)
                client_control_file = os.path.join(control_file_dir,
                                                   CLIENT_CONTROL_FILENAME)
                if self._client:
                    # client-side control: write it out and run the server
                    # wrapper segment around it
                    namespace['control'] = control
                    utils.open_write_close(client_control_file, control)
                    shutil.copyfile(CLIENT_WRAPPER_CONTROL_FILE,
                                    server_control_file)
                else:
                    utils.open_write_close(server_control_file, control)
                logging.info("Processing control file")
                self._execute_code(server_control_file, namespace)
                logging.info("Finished processing control file")

                # no error occured, so we don't need to collect crashinfo
                collect_crashinfo = False
            except Exception, e:
                try:
                    logging.exception(
                            'Exception escaped control file, job aborting:')
                    self.record('INFO', None, None, str(e),
                                {'job_abort_reason': str(e)})
                except:
                    pass # don't let logging exceptions here interfere
                raise
        finally:
            if temp_control_file_dir:
                # Clean up temp directory used for copies of the control files
                try:
                    shutil.rmtree(temp_control_file_dir)
                except Exception, e:
                    logging.warn('Could not remove temp directory %s: %s',
                                 temp_control_file_dir, e)

            if machines and (collect_crashdumps or collect_crashinfo):
                namespace['test_start_time'] = test_start_time
                if collect_crashinfo:
                    # includes crashdumps
                    self._execute_code(CRASHINFO_CONTROL_FILE, namespace)
                else:
                    self._execute_code(CRASHDUMPS_CONTROL_FILE, namespace)
            # only remove the uncollected-logs file this run created
            if self._uncollected_log_file and created_uncollected_logs:
                os.remove(self._uncollected_log_file)
            self.disable_external_logging()
            if cleanup and machines:
                self._execute_code(CLEANUP_CONTROL_FILE, namespace)
            if install_after and machines:
                self._execute_code(INSTALL_CONTROL_FILE, namespace)
jadmanski10646442008-08-13 14:05:21 +0000645
646
    def run_test(self, url, *args, **dargs):
        """
        Summon a test object and run it.

        @param url: url of the test to run.
        @param args, dargs: passed through to test.runtest; a 'tag' entry in
                dargs participates in building the tagged test name.

        @returns True if the test ran and recorded GOOD; False if it raised
                a TestBaseException (already recorded). Any other exception
                is re-raised with its original traceback.
        """
        group, testname = self.pkgmgr.get_package_name(url, 'test')
        testname, subdir, tag = self._build_tagged_test_name(testname, dargs)
        # NOTE(review): 'group' and 'outputdir' appear unused here, though
        # _make_test_outputdir is presumably called for its side effect of
        # creating the directory -- confirm.
        outputdir = self._make_test_outputdir(subdir)

        def group_func():
            # run the test, recording its outcome in the status log
            try:
                test.runtest(self, url, tag, args, dargs)
            except error.TestBaseException, e:
                self.record(e.exit_status, subdir, testname, str(e))
                raise
            except Exception, e:
                info = str(e) + "\n" + traceback.format_exc()
                self.record('FAIL', subdir, testname, info)
                raise
            else:
                self.record('GOOD', subdir, testname, 'completed successfully')

        result, exc_info = self._run_group(testname, subdir, group_func)
        if exc_info and isinstance(exc_info[1], error.TestBaseException):
            return False
        elif exc_info:
            # re-raise non-test exceptions with the original traceback
            raise exc_info[0], exc_info[1], exc_info[2]
        else:
            return True
jadmanski10646442008-08-13 14:05:21 +0000680
681
    def _run_group(self, name, subdir, function, *args, **dargs):
        """\
        Underlying method for running something inside of a group.

        Records a START line, runs the function, then records the matching
        END line whose status reflects the outcome.

        @param name: group name used in the status log.
        @param subdir: results subdirectory to record against (may be None).
        @param function: the callable to run inside the group.
        @param args: positional arguments forwarded to the function.
        @param dargs: keyword arguments forwarded to the function.

        @returns A (result, exc_info) tuple: result is the function's return
                value (None on failure); exc_info is sys.exc_info() when the
                function raised a TestBaseException, otherwise None.
        @raises error.JobError: when the function raised something other
                than a TestBaseException (recorded as END ABORT).
        """
        result, exc_info = None, None
        try:
            self.record('START', subdir, name)
            result = function(*args, **dargs)
        except error.TestBaseException, e:
            # Test failures end the group but are handed back to the caller
            # via exc_info rather than re-raised here.
            self.record("END %s" % e.exit_status, subdir, name)
            exc_info = sys.exc_info()
        except Exception, e:
            # Any other exception aborts the group and fails the job.
            err_msg = str(e) + '\n'
            err_msg += traceback.format_exc()
            self.record('END ABORT', subdir, name, err_msg)
            raise error.JobError(name + ' failed\n' + traceback.format_exc())
        else:
            self.record('END GOOD', subdir, name)

        return result, exc_info
jadmanski10646442008-08-13 14:05:21 +0000702
703
704 def run_group(self, function, *args, **dargs):
705 """\
706 function:
707 subroutine to run
708 *args:
709 arguments for the function
710 """
711
712 name = function.__name__
713
714 # Allow the tag for the group to be specified.
715 tag = dargs.pop('tag', None)
716 if tag:
717 name = tag
718
jadmanskide292df2008-08-26 20:51:14 +0000719 return self._run_group(name, None, function, *args, **dargs)[0]
jadmanski10646442008-08-13 14:05:21 +0000720
721
    def run_reboot(self, reboot_func, get_kernel_func):
        """\
        A specialization of run_group meant specifically for handling
        a reboot. Includes support for capturing the kernel version
        after the reboot.

        @param reboot_func: a function that carries out the reboot.
        @param get_kernel_func: a function that returns a string
                representing the kernel version.

        @raises Whatever reboot_func raises; the failure is recorded as
                END FAIL before the exception propagates.
        """
        try:
            self.record('START', None, 'reboot')
            reboot_func()
        except Exception, e:
            # Record the failure (with traceback) before propagating it.
            err_msg = str(e) + '\n' + traceback.format_exc()
            self.record('END FAIL', None, 'reboot', err_msg)
            raise
        else:
            # Only query the kernel version after a successful reboot.
            kernel = get_kernel_func()
            self.record('END GOOD', None, 'reboot',
                        optional_fields={"kernel": kernel})
744
745
jadmanskie432dd22009-01-30 15:04:51 +0000746 def run_control(self, path):
747 """Execute a control file found at path (relative to the autotest
748 path). Intended for executing a control file within a control file,
749 not for running the top-level job control file."""
750 path = os.path.join(self.autodir, path)
751 control_file = self._load_control_file(path)
mbligh0d0f67d2009-11-06 03:15:03 +0000752 self.run(control=control_file, control_file_dir=self._USE_TEMP_DIR)
jadmanskie432dd22009-01-30 15:04:51 +0000753
754
jadmanskic09fc152008-10-15 17:56:59 +0000755 def add_sysinfo_command(self, command, logfile=None, on_every_test=False):
mbligh4395bbd2009-03-25 19:34:17 +0000756 self._add_sysinfo_loggable(sysinfo.command(command, logf=logfile),
jadmanskic09fc152008-10-15 17:56:59 +0000757 on_every_test)
758
759
760 def add_sysinfo_logfile(self, file, on_every_test=False):
761 self._add_sysinfo_loggable(sysinfo.logfile(file), on_every_test)
762
763
764 def _add_sysinfo_loggable(self, loggable, on_every_test):
765 if on_every_test:
766 self.sysinfo.test_loggables.add(loggable)
767 else:
768 self.sysinfo.boot_loggables.add(loggable)
769
770
jadmanski10646442008-08-13 14:05:21 +0000771 def _read_warnings(self):
jadmanskif37df842009-02-11 00:03:26 +0000772 """Poll all the warning loggers and extract any new warnings that have
773 been logged. If the warnings belong to a category that is currently
774 disabled, this method will discard them and they will no longer be
775 retrievable.
776
777 Returns a list of (timestamp, message) tuples, where timestamp is an
778 integer epoch timestamp."""
jadmanski10646442008-08-13 14:05:21 +0000779 warnings = []
780 while True:
781 # pull in a line of output from every logger that has
782 # output ready to be read
mbligh2b92b862008-11-22 13:25:32 +0000783 loggers, _, _ = select.select(self.warning_loggers, [], [], 0)
jadmanski10646442008-08-13 14:05:21 +0000784 closed_loggers = set()
785 for logger in loggers:
786 line = logger.readline()
787 # record any broken pipes (aka line == empty)
788 if len(line) == 0:
789 closed_loggers.add(logger)
790 continue
jadmanskif37df842009-02-11 00:03:26 +0000791 # parse out the warning
792 timestamp, msgtype, msg = line.split('\t', 2)
793 timestamp = int(timestamp)
794 # if the warning is valid, add it to the results
795 if self.warning_manager.is_valid(timestamp, msgtype):
796 warnings.append((timestamp, msg.strip()))
jadmanski10646442008-08-13 14:05:21 +0000797
798 # stop listening to loggers that are closed
799 self.warning_loggers -= closed_loggers
800
801 # stop if none of the loggers have any output left
802 if not loggers:
803 break
804
805 # sort into timestamp order
806 warnings.sort()
807 return warnings
808
809
showardcc929362010-01-25 21:20:41 +0000810 def _unique_subdirectory(self, base_subdirectory_name):
811 """Compute a unique results subdirectory based on the given name.
812
813 Appends base_subdirectory_name with a number as necessary to find a
814 directory name that doesn't already exist.
815 """
816 subdirectory = base_subdirectory_name
817 counter = 1
818 while os.path.exists(os.path.join(self.resultdir, subdirectory)):
819 subdirectory = base_subdirectory_name + '.' + str(counter)
820 counter += 1
821 return subdirectory
822
823
jadmanski52053632010-06-11 21:08:10 +0000824 def get_record_context(self):
825 """Returns an object representing the current job.record context.
826
827 The object returned is an opaque object with a 0-arg restore method
828 which can be called to restore the job.record context (i.e. indentation)
829 to the current level. The intention is that it should be used when
830 something external which generate job.record calls (e.g. an autotest
831 client) can fail catastrophically and the server job record state
832 needs to be reset to its original "known good" state.
833
834 @return: A context object with a 0-arg restore() method."""
835 return self._indenter.get_context()
836
837
showardcc929362010-01-25 21:20:41 +0000838 def record_summary(self, status_code, test_name, reason='', attributes=None,
839 distinguishing_attributes=(), child_test_ids=None):
840 """Record a summary test result.
841
842 @param status_code: status code string, see
843 common_lib.log.is_valid_status()
844 @param test_name: name of the test
845 @param reason: (optional) string providing detailed reason for test
846 outcome
847 @param attributes: (optional) dict of string keyvals to associate with
848 this result
849 @param distinguishing_attributes: (optional) list of attribute names
850 that should be used to distinguish identically-named test
851 results. These attributes should be present in the attributes
852 parameter. This is used to generate user-friendly subdirectory
853 names.
854 @param child_test_ids: (optional) list of test indices for test results
855 used in generating this result.
856 """
857 subdirectory_name_parts = [test_name]
858 for attribute in distinguishing_attributes:
859 assert attributes
860 assert attribute in attributes, '%s not in %s' % (attribute,
861 attributes)
862 subdirectory_name_parts.append(attributes[attribute])
863 base_subdirectory_name = '.'.join(subdirectory_name_parts)
864
865 subdirectory = self._unique_subdirectory(base_subdirectory_name)
866 subdirectory_path = os.path.join(self.resultdir, subdirectory)
867 os.mkdir(subdirectory_path)
868
869 self.record(status_code, subdirectory, test_name,
870 status=reason, optional_fields={'is_summary': True})
871
872 if attributes:
873 utils.write_keyval(subdirectory_path, attributes)
874
875 if child_test_ids:
876 ids_string = ','.join(str(test_id) for test_id in child_test_ids)
877 summary_data = {'child_test_ids': ids_string}
878 utils.write_keyval(os.path.join(subdirectory_path, 'summary_data'),
879 summary_data)
880
881
jadmanski16a7ff72009-04-01 18:19:53 +0000882 def disable_warnings(self, warning_type):
jadmanskif37df842009-02-11 00:03:26 +0000883 self.warning_manager.disable_warnings(warning_type)
jadmanski16a7ff72009-04-01 18:19:53 +0000884 self.record("INFO", None, None,
885 "disabling %s warnings" % warning_type,
886 {"warnings.disable": warning_type})
jadmanskif37df842009-02-11 00:03:26 +0000887
888
jadmanski16a7ff72009-04-01 18:19:53 +0000889 def enable_warnings(self, warning_type):
jadmanskif37df842009-02-11 00:03:26 +0000890 self.warning_manager.enable_warnings(warning_type)
jadmanski16a7ff72009-04-01 18:19:53 +0000891 self.record("INFO", None, None,
892 "enabling %s warnings" % warning_type,
893 {"warnings.enable": warning_type})
jadmanskif37df842009-02-11 00:03:26 +0000894
895
jadmanski779bd292009-03-19 17:33:33 +0000896 def get_status_log_path(self, subdir=None):
897 """Return the path to the job status log.
898
899 @param subdir - Optional paramter indicating that you want the path
900 to a subdirectory status log.
901
902 @returns The path where the status log should be.
903 """
mbligh210bae62009-04-01 18:33:13 +0000904 if self.resultdir:
905 if subdir:
906 return os.path.join(self.resultdir, subdir, "status.log")
907 else:
908 return os.path.join(self.resultdir, "status.log")
jadmanski779bd292009-03-19 17:33:33 +0000909 else:
mbligh210bae62009-04-01 18:33:13 +0000910 return None
jadmanski779bd292009-03-19 17:33:33 +0000911
912
jadmanski6bb32d72009-03-19 20:25:24 +0000913 def _update_uncollected_logs_list(self, update_func):
914 """Updates the uncollected logs list in a multi-process safe manner.
915
916 @param update_func - a function that updates the list of uncollected
917 logs. Should take one parameter, the list to be updated.
918 """
mbligh0d0f67d2009-11-06 03:15:03 +0000919 if self._uncollected_log_file:
920 log_file = open(self._uncollected_log_file, "r+")
mbligha788dc42009-03-26 21:10:16 +0000921 fcntl.flock(log_file, fcntl.LOCK_EX)
jadmanski6bb32d72009-03-19 20:25:24 +0000922 try:
923 uncollected_logs = pickle.load(log_file)
924 update_func(uncollected_logs)
925 log_file.seek(0)
926 log_file.truncate()
927 pickle.dump(uncollected_logs, log_file)
jadmanski3bff9092009-04-22 18:09:47 +0000928 log_file.flush()
jadmanski6bb32d72009-03-19 20:25:24 +0000929 finally:
930 fcntl.flock(log_file, fcntl.LOCK_UN)
931 log_file.close()
932
933
934 def add_client_log(self, hostname, remote_path, local_path):
935 """Adds a new set of client logs to the list of uncollected logs,
936 to allow for future log recovery.
937
938 @param host - the hostname of the machine holding the logs
939 @param remote_path - the directory on the remote machine holding logs
940 @param local_path - the local directory to copy the logs into
941 """
942 def update_func(logs_list):
943 logs_list.append((hostname, remote_path, local_path))
944 self._update_uncollected_logs_list(update_func)
945
946
947 def remove_client_log(self, hostname, remote_path, local_path):
948 """Removes a set of client logs from the list of uncollected logs,
949 to allow for future log recovery.
950
951 @param host - the hostname of the machine holding the logs
952 @param remote_path - the directory on the remote machine holding logs
953 @param local_path - the local directory to copy the logs into
954 """
955 def update_func(logs_list):
956 logs_list.remove((hostname, remote_path, local_path))
957 self._update_uncollected_logs_list(update_func)
958
959
mbligh0d0f67d2009-11-06 03:15:03 +0000960 def get_client_logs(self):
961 """Retrieves the list of uncollected logs, if it exists.
962
963 @returns A list of (host, remote_path, local_path) tuples. Returns
964 an empty list if no uncollected logs file exists.
965 """
966 log_exists = (self._uncollected_log_file and
967 os.path.exists(self._uncollected_log_file))
968 if log_exists:
969 return pickle.load(open(self._uncollected_log_file))
970 else:
971 return []
972
973
    def _fill_server_control_namespace(self, namespace, protect=True):
        """
        Prepare a namespace to be used when executing server control files.

        This sets up the control file API by importing modules and making them
        available under the appropriate names within namespace.

        For use by _execute_code().

        Args:
          namespace: The namespace dictionary to fill in.
          protect: Boolean.  If True (the default) any operation that would
              clobber an existing entry in namespace will cause an error.
        Raises:
          error.AutoservError: When a name would be clobbered by import.
        """
        def _import_names(module_name, names=()):
            """
            Import a module and assign named attributes into namespace.

            Args:
                module_name: The string module name.
                names: A limiting list of names to import from module_name.  If
                    empty (the default), all names are imported from the module
                    similar to a "from foo.bar import *" statement.
            Raises:
                error.AutoservError: When a name being imported would clobber
                    a name already in namespace.
            """
            module = __import__(module_name, {}, {}, names)

            # No names supplied?  Import * from the lowest level module.
            # (Ugh, why do I have to implement this part myself?)
            if not names:
                for submodule_name in module_name.split('.')[1:]:
                    module = getattr(module, submodule_name)
                if hasattr(module, '__all__'):
                    names = getattr(module, '__all__')
                else:
                    names = dir(module)

            # Install each name into namespace, checking to make sure it
            # doesn't override anything that already exists.
            for name in names:
                # Check for conflicts to help prevent future problems.
                if name in namespace and protect:
                    if namespace[name] is not getattr(module, name):
                        raise error.AutoservError('importing name '
                                '%s from %s %r would override %r' %
                                (name, module_name, getattr(module, name),
                                 namespace[name]))
                    else:
                        # Encourage cleanliness and the use of __all__ for a
                        # more concrete API with less surprises on '*' imports.
                        warnings.warn('%s (%r) being imported from %s for use '
                                      'in server control files is not the '
                                      'first occurrance of that import.' %
                                      (name, namespace[name], module_name))

                namespace[name] = getattr(module, name)


        # This is the equivalent of prepending a bunch of import statements to
        # the front of the control script.
        namespace.update(os=os, sys=sys, logging=logging)
        _import_names('autotest_lib.server',
                      ('hosts', 'autotest', 'kvm', 'git', 'standalone_profiler',
                       'source_kernel', 'rpm_kernel', 'deb_kernel', 'git_kernel'))
        _import_names('autotest_lib.server.subcommand',
                      ('parallel', 'parallel_simple', 'subcommand'))
        _import_names('autotest_lib.server.utils',
                      ('run', 'get_tmp_dir', 'sh_escape', 'parse_machine'))
        _import_names('autotest_lib.client.common_lib.error')
        _import_names('autotest_lib.client.common_lib.barrier', ('barrier',))

        # Inject ourself as the job object into other classes within the API.
        # (Yuck, this injection is a gross thing be part of a public API. -gps)
        #
        # XXX Base & SiteAutotest do not appear to use .job.  Who does?
        namespace['autotest'].Autotest.job = self
        # server.hosts.base_classes.Host uses .job.
        namespace['hosts'].Host.job = self
        # Propagate this job's ssh parameters to the host factory so hosts
        # created from control files inherit them.
        namespace['hosts'].factory.ssh_user = self._ssh_user
        namespace['hosts'].factory.ssh_port = self._ssh_port
        namespace['hosts'].factory.ssh_pass = self._ssh_pass
mbligh084bc172008-10-18 14:02:45 +00001059
1060
    def _execute_code(self, code_file, namespace, protect=True):
        """
        Execute code using a copy of namespace as a server control script.

        Unless protect_namespace is explicitly set to False, the dict will not
        be modified.

        Args:
          code_file: The filename of the control file to execute.
          namespace: A dict containing names to make available during execution.
          protect: Boolean.  If True (the default) a copy of the namespace dict
              is used during execution to prevent the code from modifying its
              contents outside of this function.  If False the raw dict is
              passed in and modifications will be allowed.
        """
        if protect:
            namespace = namespace.copy()
        self._fill_server_control_namespace(namespace, protect=protect)
        # TODO: Simplify and get rid of the special cases for only 1 machine.
        if len(self.machines) > 1:
            machines_text = '\n'.join(self.machines) + '\n'
            # Only rewrite the file if it does not match our machine list.
            try:
                machines_f = open(MACHINES_FILENAME, 'r')
                existing_machines_text = machines_f.read()
                machines_f.close()
            except EnvironmentError:
                # Missing/unreadable machines file: treat as out of date.
                existing_machines_text = None
            if machines_text != existing_machines_text:
                utils.open_write_close(MACHINES_FILENAME, machines_text)
        # Run the control file with the prepared namespace as both its
        # globals and locals (Python 2 execfile).
        execfile(code_file, namespace, namespace)
jadmanski10646442008-08-13 14:05:21 +00001092
1093
jadmanskie29d0e42010-06-17 16:06:52 +00001094 def _parse_status(self, new_line):
mbligh0d0f67d2009-11-06 03:15:03 +00001095 if not self._using_parser:
jadmanski10646442008-08-13 14:05:21 +00001096 return
jadmanskie29d0e42010-06-17 16:06:52 +00001097 new_tests = self.parser.process_lines([new_line])
jadmanski10646442008-08-13 14:05:21 +00001098 for test in new_tests:
1099 self.__insert_test(test)
1100
1101
    def __insert_test(self, test):
        """
        An internal method to insert a new test result into the
        database. This method will not raise an exception, even if an
        error occurs during the insert, to avoid failing a test
        simply because of unexpected database issues.

        @param test: a parsed test result object produced by the status
                log parser.
        """
        # The run/failure counters are updated regardless of whether the
        # database insert below succeeds.
        self.num_tests_run += 1
        if status_lib.is_worse_than_or_equal_to(test.status, 'FAIL'):
            self.num_tests_failed += 1
        try:
            self.results_db.insert_test(self.job_model, test)
        except Exception:
            # Deliberately swallow database errors; just report them.
            msg = ("WARNING: An unexpected error occured while "
                   "inserting test results into the database. "
                   "Ignoring error.\n" + traceback.format_exc())
            print >> sys.stderr, msg
1118
mblighcaa62c22008-04-07 21:51:17 +00001119
mblighfc3da5b2010-01-06 18:37:22 +00001120 def preprocess_client_state(self):
1121 """
1122 Produce a state file for initializing the state of a client job.
1123
1124 Creates a new client state file with all the current server state, as
1125 well as some pre-set client state.
1126
1127 @returns The path of the file the state was written into.
1128 """
1129 # initialize the sysinfo state
1130 self._state.set('client', 'sysinfo', self.sysinfo.serialize())
1131
1132 # dump the state out to a tempfile
1133 fd, file_path = tempfile.mkstemp(dir=self.tmpdir)
1134 os.close(fd)
mbligha2c99492010-01-27 22:59:50 +00001135
1136 # write_to_file doesn't need locking, we exclusively own file_path
mblighfc3da5b2010-01-06 18:37:22 +00001137 self._state.write_to_file(file_path)
1138 return file_path
1139
1140
    def postprocess_client_state(self, state_path):
        """
        Update the state of this job with the state from a client job.

        Updates the state of the server side of a job with the final state
        of a client job that was run. Updates the non-client-specific state,
        pulls in some specific bits from the client-specific state, and then
        discards the rest. Removes the state file afterwards

        @param state_path A path to the state file from the client.
        """
        # update the on-disk state
        try:
            self._state.read_from_file(state_path)
            os.remove(state_path)
        except OSError, e:
            # ignore file-not-found errors
            if e.errno != errno.ENOENT:
                raise
            else:
                logging.debug('Client state file %s not found', state_path)

        # update the sysinfo state
        if self._state.has('client', 'sysinfo'):
            self.sysinfo.deserialize(self._state.get('client', 'sysinfo'))

        # drop all the client-specific state
        self._state.discard_namespace('client')
1169
1170
mbligh0a883702010-04-21 01:58:34 +00001171 def clear_all_known_hosts(self):
1172 """Clears known hosts files for all AbstractSSHHosts."""
1173 for host in self.hosts:
1174 if isinstance(host, abstract_ssh.AbstractSSHHost):
1175 host.clear_known_hosts()
1176
1177
# Allow a site-specific implementation to extend the stock job class.
# base_server_job is passed as the default, so presumably import_site_class
# returns it unchanged when no site_server_job module exists -- TODO confirm.
site_server_job = utils.import_site_class(
    __file__, "autotest_lib.server.site_server_job", "site_server_job",
    base_server_job)
jadmanski0afbb632008-06-06 21:10:57 +00001181
class server_job(site_server_job):
    """Concrete server job class; identical to site_server_job, which may
    carry site-specific extensions on top of base_server_job."""
    pass
jadmanskif37df842009-02-11 00:03:26 +00001184
1185
class warning_manager(object):
    """Tracks which warning categories are currently suppressed.

    For every warning type this records the time intervals during which
    that type was disabled; warnings whose timestamp falls inside such an
    interval are considered invalid and get discarded.
    """
    def __init__(self):
        # Maps warning type -> list of (start, end) epoch-second intervals
        # during which that type is disabled. An end of None means the
        # interval is still open (the type is currently disabled).
        self.disabled_warnings = {}


    def is_valid(self, timestamp, warning_type):
        """Check whether a warning should be kept.

        A warning is invalid if its type was disabled at the moment it
        occurred, i.e. its timestamp falls inside one of the recorded
        disabled intervals for that type."""
        for start, end in self.disabled_warnings.get(warning_type, []):
            after_start = timestamp >= start
            before_end = end is None or timestamp < end
            if after_start and before_end:
                return False
        return True


    def disable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, disables all further warnings of this type."""
        intervals = self.disabled_warnings.setdefault(warning_type, [])
        already_disabled = intervals and intervals[-1][1] is None
        if not already_disabled:
            intervals.append((int(current_time_func()), None))


    def enable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, enables all further warnings of this type."""
        intervals = self.disabled_warnings.get(warning_type, [])
        currently_disabled = intervals and intervals[-1][1] is None
        if currently_disabled:
            intervals[-1] = (intervals[-1][0], int(current_time_func()))