blob: 6ec3f97d27ccf23039933ff8ab0272cf629c655e [file] [log] [blame]
Paul Pendlebury7c1fdcf2011-05-04 12:39:15 -07001# Copyright (c) 2011 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
mbligh57e78662008-06-17 19:53:49 +00004"""
5The main job wrapper for the server side.
6
7This is the core infrastructure. Derived from the client side job.py
8
9Copyright Martin J. Bligh, Andy Whitcroft 2007
10"""
11
Scott Zawalski91493c82013-01-25 16:15:20 -050012import getpass, os, sys, re, tempfile, time, select, platform
mblighfc3da5b2010-01-06 18:37:22 +000013import traceback, shutil, warnings, fcntl, pickle, logging, itertools, errno
showard75cdfee2009-06-10 17:40:41 +000014from autotest_lib.client.bin import sysinfo
mbligh0d0f67d2009-11-06 03:15:03 +000015from autotest_lib.client.common_lib import base_job
Scott Zawalski91493c82013-01-25 16:15:20 -050016from autotest_lib.client.common_lib import error, utils, packages
showard75cdfee2009-06-10 17:40:41 +000017from autotest_lib.client.common_lib import logging_manager
Paul Pendlebury57593562011-06-15 10:45:49 -070018from autotest_lib.server import test, subcommand, profilers
mbligh0a883702010-04-21 01:58:34 +000019from autotest_lib.server.hosts import abstract_ssh
jadmanski10646442008-08-13 14:05:21 +000020from autotest_lib.tko import db as tko_db, status_lib, utils as tko_utils
jadmanski10646442008-08-13 14:05:21 +000021
22
mbligh084bc172008-10-18 14:02:45 +000023def _control_segment_path(name):
24 """Get the pathname of the named control segment file."""
jadmanski10646442008-08-13 14:05:21 +000025 server_dir = os.path.dirname(os.path.abspath(__file__))
mbligh084bc172008-10-18 14:02:45 +000026 return os.path.join(server_dir, "control_segments", name)
jadmanski10646442008-08-13 14:05:21 +000027
28
# Filenames used inside each job's results directory.
CLIENT_CONTROL_FILENAME = 'control'
SERVER_CONTROL_FILENAME = 'control.srv'
MACHINES_FILENAME = '.machines'

# Canned control files shipped under server/control_segments; each is
# executed by the server job at a well-defined point in the job lifecycle
# (install, verify, repair, crash collection, cleanup, provisioning, ...).
CLIENT_WRAPPER_CONTROL_FILE = _control_segment_path('client_wrapper')
CRASHDUMPS_CONTROL_FILE = _control_segment_path('crashdumps')
CRASHINFO_CONTROL_FILE = _control_segment_path('crashinfo')
INSTALL_CONTROL_FILE = _control_segment_path('install')
CLEANUP_CONTROL_FILE = _control_segment_path('cleanup')
VERIFY_CONTROL_FILE = _control_segment_path('verify')
REPAIR_CONTROL_FILE = _control_segment_path('repair')
PROVISION_CONTROL_FILE = _control_segment_path('provision')
VERIFY_JOB_REPO_URL_CONTROL_FILE = _control_segment_path('verify_job_repo_url')
jadmanski10646442008-08-13 14:05:21 +000042
43
mbligh062ed152009-01-13 00:57:14 +000044# by default provide a stub that generates no site data
45def _get_site_job_data_dummy(job):
46 return {}
47
48
class status_indenter(base_job.status_indenter):
    """A status indenter backed by a plain integer counter."""
    def __init__(self):
        self._indent = 0


    @property
    def indent(self):
        """The current indentation level."""
        return self._indent


    def increment(self):
        """Move one indentation level deeper."""
        self._indent = self._indent + 1


    def decrement(self):
        """Move one indentation level back out."""
        self._indent = self._indent - 1


    def get_context(self):
        """Returns a context object for use by job.get_record_context."""
        class context(object):
            def __init__(self, owner, level):
                self._indenter = owner
                self._indent = level
            def restore(self):
                # roll the owning indenter back to the captured level
                self._indenter._indent = self._indent
        return context(self, self._indent)
77
78
class server_job_record_hook(object):
    """The job.record hook for server job. Used to inject WARN messages from
    the console or vlm whenever new logs are written, and to echo any logs
    to INFO level logging. Implemented as a class so that it can use state to
    block recursive calls, so that the hook can call job.record itself to
    log WARN messages.

    Depends on job._read_warnings and job._logger.
    """
    def __init__(self, job):
        self._job = job
        self._being_called = False


    def __call__(self, entry):
        """Invoke the real hook (_hook) unless we are already inside it.

        Makes no attempt at thread safety; the only goal is to break the
        infinite recursion that a job.record -> _hook -> job.record ->
        _hook -> ... chain would otherwise produce.
        """
        if self._being_called:
            return
        self._being_called = True
        try:
            self._hook(self._job, entry)
        finally:
            self._being_called = False


    @staticmethod
    def _hook(job, entry):
        """The core hook, which can safely call job.record."""
        pending = []
        # drain any freshly-logged warnings from the warning loggers and
        # record each one as a WARN status entry
        for timestamp, msg in job._read_warnings():
            warn_entry = base_job.status_log_entry(
                'WARN', None, None, msg, {}, timestamp=timestamp)
            pending.append(warn_entry)
            job.record_entry(warn_entry)
        pending.append(entry)
        # echo rendered versions of all the status logs to INFO and feed
        # them through the continuous status parser
        for log_entry in pending:
            rendered = job._logger.render_entry(log_entry)
            logging.info(rendered)
            job._parse_status(rendered)
jadmanski2a89dac2010-06-11 14:32:58 +0000123
124
mbligh0d0f67d2009-11-06 03:15:03 +0000125class base_server_job(base_job.base_job):
126 """The server-side concrete implementation of base_job.
jadmanski10646442008-08-13 14:05:21 +0000127
mbligh0d0f67d2009-11-06 03:15:03 +0000128 Optional properties provided by this implementation:
129 serverdir
130 conmuxdir
131
132 num_tests_run
133 num_tests_failed
134
135 warning_manager
136 warning_loggers
jadmanski10646442008-08-13 14:05:21 +0000137 """
138
mbligh0d0f67d2009-11-06 03:15:03 +0000139 _STATUS_VERSION = 1
jadmanski10646442008-08-13 14:05:21 +0000140
141 def __init__(self, control, args, resultdir, label, user, machines,
142 client=False, parse_job='',
Scott Zawalski91493c82013-01-25 16:15:20 -0500143 ssh_user='root', ssh_port=22, ssh_pass='', test_retry=0,
mblighe0cbc912010-03-11 18:03:07 +0000144 group_name='', tag='',
145 control_filename=SERVER_CONTROL_FILENAME):
jadmanski10646442008-08-13 14:05:21 +0000146 """
mbligh374f3412009-05-13 21:29:45 +0000147 Create a server side job object.
mblighb5dac432008-11-27 00:38:44 +0000148
mblighe7d9c602009-07-02 19:02:33 +0000149 @param control: The pathname of the control file.
150 @param args: Passed to the control file.
151 @param resultdir: Where to throw the results.
152 @param label: Description of the job.
153 @param user: Username for the job (email address).
154 @param client: True if this is a client-side control file.
155 @param parse_job: string, if supplied it is the job execution tag that
156 the results will be passed through to the TKO parser with.
157 @param ssh_user: The SSH username. [root]
158 @param ssh_port: The SSH port number. [22]
159 @param ssh_pass: The SSH passphrase, if needed.
Scott Zawalski91493c82013-01-25 16:15:20 -0500160 @param test_retry: The number of times to retry a test if the test did
161 not complete successfully.
mblighe7d9c602009-07-02 19:02:33 +0000162 @param group_name: If supplied, this will be written out as
mbligh374f3412009-05-13 21:29:45 +0000163 host_group_name in the keyvals file for the parser.
mblighe7d9c602009-07-02 19:02:33 +0000164 @param tag: The job execution tag from the scheduler. [optional]
mblighe0cbc912010-03-11 18:03:07 +0000165 @param control_filename: The filename where the server control file
166 should be written in the results directory.
jadmanski10646442008-08-13 14:05:21 +0000167 """
Scott Zawalski91493c82013-01-25 16:15:20 -0500168 super(base_server_job, self).__init__(resultdir=resultdir,
169 test_retry=test_retry)
mbligh0d0f67d2009-11-06 03:15:03 +0000170 path = os.path.dirname(__file__)
Scott Zawalski91493c82013-01-25 16:15:20 -0500171 self.test_retry = test_retry
mbligh0d0f67d2009-11-06 03:15:03 +0000172 self.control = control
173 self._uncollected_log_file = os.path.join(self.resultdir,
174 'uncollected_logs')
175 debugdir = os.path.join(self.resultdir, 'debug')
176 if not os.path.exists(debugdir):
177 os.mkdir(debugdir)
178
179 if user:
180 self.user = user
181 else:
182 self.user = getpass.getuser()
183
jadmanski808f4b12010-04-09 22:30:31 +0000184 self.args = args
Peter Mayo7a875762012-06-13 14:38:15 -0400185 self.label = label
jadmanski10646442008-08-13 14:05:21 +0000186 self.machines = machines
mbligh0d0f67d2009-11-06 03:15:03 +0000187 self._client = client
jadmanski10646442008-08-13 14:05:21 +0000188 self.warning_loggers = set()
jadmanskif37df842009-02-11 00:03:26 +0000189 self.warning_manager = warning_manager()
mbligh0d0f67d2009-11-06 03:15:03 +0000190 self._ssh_user = ssh_user
191 self._ssh_port = ssh_port
192 self._ssh_pass = ssh_pass
mblighe7d9c602009-07-02 19:02:33 +0000193 self.tag = tag
mbligh09108442008-10-15 16:27:38 +0000194 self.last_boot_tag = None
jadmanski53aaf382008-11-17 16:22:31 +0000195 self.hosts = set()
mbligh0d0f67d2009-11-06 03:15:03 +0000196 self.drop_caches = False
mblighb5dac432008-11-27 00:38:44 +0000197 self.drop_caches_between_iterations = False
mblighe0cbc912010-03-11 18:03:07 +0000198 self._control_filename = control_filename
jadmanski10646442008-08-13 14:05:21 +0000199
showard75cdfee2009-06-10 17:40:41 +0000200 self.logging = logging_manager.get_logging_manager(
201 manage_stdout_and_stderr=True, redirect_fds=True)
202 subcommand.logging_manager_object = self.logging
jadmanski10646442008-08-13 14:05:21 +0000203
mbligh0d0f67d2009-11-06 03:15:03 +0000204 self.sysinfo = sysinfo.sysinfo(self.resultdir)
jadmanski043e1132008-11-19 17:10:32 +0000205 self.profilers = profilers.profilers(self)
jadmanskic09fc152008-10-15 17:56:59 +0000206
jadmanski10646442008-08-13 14:05:21 +0000207 job_data = {'label' : label, 'user' : user,
208 'hostname' : ','.join(machines),
Eric Li861b2d52011-02-04 14:50:35 -0800209 'drone' : platform.node(),
mbligh0d0f67d2009-11-06 03:15:03 +0000210 'status_version' : str(self._STATUS_VERSION),
showard170873e2009-01-07 00:22:26 +0000211 'job_started' : str(int(time.time()))}
mbligh374f3412009-05-13 21:29:45 +0000212 if group_name:
213 job_data['host_group_name'] = group_name
jadmanski10646442008-08-13 14:05:21 +0000214
mbligh0d0f67d2009-11-06 03:15:03 +0000215 # only write these keyvals out on the first job in a resultdir
216 if 'job_started' not in utils.read_keyval(self.resultdir):
217 job_data.update(get_site_job_data(self))
218 utils.write_keyval(self.resultdir, job_data)
219
220 self._parse_job = parse_job
showardcc929362010-01-25 21:20:41 +0000221 self._using_parser = (self._parse_job and len(machines) <= 1)
mbligh0d0f67d2009-11-06 03:15:03 +0000222 self.pkgmgr = packages.PackageManager(
223 self.autodir, run_function_dargs={'timeout':600})
showard21baa452008-10-21 00:08:39 +0000224 self.num_tests_run = 0
225 self.num_tests_failed = 0
226
jadmanski550fdc22008-11-20 16:32:08 +0000227 self._register_subcommand_hooks()
228
mbligh0d0f67d2009-11-06 03:15:03 +0000229 # these components aren't usable on the server
230 self.bootloader = None
231 self.harness = None
232
jadmanski2a89dac2010-06-11 14:32:58 +0000233 # set up the status logger
jadmanski52053632010-06-11 21:08:10 +0000234 self._indenter = status_indenter()
jadmanski2a89dac2010-06-11 14:32:58 +0000235 self._logger = base_job.status_logger(
jadmanski52053632010-06-11 21:08:10 +0000236 self, self._indenter, 'status.log', 'status.log',
jadmanski2a89dac2010-06-11 14:32:58 +0000237 record_hook=server_job_record_hook(self))
238
mbligh0d0f67d2009-11-06 03:15:03 +0000239
240 @classmethod
241 def _find_base_directories(cls):
242 """
243 Determine locations of autodir, clientdir and serverdir. Assumes
244 that this file is located within serverdir and uses __file__ along
245 with relative paths to resolve the location.
246 """
247 serverdir = os.path.abspath(os.path.dirname(__file__))
248 autodir = os.path.normpath(os.path.join(serverdir, '..'))
249 clientdir = os.path.join(autodir, 'client')
250 return autodir, clientdir, serverdir
251
252
Scott Zawalski91493c82013-01-25 16:15:20 -0500253 def _find_resultdir(self, resultdir, *args, **dargs):
mbligh0d0f67d2009-11-06 03:15:03 +0000254 """
255 Determine the location of resultdir. For server jobs we expect one to
256 always be explicitly passed in to __init__, so just return that.
257 """
258 if resultdir:
259 return os.path.normpath(resultdir)
260 else:
261 return None
262
jadmanski550fdc22008-11-20 16:32:08 +0000263
    def _get_status_logger(self):
        """Return a reference to the status logger.

        This is the base_job.status_logger instance created in __init__
        with the server job's record hook attached.
        """
        return self._logger
267
268
jadmanskie432dd22009-01-30 15:04:51 +0000269 @staticmethod
270 def _load_control_file(path):
271 f = open(path)
272 try:
273 control_file = f.read()
274 finally:
275 f.close()
276 return re.sub('\r', '', control_file)
277
278
    def _register_subcommand_hooks(self):
        """
        Register some hooks into the subcommand modules that allow us
        to properly clean up self.hosts created in forked subprocesses.
        """
        def on_fork(cmd):
            # snapshot which hosts already existed at fork time
            self._existing_hosts_on_fork = set(self.hosts)
        def on_join(cmd):
            # any host added since the fork was created by the subprocess;
            # close those now that the subcommand has finished
            new_hosts = self.hosts - self._existing_hosts_on_fork
            for host in new_hosts:
                host.close()
        subcommand.subcommand.register_fork_hook(on_fork)
        subcommand.subcommand.register_join_hook(on_join)
292
jadmanski10646442008-08-13 14:05:21 +0000293
    def init_parser(self):
        """
        Start the continuous parsing of self.resultdir. This sets up
        the database connection and inserts the basic job object into
        the database if necessary.

        No-op unless continuous parsing was enabled in __init__
        (self._using_parser).
        """
        if not self._using_parser:
            return
        # redirect parser debugging to .parse.log
        parse_log = os.path.join(self.resultdir, '.parse.log')
        # buffering=0 (unbuffered) so debug output survives a crash
        parse_log = open(parse_log, 'w', 0)
        tko_utils.redirect_parser_debugging(parse_log)
        # create a job model object and set up the db
        self.results_db = tko_db.db(autocommit=True)
        self.parser = status_lib.parser(self._STATUS_VERSION)
        self.job_model = self.parser.make_job(self.resultdir)
        self.parser.start(self.job_model)
        # check if a job already exists in the db and insert it if
        # it does not
        job_idx = self.results_db.find_job(self._parse_job)
        if job_idx is None:
            self.results_db.insert_job(self._parse_job, self.job_model)
        else:
            machine_idx = self.results_db.lookup_machine(self.job_model.machine)
            self.job_model.index = job_idx
            self.job_model.machine_idx = machine_idx
320
321
322 def cleanup_parser(self):
mbligh2b92b862008-11-22 13:25:32 +0000323 """
324 This should be called after the server job is finished
jadmanski10646442008-08-13 14:05:21 +0000325 to carry out any remaining cleanup (e.g. flushing any
mbligh2b92b862008-11-22 13:25:32 +0000326 remaining test results to the results db)
327 """
mbligh0d0f67d2009-11-06 03:15:03 +0000328 if not self._using_parser:
jadmanski10646442008-08-13 14:05:21 +0000329 return
330 final_tests = self.parser.end()
331 for test in final_tests:
332 self.__insert_test(test)
mbligh0d0f67d2009-11-06 03:15:03 +0000333 self._using_parser = False
jadmanski10646442008-08-13 14:05:21 +0000334
335
    def verify(self):
        """Run the verify control segment against all job machines.

        Records an ABORT entry in the status log and re-raises if the
        verify control segment fails.
        """
        if not self.machines:
            raise error.AutoservError('No machines specified to verify')
        if self.resultdir:
            os.chdir(self.resultdir)
        try:
            namespace = {'machines' : self.machines, 'job' : self,
                         'ssh_user' : self._ssh_user,
                         'ssh_port' : self._ssh_port,
                         'ssh_pass' : self._ssh_pass}
            self._execute_code(VERIFY_CONTROL_FILE, namespace, protect=False)
        except Exception, e:
            msg = ('Verify failed\n' + str(e) + '\n' + traceback.format_exc())
            self.record('ABORT', None, None, msg)
            raise
351
352
    def repair(self, host_protection):
        """Run the repair control segment against all job machines.

        @param host_protection: host protection level, exposed to the repair
                control segment as 'protection_level'.
        """
        if not self.machines:
            raise error.AutoservError('No machines specified to repair')
        if self.resultdir:
            os.chdir(self.resultdir)
        namespace = {'machines': self.machines, 'job': self,
                     'ssh_user': self._ssh_user, 'ssh_port': self._ssh_port,
                     'ssh_pass': self._ssh_pass,
                     'protection_level': host_protection}

        self._execute_code(REPAIR_CONTROL_FILE, namespace, protect=False)
jadmanski10646442008-08-13 14:05:21 +0000364
365
Alex Millercb79ba72013-05-29 14:43:00 -0700366 def provision(self, labels):
367 """
368 Provision all hosts to match |labels|.
369
370 @param labels: A comma seperated string of labels to provision the
371 host to.
372
373 """
374 namespace = {'provision_labels': labels}
375 control = self._load_control_file(PROVISION_CONTROL_FILE)
376 self.run(control=control, namespace=namespace)
377
378
jadmanski10646442008-08-13 14:05:21 +0000379 def precheck(self):
380 """
381 perform any additional checks in derived classes.
382 """
383 pass
384
385
    def enable_external_logging(self):
        """
        Start or restart external logging mechanism.

        No-op hook in the base class; subclasses may override.
        """
        pass
391
392
    def disable_external_logging(self):
        """
        Pause or stop external logging mechanism.

        No-op hook in the base class; subclasses may override.
        """
        pass
398
399
    def use_external_logging(self):
        """
        Return True if external logging should be used.

        Always False in the base class; subclasses may override.
        """
        return False
405
406
    def _make_parallel_wrapper(self, function, machines, log):
        """Wrap function as appropriate for calling by parallel_simple.

        Three cases:
          * parsing enabled and forking: per-machine wrapper that re-points
            this job object at a single machine and runs the parser around
            the call;
          * multiple machines with logging: per-machine wrapper that sets up
            a per-machine execution context and keyvals;
          * otherwise: function is returned unwrapped.
        """
        # we fork unless this is a single-machine job running on exactly
        # the job's own machine list
        is_forking = not (len(machines) == 1 and self.machines == machines)
        if self._parse_job and is_forking and log:
            def wrapper(machine):
                # NOTE(review): this mutates self (parse job tag, machines,
                # parser state) — presumably safe because each wrapper runs
                # in a forked subcommand process; confirm against
                # subcommand.parallel_simple
                self._parse_job += "/" + machine
                self._using_parser = True
                self.machines = [machine]
                self.push_execution_context(machine)
                os.chdir(self.resultdir)
                utils.write_keyval(self.resultdir, {"hostname": machine})
                self.init_parser()
                result = function(machine)
                self.cleanup_parser()
                return result
        elif len(machines) > 1 and log:
            def wrapper(machine):
                self.push_execution_context(machine)
                os.chdir(self.resultdir)
                machine_data = {'hostname' : machine,
                                'status_version' : str(self._STATUS_VERSION)}
                utils.write_keyval(self.resultdir, machine_data)
                result = function(machine)
                return result
        else:
            wrapper = function
        return wrapper
434
435
436 def parallel_simple(self, function, machines, log=True, timeout=None,
437 return_results=False):
438 """
439 Run 'function' using parallel_simple, with an extra wrapper to handle
440 the necessary setup for continuous parsing, if possible. If continuous
441 parsing is already properly initialized then this should just work.
442
443 @param function: A callable to run in parallel given each machine.
444 @param machines: A list of machine names to be passed one per subcommand
445 invocation of function.
446 @param log: If True, output will be written to output in a subdirectory
447 named after each machine.
448 @param timeout: Seconds after which the function call should timeout.
449 @param return_results: If True instead of an AutoServError being raised
450 on any error a list of the results|exceptions from the function
451 called on each arg is returned. [default: False]
452
453 @raises error.AutotestError: If any of the functions failed.
454 """
455 wrapper = self._make_parallel_wrapper(function, machines, log)
456 return subcommand.parallel_simple(wrapper, machines,
457 log=log, timeout=timeout,
458 return_results=return_results)
459
460
    def parallel_on_machines(self, function, machines, timeout=None):
        """
        @param function: Called in parallel with one machine as its argument.
        @param machines: A list of machines to call function(machine) on.
        @param timeout: Seconds after which the function call should timeout.

        @returns A list of machines on which function(machine) returned
                without raising an exception.
        """
        results = self.parallel_simple(function, machines, timeout=timeout,
                                       return_results=True)
        success_machines = []
        # with return_results=True, failed calls come back as Exception
        # instances in the results list; filter those machines out
        for result, machine in itertools.izip(results, machines):
            if not isinstance(result, Exception):
                success_machines.append(machine)
        return success_machines
jadmanski10646442008-08-13 14:05:21 +0000477
478
    # sentinel: pass as control_file_dir to force use of a temp directory
    _USE_TEMP_DIR = object()
    def run(self, cleanup=False, install_before=False, install_after=False,
            collect_crashdumps=True, namespace={}, control=None,
            control_file_dir=None, verify_job_repo_url=False,
            only_collect_crashinfo=False):
        """Execute the job's control file on the job machines.

        @param cleanup: If True, run the cleanup control segment afterwards.
        @param install_before: If True, run the install control segment on
                the machines before executing the control file.
        @param install_after: If True, run the install control segment again
                once the control file has finished.
        @param collect_crashdumps: If True, collect crashdumps after the run.
        @param namespace: Extra names exposed to the control file; the dict
                is copied before use, so the mutable default is safe here.
        @param control: Control file text to run; defaults to the contents
                of self.control.
        @param control_file_dir: Directory to write control files into;
                defaults to the results directory, or a temporary directory
                when _USE_TEMP_DIR is passed.
        @param verify_job_repo_url: If True, run the verify_job_repo_url
                control segment before the control file.
        @param only_collect_crashinfo: If True, skip running the control
                file and only collect crash information.
        """
        # for a normal job, make sure the uncollected logs file exists
        # for a crashinfo-only run it should already exist, bail out otherwise
        created_uncollected_logs = False
        if self.resultdir and not os.path.exists(self._uncollected_log_file):
            if only_collect_crashinfo:
                # if this is a crashinfo-only run, and there were no existing
                # uncollected logs, just bail out early
                logging.info("No existing uncollected logs, "
                             "skipping crashinfo collection")
                return
            else:
                log_file = open(self._uncollected_log_file, "w")
                pickle.dump([], log_file)
                log_file.close()
                created_uncollected_logs = True

        # use a copy so changes don't affect the original dictionary
        namespace = namespace.copy()
        machines = self.machines
        if control is None:
            if self.control is None:
                control = ''
            else:
                control = self._load_control_file(self.control)
        if control_file_dir is None:
            control_file_dir = self.resultdir

        self.aborted = False
        namespace['machines'] = machines
        namespace['args'] = self.args
        namespace['job'] = self
        namespace['ssh_user'] = self._ssh_user
        namespace['ssh_port'] = self._ssh_port
        namespace['ssh_pass'] = self._ssh_pass
        test_start_time = int(time.time())

        if self.resultdir:
            os.chdir(self.resultdir)
            # touch status.log so that the parser knows a job is running here
            open(self.get_status_log_path(), 'a').close()
            self.enable_external_logging()

        # collect crashinfo unless the control file completes without error
        collect_crashinfo = True
        temp_control_file_dir = None
        try:
            try:
                if install_before and machines:
                    self._execute_code(INSTALL_CONTROL_FILE, namespace)

                if only_collect_crashinfo:
                    return

                # If the verify_job_repo_url option is set but we're unable
                # to actually verify that the job_repo_url contains the autotest
                # package, this job will fail.
                if verify_job_repo_url:
                    self._execute_code(VERIFY_JOB_REPO_URL_CONTROL_FILE,
                                       namespace)
                else:
                    logging.warning('Not checking if job_repo_url contains '
                                    'autotest packages on %s', machines)

                # determine the dir to write the control files to
                cfd_specified = (control_file_dir
                                 and control_file_dir is not self._USE_TEMP_DIR)
                if cfd_specified:
                    temp_control_file_dir = None
                else:
                    temp_control_file_dir = tempfile.mkdtemp(
                        suffix='temp_control_file_dir')
                    control_file_dir = temp_control_file_dir
                server_control_file = os.path.join(control_file_dir,
                                                   self._control_filename)
                client_control_file = os.path.join(control_file_dir,
                                                   CLIENT_CONTROL_FILENAME)
                if self._client:
                    # client-side job: the control file goes to the client
                    # and the server runs the client wrapper segment instead
                    namespace['control'] = control
                    utils.open_write_close(client_control_file, control)
                    shutil.copyfile(CLIENT_WRAPPER_CONTROL_FILE,
                                    server_control_file)
                else:
                    utils.open_write_close(server_control_file, control)
                logging.info("Processing control file")
                self._execute_code(server_control_file, namespace)
                logging.info("Finished processing control file")

                # no error occured, so we don't need to collect crashinfo
                collect_crashinfo = False
            except Exception, e:
                try:
                    logging.exception(
                        'Exception escaped control file, job aborting:')
                    self.record('INFO', None, None, str(e),
                                {'job_abort_reason': str(e)})
                except:
                    pass # don't let logging exceptions here interfere
                raise
        finally:
            if temp_control_file_dir:
                # Clean up temp directory used for copies of the control files
                try:
                    shutil.rmtree(temp_control_file_dir)
                except Exception, e:
                    logging.warn('Could not remove temp directory %s: %s',
                                 temp_control_file_dir, e)

            if machines and (collect_crashdumps or collect_crashinfo):
                namespace['test_start_time'] = test_start_time
                if collect_crashinfo:
                    # includes crashdumps
                    self._execute_code(CRASHINFO_CONTROL_FILE, namespace)
                else:
                    self._execute_code(CRASHDUMPS_CONTROL_FILE, namespace)
            self.disable_external_logging()
            if cleanup and machines:
                self._execute_code(CLEANUP_CONTROL_FILE, namespace)
            if self._uncollected_log_file and created_uncollected_logs:
                os.remove(self._uncollected_log_file)
            if install_after and machines:
                self._execute_code(INSTALL_CONTROL_FILE, namespace)
jadmanski10646442008-08-13 14:05:21 +0000604
605
    def run_test(self, url, *args, **dargs):
        """
        Summon a test object and run it.

        tag
                tag to add to testname
        url
                url of the test to run

        Returns True if the test completed (GOOD or a handled test
        failure), False if it raised a TestBaseException; any other
        exception is re-raised.
        """
        group, testname = self.pkgmgr.get_package_name(url, 'test')
        testname, subdir, tag = self._build_tagged_test_name(testname, dargs)
        outputdir = self._make_test_outputdir(subdir)

        def group_func():
            try:
                test.runtest(self, url, tag, args, dargs)
            except error.TestBaseException, e:
                # expected test failures get recorded with their own status
                self.record(e.exit_status, subdir, testname, str(e))
                raise
            except Exception, e:
                # anything else is an outright FAIL with a traceback
                info = str(e) + "\n" + traceback.format_exc()
                self.record('FAIL', subdir, testname, info)
                raise
            else:
                self.record('GOOD', subdir, testname, 'completed successfully')

        result, exc_info = self._run_group(testname, subdir, group_func)
        if exc_info and isinstance(exc_info[1], error.TestBaseException):
            return False
        elif exc_info:
            # re-raise non-test exceptions with the original traceback
            raise exc_info[0], exc_info[1], exc_info[2]
        else:
            return True
jadmanski10646442008-08-13 14:05:21 +0000639
640
    def _run_group(self, name, subdir, function, *args, **dargs):
        """\
        Underlying method for running something inside of a group.

        Records START/END entries around the call. Returns a
        (result, exc_info) tuple: exc_info is sys.exc_info() when the
        function raised a TestBaseException, None otherwise. Any other
        exception is converted into a JobError after recording END ABORT.
        """
        result, exc_info = None, None
        try:
            self.record('START', subdir, name)
            result = function(*args, **dargs)
        except error.TestBaseException, e:
            self.record("END %s" % e.exit_status, subdir, name)
            exc_info = sys.exc_info()
        except Exception, e:
            err_msg = str(e) + '\n'
            err_msg += traceback.format_exc()
            self.record('END ABORT', subdir, name, err_msg)
            raise error.JobError(name + ' failed\n' + traceback.format_exc())
        else:
            self.record('END GOOD', subdir, name)

        return result, exc_info
jadmanski10646442008-08-13 14:05:21 +0000661
662
663 def run_group(self, function, *args, **dargs):
664 """\
665 function:
666 subroutine to run
667 *args:
668 arguments for the function
669 """
670
671 name = function.__name__
672
673 # Allow the tag for the group to be specified.
674 tag = dargs.pop('tag', None)
675 if tag:
676 name = tag
677
jadmanskide292df2008-08-26 20:51:14 +0000678 return self._run_group(name, None, function, *args, **dargs)[0]
jadmanski10646442008-08-13 14:05:21 +0000679
680
681 def run_reboot(self, reboot_func, get_kernel_func):
682 """\
683 A specialization of run_group meant specifically for handling
684 a reboot. Includes support for capturing the kernel version
685 after the reboot.
686
687 reboot_func: a function that carries out the reboot
688
689 get_kernel_func: a function that returns a string
690 representing the kernel version.
691 """
jadmanski10646442008-08-13 14:05:21 +0000692 try:
693 self.record('START', None, 'reboot')
jadmanski10646442008-08-13 14:05:21 +0000694 reboot_func()
695 except Exception, e:
jadmanski10646442008-08-13 14:05:21 +0000696 err_msg = str(e) + '\n' + traceback.format_exc()
697 self.record('END FAIL', None, 'reboot', err_msg)
jadmanski4b51d542009-04-08 14:17:16 +0000698 raise
jadmanski10646442008-08-13 14:05:21 +0000699 else:
700 kernel = get_kernel_func()
jadmanski10646442008-08-13 14:05:21 +0000701 self.record('END GOOD', None, 'reboot',
Dale Curtis74a314b2011-06-23 14:55:46 -0700702 optional_fields={"kernel": kernel})
jadmanski10646442008-08-13 14:05:21 +0000703
704
jadmanskie432dd22009-01-30 15:04:51 +0000705 def run_control(self, path):
706 """Execute a control file found at path (relative to the autotest
707 path). Intended for executing a control file within a control file,
708 not for running the top-level job control file."""
709 path = os.path.join(self.autodir, path)
710 control_file = self._load_control_file(path)
mbligh0d0f67d2009-11-06 03:15:03 +0000711 self.run(control=control_file, control_file_dir=self._USE_TEMP_DIR)
jadmanskie432dd22009-01-30 15:04:51 +0000712
713
jadmanskic09fc152008-10-15 17:56:59 +0000714 def add_sysinfo_command(self, command, logfile=None, on_every_test=False):
mbligh4395bbd2009-03-25 19:34:17 +0000715 self._add_sysinfo_loggable(sysinfo.command(command, logf=logfile),
jadmanskic09fc152008-10-15 17:56:59 +0000716 on_every_test)
717
718
719 def add_sysinfo_logfile(self, file, on_every_test=False):
720 self._add_sysinfo_loggable(sysinfo.logfile(file), on_every_test)
721
722
723 def _add_sysinfo_loggable(self, loggable, on_every_test):
724 if on_every_test:
725 self.sysinfo.test_loggables.add(loggable)
726 else:
727 self.sysinfo.boot_loggables.add(loggable)
728
729
jadmanski10646442008-08-13 14:05:21 +0000730 def _read_warnings(self):
jadmanskif37df842009-02-11 00:03:26 +0000731 """Poll all the warning loggers and extract any new warnings that have
732 been logged. If the warnings belong to a category that is currently
733 disabled, this method will discard them and they will no longer be
734 retrievable.
735
736 Returns a list of (timestamp, message) tuples, where timestamp is an
737 integer epoch timestamp."""
jadmanski10646442008-08-13 14:05:21 +0000738 warnings = []
739 while True:
740 # pull in a line of output from every logger that has
741 # output ready to be read
mbligh2b92b862008-11-22 13:25:32 +0000742 loggers, _, _ = select.select(self.warning_loggers, [], [], 0)
jadmanski10646442008-08-13 14:05:21 +0000743 closed_loggers = set()
744 for logger in loggers:
745 line = logger.readline()
746 # record any broken pipes (aka line == empty)
747 if len(line) == 0:
748 closed_loggers.add(logger)
749 continue
jadmanskif37df842009-02-11 00:03:26 +0000750 # parse out the warning
751 timestamp, msgtype, msg = line.split('\t', 2)
752 timestamp = int(timestamp)
753 # if the warning is valid, add it to the results
754 if self.warning_manager.is_valid(timestamp, msgtype):
755 warnings.append((timestamp, msg.strip()))
jadmanski10646442008-08-13 14:05:21 +0000756
757 # stop listening to loggers that are closed
758 self.warning_loggers -= closed_loggers
759
760 # stop if none of the loggers have any output left
761 if not loggers:
762 break
763
764 # sort into timestamp order
765 warnings.sort()
766 return warnings
767
768
showardcc929362010-01-25 21:20:41 +0000769 def _unique_subdirectory(self, base_subdirectory_name):
770 """Compute a unique results subdirectory based on the given name.
771
772 Appends base_subdirectory_name with a number as necessary to find a
773 directory name that doesn't already exist.
774 """
775 subdirectory = base_subdirectory_name
776 counter = 1
777 while os.path.exists(os.path.join(self.resultdir, subdirectory)):
778 subdirectory = base_subdirectory_name + '.' + str(counter)
779 counter += 1
780 return subdirectory
781
782
jadmanski52053632010-06-11 21:08:10 +0000783 def get_record_context(self):
784 """Returns an object representing the current job.record context.
785
786 The object returned is an opaque object with a 0-arg restore method
787 which can be called to restore the job.record context (i.e. indentation)
788 to the current level. The intention is that it should be used when
789 something external which generate job.record calls (e.g. an autotest
790 client) can fail catastrophically and the server job record state
791 needs to be reset to its original "known good" state.
792
793 @return: A context object with a 0-arg restore() method."""
794 return self._indenter.get_context()
795
796
showardcc929362010-01-25 21:20:41 +0000797 def record_summary(self, status_code, test_name, reason='', attributes=None,
798 distinguishing_attributes=(), child_test_ids=None):
799 """Record a summary test result.
800
801 @param status_code: status code string, see
802 common_lib.log.is_valid_status()
803 @param test_name: name of the test
804 @param reason: (optional) string providing detailed reason for test
805 outcome
806 @param attributes: (optional) dict of string keyvals to associate with
807 this result
808 @param distinguishing_attributes: (optional) list of attribute names
809 that should be used to distinguish identically-named test
810 results. These attributes should be present in the attributes
811 parameter. This is used to generate user-friendly subdirectory
812 names.
813 @param child_test_ids: (optional) list of test indices for test results
814 used in generating this result.
815 """
816 subdirectory_name_parts = [test_name]
817 for attribute in distinguishing_attributes:
818 assert attributes
819 assert attribute in attributes, '%s not in %s' % (attribute,
820 attributes)
821 subdirectory_name_parts.append(attributes[attribute])
822 base_subdirectory_name = '.'.join(subdirectory_name_parts)
823
824 subdirectory = self._unique_subdirectory(base_subdirectory_name)
825 subdirectory_path = os.path.join(self.resultdir, subdirectory)
826 os.mkdir(subdirectory_path)
827
828 self.record(status_code, subdirectory, test_name,
829 status=reason, optional_fields={'is_summary': True})
830
831 if attributes:
832 utils.write_keyval(subdirectory_path, attributes)
833
834 if child_test_ids:
835 ids_string = ','.join(str(test_id) for test_id in child_test_ids)
836 summary_data = {'child_test_ids': ids_string}
837 utils.write_keyval(os.path.join(subdirectory_path, 'summary_data'),
838 summary_data)
839
840
jadmanski16a7ff72009-04-01 18:19:53 +0000841 def disable_warnings(self, warning_type):
jadmanskif37df842009-02-11 00:03:26 +0000842 self.warning_manager.disable_warnings(warning_type)
jadmanski16a7ff72009-04-01 18:19:53 +0000843 self.record("INFO", None, None,
844 "disabling %s warnings" % warning_type,
845 {"warnings.disable": warning_type})
jadmanskif37df842009-02-11 00:03:26 +0000846
847
jadmanski16a7ff72009-04-01 18:19:53 +0000848 def enable_warnings(self, warning_type):
jadmanskif37df842009-02-11 00:03:26 +0000849 self.warning_manager.enable_warnings(warning_type)
jadmanski16a7ff72009-04-01 18:19:53 +0000850 self.record("INFO", None, None,
851 "enabling %s warnings" % warning_type,
852 {"warnings.enable": warning_type})
jadmanskif37df842009-02-11 00:03:26 +0000853
854
jadmanski779bd292009-03-19 17:33:33 +0000855 def get_status_log_path(self, subdir=None):
856 """Return the path to the job status log.
857
858 @param subdir - Optional paramter indicating that you want the path
859 to a subdirectory status log.
860
861 @returns The path where the status log should be.
862 """
mbligh210bae62009-04-01 18:33:13 +0000863 if self.resultdir:
864 if subdir:
865 return os.path.join(self.resultdir, subdir, "status.log")
866 else:
867 return os.path.join(self.resultdir, "status.log")
jadmanski779bd292009-03-19 17:33:33 +0000868 else:
mbligh210bae62009-04-01 18:33:13 +0000869 return None
jadmanski779bd292009-03-19 17:33:33 +0000870
871
jadmanski6bb32d72009-03-19 20:25:24 +0000872 def _update_uncollected_logs_list(self, update_func):
873 """Updates the uncollected logs list in a multi-process safe manner.
874
875 @param update_func - a function that updates the list of uncollected
876 logs. Should take one parameter, the list to be updated.
877 """
mbligh0d0f67d2009-11-06 03:15:03 +0000878 if self._uncollected_log_file:
879 log_file = open(self._uncollected_log_file, "r+")
mbligha788dc42009-03-26 21:10:16 +0000880 fcntl.flock(log_file, fcntl.LOCK_EX)
jadmanski6bb32d72009-03-19 20:25:24 +0000881 try:
882 uncollected_logs = pickle.load(log_file)
883 update_func(uncollected_logs)
884 log_file.seek(0)
885 log_file.truncate()
886 pickle.dump(uncollected_logs, log_file)
jadmanski3bff9092009-04-22 18:09:47 +0000887 log_file.flush()
jadmanski6bb32d72009-03-19 20:25:24 +0000888 finally:
889 fcntl.flock(log_file, fcntl.LOCK_UN)
890 log_file.close()
891
892
893 def add_client_log(self, hostname, remote_path, local_path):
894 """Adds a new set of client logs to the list of uncollected logs,
895 to allow for future log recovery.
896
897 @param host - the hostname of the machine holding the logs
898 @param remote_path - the directory on the remote machine holding logs
899 @param local_path - the local directory to copy the logs into
900 """
901 def update_func(logs_list):
902 logs_list.append((hostname, remote_path, local_path))
903 self._update_uncollected_logs_list(update_func)
904
905
906 def remove_client_log(self, hostname, remote_path, local_path):
907 """Removes a set of client logs from the list of uncollected logs,
908 to allow for future log recovery.
909
910 @param host - the hostname of the machine holding the logs
911 @param remote_path - the directory on the remote machine holding logs
912 @param local_path - the local directory to copy the logs into
913 """
914 def update_func(logs_list):
915 logs_list.remove((hostname, remote_path, local_path))
916 self._update_uncollected_logs_list(update_func)
917
918
mbligh0d0f67d2009-11-06 03:15:03 +0000919 def get_client_logs(self):
920 """Retrieves the list of uncollected logs, if it exists.
921
922 @returns A list of (host, remote_path, local_path) tuples. Returns
923 an empty list if no uncollected logs file exists.
924 """
925 log_exists = (self._uncollected_log_file and
926 os.path.exists(self._uncollected_log_file))
927 if log_exists:
928 return pickle.load(open(self._uncollected_log_file))
929 else:
930 return []
931
932
    def _fill_server_control_namespace(self, namespace, protect=True):
        """
        Prepare a namespace to be used when executing server control files.

        This sets up the control file API by importing modules and making them
        available under the appropriate names within namespace.

        For use by _execute_code().

        Args:
          namespace: The namespace dictionary to fill in.
          protect: Boolean.  If True (the default) any operation that would
              clobber an existing entry in namespace will cause an error.
        Raises:
          error.AutoservError: When a name would be clobbered by import.
        """
        def _import_names(module_name, names=()):
            """
            Import a module and assign named attributes into namespace.

            Args:
                module_name: The string module name.
                names: A limiting list of names to import from module_name.  If
                    empty (the default), all names are imported from the module
                    similar to a "from foo.bar import *" statement.
            Raises:
                error.AutoservError: When a name being imported would clobber
                    a name already in namespace.
            """
            # __import__ with a non-empty fromlist returns the leaf module;
            # with an empty one it returns the top-level package
            module = __import__(module_name, {}, {}, names)

            # No names supplied? Import * from the lowest level module.
            # (Ugh, why do I have to implement this part myself?)
            if not names:
                # walk down to the leaf module by attribute access
                for submodule_name in module_name.split('.')[1:]:
                    module = getattr(module, submodule_name)
                if hasattr(module, '__all__'):
                    names = getattr(module, '__all__')
                else:
                    names = dir(module)

            # Install each name into namespace, checking to make sure it
            # doesn't override anything that already exists.
            for name in names:
                # Check for conflicts to help prevent future problems.
                if name in namespace and protect:
                    if namespace[name] is not getattr(module, name):
                        # a genuinely different object would be clobbered
                        raise error.AutoservError('importing name '
                                '%s from %s %r would override %r' %
                                (name, module_name, getattr(module, name),
                                 namespace[name]))
                    else:
                        # Encourage cleanliness and the use of __all__ for a
                        # more concrete API with less surprises on '*' imports.
                        warnings.warn('%s (%r) being imported from %s for use '
                                      'in server control files is not the '
                                      'first occurrance of that import.' %
                                      (name, namespace[name], module_name))

                # unconditional assignment: re-binding an identical object
                # is harmless and keeps the loop simple
                namespace[name] = getattr(module, name)


        # This is the equivalent of prepending a bunch of import statements to
        # the front of the control script.
        namespace.update(os=os, sys=sys, logging=logging)
        _import_names('autotest_lib.server',
                ('hosts', 'autotest', 'kvm', 'git', 'standalone_profiler',
                 'source_kernel', 'rpm_kernel', 'deb_kernel', 'git_kernel'))
        _import_names('autotest_lib.server.subcommand',
                      ('parallel', 'parallel_simple', 'subcommand'))
        _import_names('autotest_lib.server.utils',
                      ('run', 'get_tmp_dir', 'sh_escape', 'parse_machine'))
        _import_names('autotest_lib.client.common_lib.error')
        _import_names('autotest_lib.client.common_lib.barrier', ('barrier',))

        # Inject ourself as the job object into other classes within the API.
        # (Yuck, this injection is a gross thing be part of a public API. -gps)
        #
        # XXX Base & SiteAutotest do not appear to use .job. Who does?
        namespace['autotest'].Autotest.job = self
        # server.hosts.base_classes.Host uses .job.
        namespace['hosts'].Host.job = self
        # propagate this job's ssh settings to the host factory defaults
        namespace['hosts'].factory.ssh_user = self._ssh_user
        namespace['hosts'].factory.ssh_port = self._ssh_port
        namespace['hosts'].factory.ssh_pass = self._ssh_pass
mbligh084bc172008-10-18 14:02:45 +00001018
1019
1020 def _execute_code(self, code_file, namespace, protect=True):
mbligh2b92b862008-11-22 13:25:32 +00001021 """
1022 Execute code using a copy of namespace as a server control script.
mbligh084bc172008-10-18 14:02:45 +00001023
1024 Unless protect_namespace is explicitly set to False, the dict will not
1025 be modified.
1026
1027 Args:
1028 code_file: The filename of the control file to execute.
1029 namespace: A dict containing names to make available during execution.
1030 protect: Boolean. If True (the default) a copy of the namespace dict
1031 is used during execution to prevent the code from modifying its
1032 contents outside of this function. If False the raw dict is
1033 passed in and modifications will be allowed.
1034 """
1035 if protect:
1036 namespace = namespace.copy()
1037 self._fill_server_control_namespace(namespace, protect=protect)
1038 # TODO: Simplify and get rid of the special cases for only 1 machine.
showard3e66e8c2008-10-27 19:20:51 +00001039 if len(self.machines) > 1:
mbligh084bc172008-10-18 14:02:45 +00001040 machines_text = '\n'.join(self.machines) + '\n'
1041 # Only rewrite the file if it does not match our machine list.
1042 try:
1043 machines_f = open(MACHINES_FILENAME, 'r')
1044 existing_machines_text = machines_f.read()
1045 machines_f.close()
1046 except EnvironmentError:
1047 existing_machines_text = None
1048 if machines_text != existing_machines_text:
1049 utils.open_write_close(MACHINES_FILENAME, machines_text)
1050 execfile(code_file, namespace, namespace)
jadmanski10646442008-08-13 14:05:21 +00001051
1052
jadmanskie29d0e42010-06-17 16:06:52 +00001053 def _parse_status(self, new_line):
mbligh0d0f67d2009-11-06 03:15:03 +00001054 if not self._using_parser:
jadmanski10646442008-08-13 14:05:21 +00001055 return
jadmanskie29d0e42010-06-17 16:06:52 +00001056 new_tests = self.parser.process_lines([new_line])
jadmanski10646442008-08-13 14:05:21 +00001057 for test in new_tests:
1058 self.__insert_test(test)
1059
1060
1061 def __insert_test(self, test):
mbligh2b92b862008-11-22 13:25:32 +00001062 """
1063 An internal method to insert a new test result into the
jadmanski10646442008-08-13 14:05:21 +00001064 database. This method will not raise an exception, even if an
1065 error occurs during the insert, to avoid failing a test
1066 simply because of unexpected database issues."""
showard21baa452008-10-21 00:08:39 +00001067 self.num_tests_run += 1
1068 if status_lib.is_worse_than_or_equal_to(test.status, 'FAIL'):
1069 self.num_tests_failed += 1
jadmanski10646442008-08-13 14:05:21 +00001070 try:
1071 self.results_db.insert_test(self.job_model, test)
1072 except Exception:
1073 msg = ("WARNING: An unexpected error occured while "
1074 "inserting test results into the database. "
1075 "Ignoring error.\n" + traceback.format_exc())
1076 print >> sys.stderr, msg
1077
mblighcaa62c22008-04-07 21:51:17 +00001078
mblighfc3da5b2010-01-06 18:37:22 +00001079 def preprocess_client_state(self):
1080 """
1081 Produce a state file for initializing the state of a client job.
1082
1083 Creates a new client state file with all the current server state, as
1084 well as some pre-set client state.
1085
1086 @returns The path of the file the state was written into.
1087 """
1088 # initialize the sysinfo state
1089 self._state.set('client', 'sysinfo', self.sysinfo.serialize())
1090
1091 # dump the state out to a tempfile
1092 fd, file_path = tempfile.mkstemp(dir=self.tmpdir)
1093 os.close(fd)
mbligha2c99492010-01-27 22:59:50 +00001094
1095 # write_to_file doesn't need locking, we exclusively own file_path
mblighfc3da5b2010-01-06 18:37:22 +00001096 self._state.write_to_file(file_path)
1097 return file_path
1098
1099
1100 def postprocess_client_state(self, state_path):
1101 """
1102 Update the state of this job with the state from a client job.
1103
1104 Updates the state of the server side of a job with the final state
1105 of a client job that was run. Updates the non-client-specific state,
1106 pulls in some specific bits from the client-specific state, and then
1107 discards the rest. Removes the state file afterwards
1108
1109 @param state_file A path to the state file from the client.
1110 """
1111 # update the on-disk state
mblighfc3da5b2010-01-06 18:37:22 +00001112 try:
jadmanskib6e7bdb2010-04-13 16:00:39 +00001113 self._state.read_from_file(state_path)
mblighfc3da5b2010-01-06 18:37:22 +00001114 os.remove(state_path)
mbligha2c99492010-01-27 22:59:50 +00001115 except OSError, e:
mblighfc3da5b2010-01-06 18:37:22 +00001116 # ignore file-not-found errors
1117 if e.errno != errno.ENOENT:
1118 raise
jadmanskib6e7bdb2010-04-13 16:00:39 +00001119 else:
1120 logging.debug('Client state file %s not found', state_path)
mblighfc3da5b2010-01-06 18:37:22 +00001121
1122 # update the sysinfo state
1123 if self._state.has('client', 'sysinfo'):
1124 self.sysinfo.deserialize(self._state.get('client', 'sysinfo'))
1125
1126 # drop all the client-specific state
1127 self._state.discard_namespace('client')
1128
1129
mbligh0a883702010-04-21 01:58:34 +00001130 def clear_all_known_hosts(self):
1131 """Clears known hosts files for all AbstractSSHHosts."""
1132 for host in self.hosts:
1133 if isinstance(host, abstract_ssh.AbstractSSHHost):
1134 host.clear_known_hosts()
1135
1136
class warning_manager(object):
    """Tracks which warning categories are enabled or disabled over time.

    For each warning type we keep a list of half-open [start, end)
    epoch-second intervals during which that type was disabled; an end of
    None means the type is still disabled."""
    def __init__(self):
        # a map of warning types to a list of disabled time intervals
        self.disabled_warnings = {}


    def is_valid(self, timestamp, warning_type):
        """Indicates if a warning (based on the time it occured and its type)
        is a valid warning. A warning is considered "invalid" if this type of
        warning was marked as "disabled" at the time the warning occured."""
        for start, end in self.disabled_warnings.get(warning_type, []):
            if start <= timestamp and (end is None or timestamp < end):
                return False
        return True


    def disable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, disables all further warnings of this type."""
        intervals = self.disabled_warnings.setdefault(warning_type, [])
        currently_disabled = bool(intervals) and intervals[-1][1] is None
        if not currently_disabled:
            # open a fresh interval starting now
            intervals.append((int(current_time_func()), None))


    def enable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, enables all further warnings of this type."""
        intervals = self.disabled_warnings.get(warning_type, [])
        if intervals and intervals[-1][1] is None:
            # close the open interval at the current time
            start = intervals[-1][0]
            intervals[-1] = (start, int(current_time_func()))
Paul Pendlebury57593562011-06-15 10:45:49 -07001168
1169
# load up site-specific code for generating site-specific job data
# (the last argument is presumably the fallback used when no site module
# is present — confirm against utils.import_site_function)
get_site_job_data = utils.import_site_function(__file__,
    "autotest_lib.server.site_server_job", "get_site_job_data",
    _get_site_job_data_dummy)


# site_server_job resolves to the site-provided job subclass, falling back
# to base_server_job when the site module is absent
site_server_job = utils.import_site_class(
    __file__, "autotest_lib.server.site_server_job", "site_server_job",
    base_server_job)


class server_job(site_server_job):
    """Concrete server job class; all behavior is inherited from
    site_server_job (or base_server_job when no site class exists)."""
    pass