blob: fcf7d7736b39e48a6fb5ec8552006dfe1e187b86 [file] [log] [blame]
Paul Pendlebury7c1fdcf2011-05-04 12:39:15 -07001# Copyright (c) 2011 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
mbligh57e78662008-06-17 19:53:49 +00004"""
5The main job wrapper for the server side.
6
7This is the core infrastructure. Derived from the client side job.py
8
9Copyright Martin J. Bligh, Andy Whitcroft 2007
10"""
11
Scott Zawalski91493c82013-01-25 16:15:20 -050012import getpass, os, sys, re, tempfile, time, select, platform
mblighfc3da5b2010-01-06 18:37:22 +000013import traceback, shutil, warnings, fcntl, pickle, logging, itertools, errno
showard75cdfee2009-06-10 17:40:41 +000014from autotest_lib.client.bin import sysinfo
mbligh0d0f67d2009-11-06 03:15:03 +000015from autotest_lib.client.common_lib import base_job
Scott Zawalski91493c82013-01-25 16:15:20 -050016from autotest_lib.client.common_lib import error, utils, packages
showard75cdfee2009-06-10 17:40:41 +000017from autotest_lib.client.common_lib import logging_manager
Paul Pendlebury57593562011-06-15 10:45:49 -070018from autotest_lib.server import test, subcommand, profilers
mbligh0a883702010-04-21 01:58:34 +000019from autotest_lib.server.hosts import abstract_ssh
jadmanski10646442008-08-13 14:05:21 +000020from autotest_lib.tko import db as tko_db, status_lib, utils as tko_utils
jadmanski10646442008-08-13 14:05:21 +000021
22
def _control_segment_path(name):
    """Return the absolute path of the named control segment file.

    Control segments live in the 'control_segments' directory that sits
    next to this module.
    """
    segments_base = os.path.dirname(os.path.abspath(__file__))
    return os.path.join(segments_base, "control_segments", name)
jadmanski10646442008-08-13 14:05:21 +000027
28
# Filenames used inside a job's results directory.
CLIENT_CONTROL_FILENAME = 'control'
SERVER_CONTROL_FILENAME = 'control.srv'
MACHINES_FILENAME = '.machines'

# Pre-canned control segments shipped alongside this module; each path is
# resolved relative to the server directory via _control_segment_path.
CLIENT_WRAPPER_CONTROL_FILE = _control_segment_path('client_wrapper')
CRASHDUMPS_CONTROL_FILE = _control_segment_path('crashdumps')
CRASHINFO_CONTROL_FILE = _control_segment_path('crashinfo')
INSTALL_CONTROL_FILE = _control_segment_path('install')
CLEANUP_CONTROL_FILE = _control_segment_path('cleanup')
VERIFY_CONTROL_FILE = _control_segment_path('verify')
REPAIR_CONTROL_FILE = _control_segment_path('repair')
PROVISION_CONTROL_FILE = _control_segment_path('provision')
VERIFY_JOB_REPO_URL_CONTROL_FILE = _control_segment_path('verify_job_repo_url')
jadmanski10646442008-08-13 14:05:21 +000042
43
# by default provide a stub that generates no site data
def _get_site_job_data_dummy(job):
    """Default site hook: produce no extra site job data.

    @param job: The server_job instance (unused by the stub).
    @returns An empty dict.
    """
    return dict()
47
48
class status_indenter(base_job.status_indenter):
    """A status indenter backed by a single integer counter."""
    def __init__(self):
        self._indent = 0


    @property
    def indent(self):
        """Current indentation level."""
        return self._indent


    def increment(self):
        """Raise the indentation level by one."""
        self._indent += 1


    def decrement(self):
        """Lower the indentation level by one."""
        self._indent -= 1


    def get_context(self):
        """Returns a context object for use by job.get_record_context."""
        class context(object):
            def __init__(self, indenter, indent):
                # remember the indenter and the level to roll back to
                self._indenter = indenter
                self._indent = indent
            def restore(self):
                # reset the indenter to the remembered level
                self._indenter._indent = self._indent
        return context(self, self._indent)
77
78
class server_job_record_hook(object):
    """The job.record hook for server job. Used to inject WARN messages from
    the console or vlm whenever new logs are written, and to echo any logs
    to INFO level logging. Implemented as a class so that it can use state to
    block recursive calls, so that the hook can call job.record itself to
    log WARN messages.

    Depends on job._read_warnings and job._logger.
    """
    def __init__(self, job):
        # the job whose warnings we poll and whose logger we echo through
        self._job = job
        # recursion guard flag; see __call__
        self._being_called = False


    def __call__(self, entry):
        """A wrapper around the 'real' record hook, the _hook method, which
        prevents recursion. This isn't making any effort to be threadsafe,
        the intent is to outright block infinite recursion via a
        job.record->_hook->job.record->_hook->job.record... chain."""
        if self._being_called:
            return
        self._being_called = True
        try:
            self._hook(self._job, entry)
        finally:
            # always clear the guard, even if _hook raised
            self._being_called = False


    @staticmethod
    def _hook(job, entry):
        """The core hook, which can safely call job.record."""
        entries = []
        # poll all our warning loggers for new warnings
        for timestamp, msg in job._read_warnings():
            warning_entry = base_job.status_log_entry(
                'WARN', None, None, msg, {}, timestamp=timestamp)
            entries.append(warning_entry)
            job.record_entry(warning_entry)
        # echo rendered versions of all the status logs to info
        entries.append(entry)
        for entry in entries:
            rendered_entry = job._logger.render_entry(entry)
            logging.info(rendered_entry)
            job._parse_status(rendered_entry)
jadmanski2a89dac2010-06-11 14:32:58 +0000123
124
mbligh0d0f67d2009-11-06 03:15:03 +0000125class base_server_job(base_job.base_job):
126 """The server-side concrete implementation of base_job.
jadmanski10646442008-08-13 14:05:21 +0000127
mbligh0d0f67d2009-11-06 03:15:03 +0000128 Optional properties provided by this implementation:
129 serverdir
130 conmuxdir
131
132 num_tests_run
133 num_tests_failed
134
135 warning_manager
136 warning_loggers
jadmanski10646442008-08-13 14:05:21 +0000137 """
138
mbligh0d0f67d2009-11-06 03:15:03 +0000139 _STATUS_VERSION = 1
jadmanski10646442008-08-13 14:05:21 +0000140
141 def __init__(self, control, args, resultdir, label, user, machines,
142 client=False, parse_job='',
Scott Zawalski91493c82013-01-25 16:15:20 -0500143 ssh_user='root', ssh_port=22, ssh_pass='', test_retry=0,
mblighe0cbc912010-03-11 18:03:07 +0000144 group_name='', tag='',
145 control_filename=SERVER_CONTROL_FILENAME):
jadmanski10646442008-08-13 14:05:21 +0000146 """
mbligh374f3412009-05-13 21:29:45 +0000147 Create a server side job object.
mblighb5dac432008-11-27 00:38:44 +0000148
mblighe7d9c602009-07-02 19:02:33 +0000149 @param control: The pathname of the control file.
150 @param args: Passed to the control file.
151 @param resultdir: Where to throw the results.
152 @param label: Description of the job.
153 @param user: Username for the job (email address).
154 @param client: True if this is a client-side control file.
155 @param parse_job: string, if supplied it is the job execution tag that
156 the results will be passed through to the TKO parser with.
157 @param ssh_user: The SSH username. [root]
158 @param ssh_port: The SSH port number. [22]
159 @param ssh_pass: The SSH passphrase, if needed.
Scott Zawalski91493c82013-01-25 16:15:20 -0500160 @param test_retry: The number of times to retry a test if the test did
161 not complete successfully.
mblighe7d9c602009-07-02 19:02:33 +0000162 @param group_name: If supplied, this will be written out as
mbligh374f3412009-05-13 21:29:45 +0000163 host_group_name in the keyvals file for the parser.
mblighe7d9c602009-07-02 19:02:33 +0000164 @param tag: The job execution tag from the scheduler. [optional]
mblighe0cbc912010-03-11 18:03:07 +0000165 @param control_filename: The filename where the server control file
166 should be written in the results directory.
jadmanski10646442008-08-13 14:05:21 +0000167 """
Scott Zawalski91493c82013-01-25 16:15:20 -0500168 super(base_server_job, self).__init__(resultdir=resultdir,
169 test_retry=test_retry)
mbligh0d0f67d2009-11-06 03:15:03 +0000170 path = os.path.dirname(__file__)
Scott Zawalski91493c82013-01-25 16:15:20 -0500171 self.test_retry = test_retry
mbligh0d0f67d2009-11-06 03:15:03 +0000172 self.control = control
173 self._uncollected_log_file = os.path.join(self.resultdir,
174 'uncollected_logs')
175 debugdir = os.path.join(self.resultdir, 'debug')
176 if not os.path.exists(debugdir):
177 os.mkdir(debugdir)
178
179 if user:
180 self.user = user
181 else:
182 self.user = getpass.getuser()
183
jadmanski808f4b12010-04-09 22:30:31 +0000184 self.args = args
Peter Mayo7a875762012-06-13 14:38:15 -0400185 self.label = label
jadmanski10646442008-08-13 14:05:21 +0000186 self.machines = machines
mbligh0d0f67d2009-11-06 03:15:03 +0000187 self._client = client
jadmanski10646442008-08-13 14:05:21 +0000188 self.warning_loggers = set()
jadmanskif37df842009-02-11 00:03:26 +0000189 self.warning_manager = warning_manager()
mbligh0d0f67d2009-11-06 03:15:03 +0000190 self._ssh_user = ssh_user
191 self._ssh_port = ssh_port
192 self._ssh_pass = ssh_pass
mblighe7d9c602009-07-02 19:02:33 +0000193 self.tag = tag
mbligh09108442008-10-15 16:27:38 +0000194 self.last_boot_tag = None
jadmanski53aaf382008-11-17 16:22:31 +0000195 self.hosts = set()
mbligh0d0f67d2009-11-06 03:15:03 +0000196 self.drop_caches = False
mblighb5dac432008-11-27 00:38:44 +0000197 self.drop_caches_between_iterations = False
mblighe0cbc912010-03-11 18:03:07 +0000198 self._control_filename = control_filename
jadmanski10646442008-08-13 14:05:21 +0000199
showard75cdfee2009-06-10 17:40:41 +0000200 self.logging = logging_manager.get_logging_manager(
201 manage_stdout_and_stderr=True, redirect_fds=True)
202 subcommand.logging_manager_object = self.logging
jadmanski10646442008-08-13 14:05:21 +0000203
mbligh0d0f67d2009-11-06 03:15:03 +0000204 self.sysinfo = sysinfo.sysinfo(self.resultdir)
jadmanski043e1132008-11-19 17:10:32 +0000205 self.profilers = profilers.profilers(self)
jadmanskic09fc152008-10-15 17:56:59 +0000206
jadmanski10646442008-08-13 14:05:21 +0000207 job_data = {'label' : label, 'user' : user,
208 'hostname' : ','.join(machines),
Eric Li861b2d52011-02-04 14:50:35 -0800209 'drone' : platform.node(),
mbligh0d0f67d2009-11-06 03:15:03 +0000210 'status_version' : str(self._STATUS_VERSION),
showard170873e2009-01-07 00:22:26 +0000211 'job_started' : str(int(time.time()))}
mbligh374f3412009-05-13 21:29:45 +0000212 if group_name:
213 job_data['host_group_name'] = group_name
jadmanski10646442008-08-13 14:05:21 +0000214
mbligh0d0f67d2009-11-06 03:15:03 +0000215 # only write these keyvals out on the first job in a resultdir
216 if 'job_started' not in utils.read_keyval(self.resultdir):
217 job_data.update(get_site_job_data(self))
218 utils.write_keyval(self.resultdir, job_data)
219
220 self._parse_job = parse_job
showardcc929362010-01-25 21:20:41 +0000221 self._using_parser = (self._parse_job and len(machines) <= 1)
mbligh0d0f67d2009-11-06 03:15:03 +0000222 self.pkgmgr = packages.PackageManager(
223 self.autodir, run_function_dargs={'timeout':600})
showard21baa452008-10-21 00:08:39 +0000224 self.num_tests_run = 0
225 self.num_tests_failed = 0
226
jadmanski550fdc22008-11-20 16:32:08 +0000227 self._register_subcommand_hooks()
228
mbligh0d0f67d2009-11-06 03:15:03 +0000229 # these components aren't usable on the server
230 self.bootloader = None
231 self.harness = None
232
jadmanski2a89dac2010-06-11 14:32:58 +0000233 # set up the status logger
jadmanski52053632010-06-11 21:08:10 +0000234 self._indenter = status_indenter()
jadmanski2a89dac2010-06-11 14:32:58 +0000235 self._logger = base_job.status_logger(
jadmanski52053632010-06-11 21:08:10 +0000236 self, self._indenter, 'status.log', 'status.log',
jadmanski2a89dac2010-06-11 14:32:58 +0000237 record_hook=server_job_record_hook(self))
238
mbligh0d0f67d2009-11-06 03:15:03 +0000239
240 @classmethod
241 def _find_base_directories(cls):
242 """
243 Determine locations of autodir, clientdir and serverdir. Assumes
244 that this file is located within serverdir and uses __file__ along
245 with relative paths to resolve the location.
246 """
247 serverdir = os.path.abspath(os.path.dirname(__file__))
248 autodir = os.path.normpath(os.path.join(serverdir, '..'))
249 clientdir = os.path.join(autodir, 'client')
250 return autodir, clientdir, serverdir
251
252
Scott Zawalski91493c82013-01-25 16:15:20 -0500253 def _find_resultdir(self, resultdir, *args, **dargs):
mbligh0d0f67d2009-11-06 03:15:03 +0000254 """
255 Determine the location of resultdir. For server jobs we expect one to
256 always be explicitly passed in to __init__, so just return that.
257 """
258 if resultdir:
259 return os.path.normpath(resultdir)
260 else:
261 return None
262
jadmanski550fdc22008-11-20 16:32:08 +0000263
    def _get_status_logger(self):
        """Return a reference to the status logger.

        Exposes the base_job.status_logger instance created in __init__ so
        other components can render and record entries through it.
        """
        return self._logger
267
268
jadmanskie432dd22009-01-30 15:04:51 +0000269 @staticmethod
270 def _load_control_file(path):
271 f = open(path)
272 try:
273 control_file = f.read()
274 finally:
275 f.close()
276 return re.sub('\r', '', control_file)
277
278
jadmanski550fdc22008-11-20 16:32:08 +0000279 def _register_subcommand_hooks(self):
mbligh2b92b862008-11-22 13:25:32 +0000280 """
281 Register some hooks into the subcommand modules that allow us
282 to properly clean up self.hosts created in forked subprocesses.
283 """
jadmanski550fdc22008-11-20 16:32:08 +0000284 def on_fork(cmd):
285 self._existing_hosts_on_fork = set(self.hosts)
286 def on_join(cmd):
287 new_hosts = self.hosts - self._existing_hosts_on_fork
288 for host in new_hosts:
289 host.close()
290 subcommand.subcommand.register_fork_hook(on_fork)
291 subcommand.subcommand.register_join_hook(on_join)
292
jadmanski10646442008-08-13 14:05:21 +0000293
    def init_parser(self):
        """
        Start the continuous parsing of self.resultdir. This sets up
        the database connection and inserts the basic job object into
        the database if necessary.
        """
        if not self._using_parser:
            return
        # redirect parser debugging to .parse.log
        parse_log = os.path.join(self.resultdir, '.parse.log')
        # NOTE(review): buffering=0 on a text-mode file is Python 2 only
        parse_log = open(parse_log, 'w', 0)
        tko_utils.redirect_parser_debugging(parse_log)
        # create a job model object and set up the db
        self.results_db = tko_db.db(autocommit=True)
        self.parser = status_lib.parser(self._STATUS_VERSION)
        self.job_model = self.parser.make_job(self.resultdir)
        self.parser.start(self.job_model)
        # check if a job already exists in the db and insert it if
        # it does not
        job_idx = self.results_db.find_job(self._parse_job)
        if job_idx is None:
            self.results_db.insert_job(self._parse_job, self.job_model)
        else:
            # job already present: attach the model to the existing rows
            machine_idx = self.results_db.lookup_machine(self.job_model.machine)
            self.job_model.index = job_idx
            self.job_model.machine_idx = machine_idx
321
322 def cleanup_parser(self):
mbligh2b92b862008-11-22 13:25:32 +0000323 """
324 This should be called after the server job is finished
jadmanski10646442008-08-13 14:05:21 +0000325 to carry out any remaining cleanup (e.g. flushing any
mbligh2b92b862008-11-22 13:25:32 +0000326 remaining test results to the results db)
327 """
mbligh0d0f67d2009-11-06 03:15:03 +0000328 if not self._using_parser:
jadmanski10646442008-08-13 14:05:21 +0000329 return
330 final_tests = self.parser.end()
331 for test in final_tests:
332 self.__insert_test(test)
mbligh0d0f67d2009-11-06 03:15:03 +0000333 self._using_parser = False
jadmanski10646442008-08-13 14:05:21 +0000334
335
336 def verify(self):
337 if not self.machines:
mbligh084bc172008-10-18 14:02:45 +0000338 raise error.AutoservError('No machines specified to verify')
mbligh0fce4112008-11-27 00:37:17 +0000339 if self.resultdir:
340 os.chdir(self.resultdir)
jadmanski10646442008-08-13 14:05:21 +0000341 try:
jadmanskicdd0c402008-09-19 21:21:31 +0000342 namespace = {'machines' : self.machines, 'job' : self,
mbligh0d0f67d2009-11-06 03:15:03 +0000343 'ssh_user' : self._ssh_user,
344 'ssh_port' : self._ssh_port,
345 'ssh_pass' : self._ssh_pass}
mbligh084bc172008-10-18 14:02:45 +0000346 self._execute_code(VERIFY_CONTROL_FILE, namespace, protect=False)
jadmanski10646442008-08-13 14:05:21 +0000347 except Exception, e:
mbligh2b92b862008-11-22 13:25:32 +0000348 msg = ('Verify failed\n' + str(e) + '\n' + traceback.format_exc())
jadmanski10646442008-08-13 14:05:21 +0000349 self.record('ABORT', None, None, msg)
350 raise
351
352
353 def repair(self, host_protection):
354 if not self.machines:
355 raise error.AutoservError('No machines specified to repair')
mbligh0fce4112008-11-27 00:37:17 +0000356 if self.resultdir:
357 os.chdir(self.resultdir)
jadmanski10646442008-08-13 14:05:21 +0000358 namespace = {'machines': self.machines, 'job': self,
mbligh0d0f67d2009-11-06 03:15:03 +0000359 'ssh_user': self._ssh_user, 'ssh_port': self._ssh_port,
360 'ssh_pass': self._ssh_pass,
jadmanski10646442008-08-13 14:05:21 +0000361 'protection_level': host_protection}
mbligh25c0b8c2009-01-24 01:44:17 +0000362
mbligh0931b0a2009-04-08 17:44:48 +0000363 self._execute_code(REPAIR_CONTROL_FILE, namespace, protect=False)
jadmanski10646442008-08-13 14:05:21 +0000364
365
Alex Millercb79ba72013-05-29 14:43:00 -0700366 def provision(self, labels):
367 """
368 Provision all hosts to match |labels|.
369
370 @param labels: A comma seperated string of labels to provision the
371 host to.
372
373 """
374 namespace = {'provision_labels': labels}
375 control = self._load_control_file(PROVISION_CONTROL_FILE)
376 self.run(control=control, namespace=namespace)
377
378
    def precheck(self):
        """
        perform any additional checks in derived classes.
        """
        # intentionally a no-op hook; subclasses may override
        pass
384
385
    def enable_external_logging(self):
        """
        Start or restart external logging mechanism.
        """
        # intentionally a no-op hook; subclasses may override
        pass
391
392
    def disable_external_logging(self):
        """
        Pause or stop external logging mechanism.
        """
        # intentionally a no-op hook; subclasses may override
        pass
398
399
    def use_external_logging(self):
        """
        Return True if external logging should be used.
        """
        # base implementation never uses external logging; subclasses
        # may override
        return False
405
406
    def _make_parallel_wrapper(self, function, machines, log):
        """Wrap function as appropriate for calling by parallel_simple.

        @param function: A callable taking a single machine name.
        @param machines: The full list of machines being operated on.
        @param log: Whether per-machine results should be logged/parsed.

        @returns A callable suitable for subcommand.parallel_simple.
        """
        # forking happens unless we're already scoped to exactly one machine
        is_forking = not (len(machines) == 1 and self.machines == machines)
        if self._parse_job and is_forking and log:
            def wrapper(machine):
                # this runs in a forked subprocess, so mutating self here is
                # safe: narrow the job to this machine and run its own parser
                self._parse_job += "/" + machine
                self._using_parser = True
                self.machines = [machine]
                self.push_execution_context(machine)
                os.chdir(self.resultdir)
                utils.write_keyval(self.resultdir, {"hostname": machine})
                self.init_parser()
                result = function(machine)
                self.cleanup_parser()
                return result
        elif len(machines) > 1 and log:
            def wrapper(machine):
                # no continuous parsing, but still give each machine its own
                # execution context and keyvals
                self.push_execution_context(machine)
                os.chdir(self.resultdir)
                machine_data = {'hostname' : machine,
                                'status_version' : str(self._STATUS_VERSION)}
                utils.write_keyval(self.resultdir, machine_data)
                result = function(machine)
                return result
        else:
            # single machine (or logging disabled): call function directly
            wrapper = function
        return wrapper
434
435
436 def parallel_simple(self, function, machines, log=True, timeout=None,
437 return_results=False):
438 """
439 Run 'function' using parallel_simple, with an extra wrapper to handle
440 the necessary setup for continuous parsing, if possible. If continuous
441 parsing is already properly initialized then this should just work.
442
443 @param function: A callable to run in parallel given each machine.
444 @param machines: A list of machine names to be passed one per subcommand
445 invocation of function.
446 @param log: If True, output will be written to output in a subdirectory
447 named after each machine.
448 @param timeout: Seconds after which the function call should timeout.
449 @param return_results: If True instead of an AutoServError being raised
450 on any error a list of the results|exceptions from the function
451 called on each arg is returned. [default: False]
452
453 @raises error.AutotestError: If any of the functions failed.
454 """
455 wrapper = self._make_parallel_wrapper(function, machines, log)
456 return subcommand.parallel_simple(wrapper, machines,
457 log=log, timeout=timeout,
458 return_results=return_results)
459
460
461 def parallel_on_machines(self, function, machines, timeout=None):
462 """
showardcd5fac42009-07-06 20:19:43 +0000463 @param function: Called in parallel with one machine as its argument.
mbligh415dc212009-06-15 21:53:34 +0000464 @param machines: A list of machines to call function(machine) on.
465 @param timeout: Seconds after which the function call should timeout.
466
467 @returns A list of machines on which function(machine) returned
468 without raising an exception.
469 """
showardcd5fac42009-07-06 20:19:43 +0000470 results = self.parallel_simple(function, machines, timeout=timeout,
mbligh415dc212009-06-15 21:53:34 +0000471 return_results=True)
472 success_machines = []
473 for result, machine in itertools.izip(results, machines):
474 if not isinstance(result, Exception):
475 success_machines.append(machine)
476 return success_machines
jadmanski10646442008-08-13 14:05:21 +0000477
478
    # sentinel: pass as control_file_dir to request a temporary directory
    _USE_TEMP_DIR = object()
    def run(self, cleanup=False, install_before=False, install_after=False,
            collect_crashdumps=True, namespace={}, control=None,
            control_file_dir=None, verify_job_repo_url=False,
            only_collect_crashinfo=False, skip_crash_collection=False):
        """Execute the job's control file and associated control segments.

        @param cleanup: If True, run the cleanup control segment afterwards.
        @param install_before: If True, run the install segment before the
                control file (requires machines).
        @param install_after: If True, run the install segment afterwards.
        @param collect_crashdumps: If True, collect crashdumps afterwards.
        @param namespace: Extra names made available to the control file.
                NOTE(review): mutable default dict, but it is copied below
                before being modified, so the default is never mutated.
        @param control: Control file text; defaults to loading self.control.
        @param control_file_dir: Where to write the control files, or
                _USE_TEMP_DIR/None for a temporary/results directory.
        @param verify_job_repo_url: If True, run the verify_job_repo_url
                control segment first.
        @param only_collect_crashinfo: If True, skip running the control file
                and only collect crashinfo.
        @param skip_crash_collection: If True, skip crash dump/info
                collection entirely.
        """
        # for a normal job, make sure the uncollected logs file exists
        # for a crashinfo-only run it should already exist, bail out otherwise
        created_uncollected_logs = False
        if self.resultdir and not os.path.exists(self._uncollected_log_file):
            if only_collect_crashinfo:
                # if this is a crashinfo-only run, and there were no existing
                # uncollected logs, just bail out early
                logging.info("No existing uncollected logs, "
                             "skipping crashinfo collection")
                return
            else:
                log_file = open(self._uncollected_log_file, "w")
                pickle.dump([], log_file)
                log_file.close()
                created_uncollected_logs = True

        # use a copy so changes don't affect the original dictionary
        namespace = namespace.copy()
        machines = self.machines
        if control is None:
            if self.control is None:
                control = ''
            else:
                control = self._load_control_file(self.control)
        if control_file_dir is None:
            control_file_dir = self.resultdir

        self.aborted = False
        # expose the job state to the control file's namespace
        namespace['machines'] = machines
        namespace['args'] = self.args
        namespace['job'] = self
        namespace['ssh_user'] = self._ssh_user
        namespace['ssh_port'] = self._ssh_port
        namespace['ssh_pass'] = self._ssh_pass
        test_start_time = int(time.time())

        if self.resultdir:
            os.chdir(self.resultdir)
            # touch status.log so that the parser knows a job is running here
            open(self.get_status_log_path(), 'a').close()
            self.enable_external_logging()

        # assume crashinfo collection is needed until the control file
        # completes without an escaped exception
        collect_crashinfo = True
        temp_control_file_dir = None
        try:
            try:
                if install_before and machines:
                    self._execute_code(INSTALL_CONTROL_FILE, namespace)

                if only_collect_crashinfo:
                    return

                # If the verify_job_repo_url option is set but we're unable
                # to actually verify that the job_repo_url contains the autotest
                # package, this job will fail.
                if verify_job_repo_url:
                    self._execute_code(VERIFY_JOB_REPO_URL_CONTROL_FILE,
                                       namespace)
                else:
                    logging.warning('Not checking if job_repo_url contains '
                                    'autotest packages on %s', machines)

                # determine the dir to write the control files to
                cfd_specified = (control_file_dir
                                 and control_file_dir is not self._USE_TEMP_DIR)
                if cfd_specified:
                    temp_control_file_dir = None
                else:
                    temp_control_file_dir = tempfile.mkdtemp(
                        suffix='temp_control_file_dir')
                    control_file_dir = temp_control_file_dir
                server_control_file = os.path.join(control_file_dir,
                                                   self._control_filename)
                client_control_file = os.path.join(control_file_dir,
                                                   CLIENT_CONTROL_FILENAME)
                if self._client:
                    # client-side job: write the control file for the client
                    # and run the server-side wrapper around it
                    namespace['control'] = control
                    utils.open_write_close(client_control_file, control)
                    shutil.copyfile(CLIENT_WRAPPER_CONTROL_FILE,
                                    server_control_file)
                else:
                    utils.open_write_close(server_control_file, control)
                logging.info("Processing control file")
                self._execute_code(server_control_file, namespace)
                logging.info("Finished processing control file")

                # no error occured, so we don't need to collect crashinfo
                collect_crashinfo = False
            except Exception, e:
                try:
                    logging.exception(
                            'Exception escaped control file, job aborting:')
                    self.record('INFO', None, None, str(e),
                                {'job_abort_reason': str(e)})
                except:
                    pass # don't let logging exceptions here interfere
                raise
        finally:
            if temp_control_file_dir:
                # Clean up temp directory used for copies of the control files
                try:
                    shutil.rmtree(temp_control_file_dir)
                except Exception, e:
                    logging.warn('Could not remove temp directory %s: %s',
                                 temp_control_file_dir, e)

            if machines and (collect_crashdumps or collect_crashinfo):
                namespace['test_start_time'] = test_start_time
                if skip_crash_collection:
                    logging.info('Skipping crash dump/info collection '
                                 'as requested.')
                elif collect_crashinfo:
                    # includes crashdumps
                    self._execute_code(CRASHINFO_CONTROL_FILE, namespace)
                else:
                    self._execute_code(CRASHDUMPS_CONTROL_FILE, namespace)
            self.disable_external_logging()
            if cleanup and machines:
                self._execute_code(CLEANUP_CONTROL_FILE, namespace)
            # only remove the uncollected-logs file if this run created it
            if self._uncollected_log_file and created_uncollected_logs:
                os.remove(self._uncollected_log_file)
            if install_after and machines:
                self._execute_code(INSTALL_CONTROL_FILE, namespace)
jadmanski10646442008-08-13 14:05:21 +0000607
608
    def run_test(self, url, *args, **dargs):
        """
        Summon a test object and run it.

        tag
                tag to add to testname
        url
                url of the test to run

        @returns True if the test ran to completion (even with a test-level
                failure recorded), False if a TestBaseException was raised.
        @raises Whatever non-test exception escaped the test, re-raised with
                its original traceback.
        """
        group, testname = self.pkgmgr.get_package_name(url, 'test')
        testname, subdir, tag = self._build_tagged_test_name(testname, dargs)
        outputdir = self._make_test_outputdir(subdir)

        def group_func():
            # runs inside the status-log group for this test
            try:
                test.runtest(self, url, tag, args, dargs)
            except error.TestBaseException, e:
                # test-level failure: record with the test's own exit status
                self.record(e.exit_status, subdir, testname, str(e))
                raise
            except Exception, e:
                info = str(e) + "\n" + traceback.format_exc()
                self.record('FAIL', subdir, testname, info)
                raise
            else:
                self.record('GOOD', subdir, testname, 'completed successfully')

        result, exc_info = self._run_group(testname, subdir, group_func)
        if exc_info and isinstance(exc_info[1], error.TestBaseException):
            return False
        elif exc_info:
            # re-raise non-test exceptions with the original traceback
            # (Python 2 three-argument raise form)
            raise exc_info[0], exc_info[1], exc_info[2]
        else:
            return True
jadmanski10646442008-08-13 14:05:21 +0000642
643
    def _run_group(self, name, subdir, function, *args, **dargs):
        """\
        Underlying method for running something inside of a group.

        Records a START entry, runs function, then records a matching END
        entry whose status reflects how function terminated.

        @param name: Group name used in the status log.
        @param subdir: Results subdirectory, or None.
        @param function: Callable to run inside the group.

        @returns A (result, exc_info) tuple; exc_info is None on success, or
                sys.exc_info() if a TestBaseException was raised.
        @raises error.JobError: If function raised a non-test exception.
        """
        result, exc_info = None, None
        try:
            self.record('START', subdir, name)
            result = function(*args, **dargs)
        except error.TestBaseException, e:
            # test-level failure: log it and hand back the exception info
            # instead of raising, so the caller decides what to do
            self.record("END %s" % e.exit_status, subdir, name)
            exc_info = sys.exc_info()
        except Exception, e:
            # any other failure aborts the group entirely
            err_msg = str(e) + '\n'
            err_msg += traceback.format_exc()
            self.record('END ABORT', subdir, name, err_msg)
            raise error.JobError(name + ' failed\n' + traceback.format_exc())
        else:
            self.record('END GOOD', subdir, name)

        return result, exc_info
jadmanski10646442008-08-13 14:05:21 +0000664
665
666 def run_group(self, function, *args, **dargs):
667 """\
668 function:
669 subroutine to run
670 *args:
671 arguments for the function
672 """
673
674 name = function.__name__
675
676 # Allow the tag for the group to be specified.
677 tag = dargs.pop('tag', None)
678 if tag:
679 name = tag
680
jadmanskide292df2008-08-26 20:51:14 +0000681 return self._run_group(name, None, function, *args, **dargs)[0]
jadmanski10646442008-08-13 14:05:21 +0000682
683
684 def run_reboot(self, reboot_func, get_kernel_func):
685 """\
686 A specialization of run_group meant specifically for handling
687 a reboot. Includes support for capturing the kernel version
688 after the reboot.
689
690 reboot_func: a function that carries out the reboot
691
692 get_kernel_func: a function that returns a string
693 representing the kernel version.
694 """
jadmanski10646442008-08-13 14:05:21 +0000695 try:
696 self.record('START', None, 'reboot')
jadmanski10646442008-08-13 14:05:21 +0000697 reboot_func()
698 except Exception, e:
jadmanski10646442008-08-13 14:05:21 +0000699 err_msg = str(e) + '\n' + traceback.format_exc()
700 self.record('END FAIL', None, 'reboot', err_msg)
jadmanski4b51d542009-04-08 14:17:16 +0000701 raise
jadmanski10646442008-08-13 14:05:21 +0000702 else:
703 kernel = get_kernel_func()
jadmanski10646442008-08-13 14:05:21 +0000704 self.record('END GOOD', None, 'reboot',
Dale Curtis74a314b2011-06-23 14:55:46 -0700705 optional_fields={"kernel": kernel})
jadmanski10646442008-08-13 14:05:21 +0000706
707
jadmanskie432dd22009-01-30 15:04:51 +0000708 def run_control(self, path):
709 """Execute a control file found at path (relative to the autotest
710 path). Intended for executing a control file within a control file,
711 not for running the top-level job control file."""
712 path = os.path.join(self.autodir, path)
713 control_file = self._load_control_file(path)
mbligh0d0f67d2009-11-06 03:15:03 +0000714 self.run(control=control_file, control_file_dir=self._USE_TEMP_DIR)
jadmanskie432dd22009-01-30 15:04:51 +0000715
716
jadmanskic09fc152008-10-15 17:56:59 +0000717 def add_sysinfo_command(self, command, logfile=None, on_every_test=False):
mbligh4395bbd2009-03-25 19:34:17 +0000718 self._add_sysinfo_loggable(sysinfo.command(command, logf=logfile),
jadmanskic09fc152008-10-15 17:56:59 +0000719 on_every_test)
720
721
722 def add_sysinfo_logfile(self, file, on_every_test=False):
723 self._add_sysinfo_loggable(sysinfo.logfile(file), on_every_test)
724
725
726 def _add_sysinfo_loggable(self, loggable, on_every_test):
727 if on_every_test:
728 self.sysinfo.test_loggables.add(loggable)
729 else:
730 self.sysinfo.boot_loggables.add(loggable)
731
732
jadmanski10646442008-08-13 14:05:21 +0000733 def _read_warnings(self):
jadmanskif37df842009-02-11 00:03:26 +0000734 """Poll all the warning loggers and extract any new warnings that have
735 been logged. If the warnings belong to a category that is currently
736 disabled, this method will discard them and they will no longer be
737 retrievable.
738
739 Returns a list of (timestamp, message) tuples, where timestamp is an
740 integer epoch timestamp."""
jadmanski10646442008-08-13 14:05:21 +0000741 warnings = []
742 while True:
743 # pull in a line of output from every logger that has
744 # output ready to be read
mbligh2b92b862008-11-22 13:25:32 +0000745 loggers, _, _ = select.select(self.warning_loggers, [], [], 0)
jadmanski10646442008-08-13 14:05:21 +0000746 closed_loggers = set()
747 for logger in loggers:
748 line = logger.readline()
749 # record any broken pipes (aka line == empty)
750 if len(line) == 0:
751 closed_loggers.add(logger)
752 continue
jadmanskif37df842009-02-11 00:03:26 +0000753 # parse out the warning
754 timestamp, msgtype, msg = line.split('\t', 2)
755 timestamp = int(timestamp)
756 # if the warning is valid, add it to the results
757 if self.warning_manager.is_valid(timestamp, msgtype):
758 warnings.append((timestamp, msg.strip()))
jadmanski10646442008-08-13 14:05:21 +0000759
760 # stop listening to loggers that are closed
761 self.warning_loggers -= closed_loggers
762
763 # stop if none of the loggers have any output left
764 if not loggers:
765 break
766
767 # sort into timestamp order
768 warnings.sort()
769 return warnings
770
771
showardcc929362010-01-25 21:20:41 +0000772 def _unique_subdirectory(self, base_subdirectory_name):
773 """Compute a unique results subdirectory based on the given name.
774
775 Appends base_subdirectory_name with a number as necessary to find a
776 directory name that doesn't already exist.
777 """
778 subdirectory = base_subdirectory_name
779 counter = 1
780 while os.path.exists(os.path.join(self.resultdir, subdirectory)):
781 subdirectory = base_subdirectory_name + '.' + str(counter)
782 counter += 1
783 return subdirectory
784
785
jadmanski52053632010-06-11 21:08:10 +0000786 def get_record_context(self):
787 """Returns an object representing the current job.record context.
788
789 The object returned is an opaque object with a 0-arg restore method
790 which can be called to restore the job.record context (i.e. indentation)
791 to the current level. The intention is that it should be used when
792 something external which generate job.record calls (e.g. an autotest
793 client) can fail catastrophically and the server job record state
794 needs to be reset to its original "known good" state.
795
796 @return: A context object with a 0-arg restore() method."""
797 return self._indenter.get_context()
798
799
showardcc929362010-01-25 21:20:41 +0000800 def record_summary(self, status_code, test_name, reason='', attributes=None,
801 distinguishing_attributes=(), child_test_ids=None):
802 """Record a summary test result.
803
804 @param status_code: status code string, see
805 common_lib.log.is_valid_status()
806 @param test_name: name of the test
807 @param reason: (optional) string providing detailed reason for test
808 outcome
809 @param attributes: (optional) dict of string keyvals to associate with
810 this result
811 @param distinguishing_attributes: (optional) list of attribute names
812 that should be used to distinguish identically-named test
813 results. These attributes should be present in the attributes
814 parameter. This is used to generate user-friendly subdirectory
815 names.
816 @param child_test_ids: (optional) list of test indices for test results
817 used in generating this result.
818 """
819 subdirectory_name_parts = [test_name]
820 for attribute in distinguishing_attributes:
821 assert attributes
822 assert attribute in attributes, '%s not in %s' % (attribute,
823 attributes)
824 subdirectory_name_parts.append(attributes[attribute])
825 base_subdirectory_name = '.'.join(subdirectory_name_parts)
826
827 subdirectory = self._unique_subdirectory(base_subdirectory_name)
828 subdirectory_path = os.path.join(self.resultdir, subdirectory)
829 os.mkdir(subdirectory_path)
830
831 self.record(status_code, subdirectory, test_name,
832 status=reason, optional_fields={'is_summary': True})
833
834 if attributes:
835 utils.write_keyval(subdirectory_path, attributes)
836
837 if child_test_ids:
838 ids_string = ','.join(str(test_id) for test_id in child_test_ids)
839 summary_data = {'child_test_ids': ids_string}
840 utils.write_keyval(os.path.join(subdirectory_path, 'summary_data'),
841 summary_data)
842
843
jadmanski16a7ff72009-04-01 18:19:53 +0000844 def disable_warnings(self, warning_type):
jadmanskif37df842009-02-11 00:03:26 +0000845 self.warning_manager.disable_warnings(warning_type)
jadmanski16a7ff72009-04-01 18:19:53 +0000846 self.record("INFO", None, None,
847 "disabling %s warnings" % warning_type,
848 {"warnings.disable": warning_type})
jadmanskif37df842009-02-11 00:03:26 +0000849
850
jadmanski16a7ff72009-04-01 18:19:53 +0000851 def enable_warnings(self, warning_type):
jadmanskif37df842009-02-11 00:03:26 +0000852 self.warning_manager.enable_warnings(warning_type)
jadmanski16a7ff72009-04-01 18:19:53 +0000853 self.record("INFO", None, None,
854 "enabling %s warnings" % warning_type,
855 {"warnings.enable": warning_type})
jadmanskif37df842009-02-11 00:03:26 +0000856
857
jadmanski779bd292009-03-19 17:33:33 +0000858 def get_status_log_path(self, subdir=None):
859 """Return the path to the job status log.
860
861 @param subdir - Optional paramter indicating that you want the path
862 to a subdirectory status log.
863
864 @returns The path where the status log should be.
865 """
mbligh210bae62009-04-01 18:33:13 +0000866 if self.resultdir:
867 if subdir:
868 return os.path.join(self.resultdir, subdir, "status.log")
869 else:
870 return os.path.join(self.resultdir, "status.log")
jadmanski779bd292009-03-19 17:33:33 +0000871 else:
mbligh210bae62009-04-01 18:33:13 +0000872 return None
jadmanski779bd292009-03-19 17:33:33 +0000873
874
    def _update_uncollected_logs_list(self, update_func):
        """Updates the uncollected logs list in a multi-process safe manner.

        @param update_func - a function that updates the list of uncollected
                logs. Should take one parameter, the list to be updated.
        """
        # no-op when the job was started without an uncollected-logs file
        if self._uncollected_log_file:
            log_file = open(self._uncollected_log_file, "r+")
            # hold an exclusive lock across the whole read-modify-write cycle
            # so concurrent autoserv processes cannot interleave updates
            fcntl.flock(log_file, fcntl.LOCK_EX)
            try:
                uncollected_logs = pickle.load(log_file)
                update_func(uncollected_logs)
                # rewrite the file in place with the updated list
                log_file.seek(0)
                log_file.truncate()
                pickle.dump(uncollected_logs, log_file)
                # flush before unlocking so readers see the new contents
                log_file.flush()
            finally:
                fcntl.flock(log_file, fcntl.LOCK_UN)
                log_file.close()
894
895
896 def add_client_log(self, hostname, remote_path, local_path):
897 """Adds a new set of client logs to the list of uncollected logs,
898 to allow for future log recovery.
899
900 @param host - the hostname of the machine holding the logs
901 @param remote_path - the directory on the remote machine holding logs
902 @param local_path - the local directory to copy the logs into
903 """
904 def update_func(logs_list):
905 logs_list.append((hostname, remote_path, local_path))
906 self._update_uncollected_logs_list(update_func)
907
908
909 def remove_client_log(self, hostname, remote_path, local_path):
910 """Removes a set of client logs from the list of uncollected logs,
911 to allow for future log recovery.
912
913 @param host - the hostname of the machine holding the logs
914 @param remote_path - the directory on the remote machine holding logs
915 @param local_path - the local directory to copy the logs into
916 """
917 def update_func(logs_list):
918 logs_list.remove((hostname, remote_path, local_path))
919 self._update_uncollected_logs_list(update_func)
920
921
mbligh0d0f67d2009-11-06 03:15:03 +0000922 def get_client_logs(self):
923 """Retrieves the list of uncollected logs, if it exists.
924
925 @returns A list of (host, remote_path, local_path) tuples. Returns
926 an empty list if no uncollected logs file exists.
927 """
928 log_exists = (self._uncollected_log_file and
929 os.path.exists(self._uncollected_log_file))
930 if log_exists:
931 return pickle.load(open(self._uncollected_log_file))
932 else:
933 return []
934
935
    def _fill_server_control_namespace(self, namespace, protect=True):
        """
        Prepare a namespace to be used when executing server control files.

        This sets up the control file API by importing modules and making them
        available under the appropriate names within namespace.

        For use by _execute_code().

        Args:
          namespace: The namespace dictionary to fill in.
          protect: Boolean.  If True (the default) any operation that would
              clobber an existing entry in namespace will cause an error.
        Raises:
          error.AutoservError: When a name would be clobbered by import.
        """
        def _import_names(module_name, names=()):
            """
            Import a module and assign named attributes into namespace.

            Args:
                module_name: The string module name.
                names: A limiting list of names to import from module_name.  If
                    empty (the default), all names are imported from the module
                    similar to a "from foo.bar import *" statement.
            Raises:
                error.AutoservError: When a name being imported would clobber
                    a name already in namespace.
            """
            # import the leaf module; __import__ returns the top-level package
            module = __import__(module_name, {}, {}, names)

            # No names supplied?  Import * from the lowest level module.
            # (Ugh, why do I have to implement this part myself?)
            if not names:
                for submodule_name in module_name.split('.')[1:]:
                    module = getattr(module, submodule_name)
                if hasattr(module, '__all__'):
                    names = getattr(module, '__all__')
                else:
                    names = dir(module)

            # Install each name into namespace, checking to make sure it
            # doesn't override anything that already exists.
            for name in names:
                # Check for conflicts to help prevent future problems.
                if name in namespace and protect:
                    if namespace[name] is not getattr(module, name):
                        raise error.AutoservError('importing name '
                                '%s from %s %r would override %r' %
                                (name, module_name, getattr(module, name),
                                 namespace[name]))
                    else:
                        # Encourage cleanliness and the use of __all__ for a
                        # more concrete API with less surprises on '*' imports.
                        warnings.warn('%s (%r) being imported from %s for use '
                                      'in server control files is not the '
                                      'first occurrance of that import.' %
                                      (name, namespace[name], module_name))

                namespace[name] = getattr(module, name)


        # This is the equivalent of prepending a bunch of import statements to
        # the front of the control script.
        namespace.update(os=os, sys=sys, logging=logging)
        _import_names('autotest_lib.server',
                ('hosts', 'autotest', 'kvm', 'git', 'standalone_profiler',
                 'source_kernel', 'rpm_kernel', 'deb_kernel', 'git_kernel'))
        _import_names('autotest_lib.server.subcommand',
                      ('parallel', 'parallel_simple', 'subcommand'))
        _import_names('autotest_lib.server.utils',
                      ('run', 'get_tmp_dir', 'sh_escape', 'parse_machine'))
        _import_names('autotest_lib.client.common_lib.error')
        _import_names('autotest_lib.client.common_lib.barrier', ('barrier',))

        # Inject ourself as the job object into other classes within the API.
        # (Yuck, this injection is a gross thing be part of a public API. -gps)
        #
        # XXX Base & SiteAutotest do not appear to use .job.  Who does?
        namespace['autotest'].Autotest.job = self
        # server.hosts.base_classes.Host uses .job.
        namespace['hosts'].Host.job = self
        # propagate the job's ssh connection settings to the host factory
        namespace['hosts'].factory.ssh_user = self._ssh_user
        namespace['hosts'].factory.ssh_port = self._ssh_port
        namespace['hosts'].factory.ssh_pass = self._ssh_pass
mbligh084bc172008-10-18 14:02:45 +00001021
1022
1023 def _execute_code(self, code_file, namespace, protect=True):
mbligh2b92b862008-11-22 13:25:32 +00001024 """
1025 Execute code using a copy of namespace as a server control script.
mbligh084bc172008-10-18 14:02:45 +00001026
1027 Unless protect_namespace is explicitly set to False, the dict will not
1028 be modified.
1029
1030 Args:
1031 code_file: The filename of the control file to execute.
1032 namespace: A dict containing names to make available during execution.
1033 protect: Boolean. If True (the default) a copy of the namespace dict
1034 is used during execution to prevent the code from modifying its
1035 contents outside of this function. If False the raw dict is
1036 passed in and modifications will be allowed.
1037 """
1038 if protect:
1039 namespace = namespace.copy()
1040 self._fill_server_control_namespace(namespace, protect=protect)
1041 # TODO: Simplify and get rid of the special cases for only 1 machine.
showard3e66e8c2008-10-27 19:20:51 +00001042 if len(self.machines) > 1:
mbligh084bc172008-10-18 14:02:45 +00001043 machines_text = '\n'.join(self.machines) + '\n'
1044 # Only rewrite the file if it does not match our machine list.
1045 try:
1046 machines_f = open(MACHINES_FILENAME, 'r')
1047 existing_machines_text = machines_f.read()
1048 machines_f.close()
1049 except EnvironmentError:
1050 existing_machines_text = None
1051 if machines_text != existing_machines_text:
1052 utils.open_write_close(MACHINES_FILENAME, machines_text)
1053 execfile(code_file, namespace, namespace)
jadmanski10646442008-08-13 14:05:21 +00001054
1055
jadmanskie29d0e42010-06-17 16:06:52 +00001056 def _parse_status(self, new_line):
mbligh0d0f67d2009-11-06 03:15:03 +00001057 if not self._using_parser:
jadmanski10646442008-08-13 14:05:21 +00001058 return
jadmanskie29d0e42010-06-17 16:06:52 +00001059 new_tests = self.parser.process_lines([new_line])
jadmanski10646442008-08-13 14:05:21 +00001060 for test in new_tests:
1061 self.__insert_test(test)
1062
1063
1064 def __insert_test(self, test):
mbligh2b92b862008-11-22 13:25:32 +00001065 """
1066 An internal method to insert a new test result into the
jadmanski10646442008-08-13 14:05:21 +00001067 database. This method will not raise an exception, even if an
1068 error occurs during the insert, to avoid failing a test
1069 simply because of unexpected database issues."""
showard21baa452008-10-21 00:08:39 +00001070 self.num_tests_run += 1
1071 if status_lib.is_worse_than_or_equal_to(test.status, 'FAIL'):
1072 self.num_tests_failed += 1
jadmanski10646442008-08-13 14:05:21 +00001073 try:
1074 self.results_db.insert_test(self.job_model, test)
1075 except Exception:
1076 msg = ("WARNING: An unexpected error occured while "
1077 "inserting test results into the database. "
1078 "Ignoring error.\n" + traceback.format_exc())
1079 print >> sys.stderr, msg
1080
mblighcaa62c22008-04-07 21:51:17 +00001081
mblighfc3da5b2010-01-06 18:37:22 +00001082 def preprocess_client_state(self):
1083 """
1084 Produce a state file for initializing the state of a client job.
1085
1086 Creates a new client state file with all the current server state, as
1087 well as some pre-set client state.
1088
1089 @returns The path of the file the state was written into.
1090 """
1091 # initialize the sysinfo state
1092 self._state.set('client', 'sysinfo', self.sysinfo.serialize())
1093
1094 # dump the state out to a tempfile
1095 fd, file_path = tempfile.mkstemp(dir=self.tmpdir)
1096 os.close(fd)
mbligha2c99492010-01-27 22:59:50 +00001097
1098 # write_to_file doesn't need locking, we exclusively own file_path
mblighfc3da5b2010-01-06 18:37:22 +00001099 self._state.write_to_file(file_path)
1100 return file_path
1101
1102
1103 def postprocess_client_state(self, state_path):
1104 """
1105 Update the state of this job with the state from a client job.
1106
1107 Updates the state of the server side of a job with the final state
1108 of a client job that was run. Updates the non-client-specific state,
1109 pulls in some specific bits from the client-specific state, and then
1110 discards the rest. Removes the state file afterwards
1111
1112 @param state_file A path to the state file from the client.
1113 """
1114 # update the on-disk state
mblighfc3da5b2010-01-06 18:37:22 +00001115 try:
jadmanskib6e7bdb2010-04-13 16:00:39 +00001116 self._state.read_from_file(state_path)
mblighfc3da5b2010-01-06 18:37:22 +00001117 os.remove(state_path)
mbligha2c99492010-01-27 22:59:50 +00001118 except OSError, e:
mblighfc3da5b2010-01-06 18:37:22 +00001119 # ignore file-not-found errors
1120 if e.errno != errno.ENOENT:
1121 raise
jadmanskib6e7bdb2010-04-13 16:00:39 +00001122 else:
1123 logging.debug('Client state file %s not found', state_path)
mblighfc3da5b2010-01-06 18:37:22 +00001124
1125 # update the sysinfo state
1126 if self._state.has('client', 'sysinfo'):
1127 self.sysinfo.deserialize(self._state.get('client', 'sysinfo'))
1128
1129 # drop all the client-specific state
1130 self._state.discard_namespace('client')
1131
1132
mbligh0a883702010-04-21 01:58:34 +00001133 def clear_all_known_hosts(self):
1134 """Clears known hosts files for all AbstractSSHHosts."""
1135 for host in self.hosts:
1136 if isinstance(host, abstract_ssh.AbstractSSHHost):
1137 host.clear_known_hosts()
1138
1139
class warning_manager(object):
    """Class for controlling warning logs.

    For every warning type this keeps a list of (start, end) epoch-second
    intervals during which that type was disabled; an end of None marks an
    interval that is still open (i.e. the type is currently disabled).
    """
    def __init__(self):
        # a map of warning types to a list of disabled time intervals
        self.disabled_warnings = {}


    def is_valid(self, timestamp, warning_type):
        """Indicates if a warning (based on the time it occured and its type)
        is a valid warning. A warning is considered "invalid" if this type of
        warning was marked as "disabled" at the time the warning occured."""
        for begin, finish in self.disabled_warnings.get(warning_type, []):
            if begin <= timestamp and (finish is None or timestamp < finish):
                return False
        return True


    def disable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, disables all further warnings of this type."""
        spans = self.disabled_warnings.setdefault(warning_type, [])
        # only open a new interval if the last one has already been closed
        if not (spans and spans[-1][1] is None):
            spans.append((int(current_time_func()), None))


    def enable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, enables all further warnings of this type."""
        spans = self.disabled_warnings.get(warning_type, [])
        if spans and spans[-1][1] is None:
            # close the currently-open interval at the present time
            spans[-1] = (spans[-1][0], int(current_time_func()))
Paul Pendlebury57593562011-06-15 10:45:49 -07001171
1172
# load up site-specific code for generating site-specific job data
# (falls back to the dummy implementation when no site module exists)
get_site_job_data = utils.import_site_function(__file__,
    "autotest_lib.server.site_server_job", "get_site_job_data",
    _get_site_job_data_dummy)


# pick up the site-specific server_job specialization, defaulting to
# base_server_job when no site class is provided
site_server_job = utils.import_site_class(
    __file__, "autotest_lib.server.site_server_job", "site_server_job",
    base_server_job)
1182
1183
class server_job(site_server_job):
    """The server-side job class: base_server_job with any site-specific
    customizations (from site_server_job) applied."""
    pass