blob: fcfd4019e0285a979521ef24165d014fc5b7315b [file] [log] [blame]
Paul Pendlebury7c1fdcf2011-05-04 12:39:15 -07001# Copyright (c) 2011 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
mbligh57e78662008-06-17 19:53:49 +00004"""
5The main job wrapper for the server side.
6
7This is the core infrastructure. Derived from the client side job.py
8
9Copyright Martin J. Bligh, Andy Whitcroft 2007
10"""
11
Scott Zawalski91493c82013-01-25 16:15:20 -050012import getpass, os, sys, re, tempfile, time, select, platform
mblighfc3da5b2010-01-06 18:37:22 +000013import traceback, shutil, warnings, fcntl, pickle, logging, itertools, errno
showard75cdfee2009-06-10 17:40:41 +000014from autotest_lib.client.bin import sysinfo
mbligh0d0f67d2009-11-06 03:15:03 +000015from autotest_lib.client.common_lib import base_job
Scott Zawalski91493c82013-01-25 16:15:20 -050016from autotest_lib.client.common_lib import error, utils, packages
showard75cdfee2009-06-10 17:40:41 +000017from autotest_lib.client.common_lib import logging_manager
Paul Pendlebury57593562011-06-15 10:45:49 -070018from autotest_lib.server import test, subcommand, profilers
mbligh0a883702010-04-21 01:58:34 +000019from autotest_lib.server.hosts import abstract_ssh
jadmanski10646442008-08-13 14:05:21 +000020from autotest_lib.tko import db as tko_db, status_lib, utils as tko_utils
jadmanski10646442008-08-13 14:05:21 +000021
22
mbligh084bc172008-10-18 14:02:45 +000023def _control_segment_path(name):
24 """Get the pathname of the named control segment file."""
jadmanski10646442008-08-13 14:05:21 +000025 server_dir = os.path.dirname(os.path.abspath(__file__))
mbligh084bc172008-10-18 14:02:45 +000026 return os.path.join(server_dir, "control_segments", name)
jadmanski10646442008-08-13 14:05:21 +000027
28
# Well-known filenames inside a job results directory.
CLIENT_CONTROL_FILENAME = 'control'        # client-side control file copy
SERVER_CONTROL_FILENAME = 'control.srv'    # server-side control file copy
MACHINES_FILENAME = '.machines'            # list of machines used by the job

# Canned control segments executed by the server job at the appropriate
# stage of a job run (install, verify, repair, crash collection, etc.).
CLIENT_WRAPPER_CONTROL_FILE = _control_segment_path('client_wrapper')
CRASHDUMPS_CONTROL_FILE = _control_segment_path('crashdumps')
CRASHINFO_CONTROL_FILE = _control_segment_path('crashinfo')
INSTALL_CONTROL_FILE = _control_segment_path('install')
CLEANUP_CONTROL_FILE = _control_segment_path('cleanup')
VERIFY_CONTROL_FILE = _control_segment_path('verify')
REPAIR_CONTROL_FILE = _control_segment_path('repair')
PROVISION_CONTROL_FILE = _control_segment_path('provision')
42
# by default provide a stub that generates no site data
def _get_site_job_data_dummy(job):
    """Default get_site_job_data implementation: no site-specific keyvals.

    @param job: The server job instance (unused by this stub).
    @return An empty dict.
    """
    return {}
46
47
class status_indenter(base_job.status_indenter):
    """A status indenter backed by a single integer counter."""

    def __init__(self):
        self._indent = 0


    @property
    def indent(self):
        """The current indentation level."""
        return self._indent


    def increment(self):
        """Increase the indentation level by one."""
        self._indent += 1


    def decrement(self):
        """Decrease the indentation level by one."""
        self._indent -= 1


    def get_context(self):
        """Returns a context object for use by job.get_record_context."""
        # capture the indenter and current level in the closure; restore()
        # simply rewinds the counter to the captured value
        indenter, saved_indent = self, self._indent
        class context(object):
            def restore(self):
                indenter._indent = saved_indent
        return context()
77
class server_job_record_hook(object):
    """The job.record hook for the server job.

    Injects WARN messages gathered from the console/vlm warning loggers
    whenever new status logs are written, and echoes every rendered log
    line to INFO-level logging. Implemented as a class so that it can keep
    a re-entrancy flag: the hook itself calls job.record to emit WARN
    entries, and the flag stops that from recursing forever.

    Depends on job._read_warnings and job._logger.
    """

    def __init__(self, job):
        self._job = job
        self._in_hook = False


    def __call__(self, entry):
        """Invoke the real hook (_hook) unless we are already inside it.

        This makes no attempt to be threadsafe; it only blocks the
        job.record -> _hook -> job.record -> ... recursion outright.
        """
        if self._in_hook:
            return
        self._in_hook = True
        try:
            self._hook(self._job, entry)
        finally:
            self._in_hook = False


    @staticmethod
    def _hook(job, entry):
        """The core hook, which can safely call job.record."""
        # drain all warning loggers and record each new warning
        new_warnings = []
        for timestamp, msg in job._read_warnings():
            warn = base_job.status_log_entry(
                    'WARN', None, None, msg, {}, timestamp=timestamp)
            new_warnings.append(warn)
            job.record_entry(warn)
        # echo rendered versions of the warnings plus the triggering entry
        for log_entry in new_warnings + [entry]:
            rendered = job._logger.render_entry(log_entry)
            logging.info(rendered)
            job._parse_status(rendered)
123
mbligh0d0f67d2009-11-06 03:15:03 +0000124class base_server_job(base_job.base_job):
125 """The server-side concrete implementation of base_job.
jadmanski10646442008-08-13 14:05:21 +0000126
mbligh0d0f67d2009-11-06 03:15:03 +0000127 Optional properties provided by this implementation:
128 serverdir
129 conmuxdir
130
131 num_tests_run
132 num_tests_failed
133
134 warning_manager
135 warning_loggers
jadmanski10646442008-08-13 14:05:21 +0000136 """
137
mbligh0d0f67d2009-11-06 03:15:03 +0000138 _STATUS_VERSION = 1
jadmanski10646442008-08-13 14:05:21 +0000139
    def __init__(self, control, args, resultdir, label, user, machines,
                 client=False, parse_job='',
                 ssh_user='root', ssh_port=22, ssh_pass='', test_retry=0,
                 group_name='', tag='',
                 control_filename=SERVER_CONTROL_FILENAME):
        """
        Create a server side job object.

        @param control: The pathname of the control file.
        @param args: Passed to the control file.
        @param resultdir: Where to throw the results.
        @param label: Description of the job.
        @param user: Username for the job (email address); defaults to the
                current OS user when empty.
        @param machines: A list of hostnames the job will run against.
        @param client: True if this is a client-side control file.
        @param parse_job: string, if supplied it is the job execution tag that
                the results will be passed through to the TKO parser with.
        @param ssh_user: The SSH username. [root]
        @param ssh_port: The SSH port number. [22]
        @param ssh_pass: The SSH passphrase, if needed.
        @param test_retry: The number of times to retry a test if the test did
                not complete successfully.
        @param group_name: If supplied, this will be written out as
                host_group_name in the keyvals file for the parser.
        @param tag: The job execution tag from the scheduler. [optional]
        @param control_filename: The filename where the server control file
                should be written in the results directory.
        """
        super(base_server_job, self).__init__(resultdir=resultdir,
                                              test_retry=test_retry)
        # NOTE(review): 'path' is computed but never used below.
        path = os.path.dirname(__file__)
        self.test_retry = test_retry
        self.control = control
        # marker file listing logs not yet collected from remote machines
        self._uncollected_log_file = os.path.join(self.resultdir,
                                                  'uncollected_logs')
        debugdir = os.path.join(self.resultdir, 'debug')
        if not os.path.exists(debugdir):
            os.mkdir(debugdir)

        # fall back to the OS-level user when no job user was supplied
        if user:
            self.user = user
        else:
            self.user = getpass.getuser()

        self.args = args
        self.label = label
        self.machines = machines
        self._client = client
        self.warning_loggers = set()
        self.warning_manager = warning_manager()
        self._ssh_user = ssh_user
        self._ssh_port = ssh_port
        self._ssh_pass = ssh_pass
        self.tag = tag
        self.last_boot_tag = None
        self.hosts = set()
        self.drop_caches = False
        self.drop_caches_between_iterations = False
        self._control_filename = control_filename

        # take over stdout/stderr so all output lands in the job logs
        self.logging = logging_manager.get_logging_manager(
                manage_stdout_and_stderr=True, redirect_fds=True)
        subcommand.logging_manager_object = self.logging

        self.sysinfo = sysinfo.sysinfo(self.resultdir)
        self.profilers = profilers.profilers(self)

        job_data = {'label' : label, 'user' : user,
                    'hostname' : ','.join(machines),
                    'drone' : platform.node(),
                    'status_version' : str(self._STATUS_VERSION),
                    'job_started' : str(int(time.time()))}
        if group_name:
            job_data['host_group_name'] = group_name

        # only write these keyvals out on the first job in a resultdir
        if 'job_started' not in utils.read_keyval(self.resultdir):
            # get_site_job_data is injected at module level (dummy by default)
            job_data.update(get_site_job_data(self))
            utils.write_keyval(self.resultdir, job_data)

        self._parse_job = parse_job
        # continuous parsing only works for single-machine jobs
        self._using_parser = (self._parse_job and len(machines) <= 1)
        self.pkgmgr = packages.PackageManager(
                self.autodir, run_function_dargs={'timeout':600})
        self.num_tests_run = 0
        self.num_tests_failed = 0

        self._register_subcommand_hooks()

        # these components aren't usable on the server
        self.bootloader = None
        self.harness = None

        # set up the status logger
        self._indenter = status_indenter()
        self._logger = base_job.status_logger(
            self, self._indenter, 'status.log', 'status.log',
            record_hook=server_job_record_hook(self))
mbligh0d0f67d2009-11-06 03:15:03 +0000238
239 @classmethod
240 def _find_base_directories(cls):
241 """
242 Determine locations of autodir, clientdir and serverdir. Assumes
243 that this file is located within serverdir and uses __file__ along
244 with relative paths to resolve the location.
245 """
246 serverdir = os.path.abspath(os.path.dirname(__file__))
247 autodir = os.path.normpath(os.path.join(serverdir, '..'))
248 clientdir = os.path.join(autodir, 'client')
249 return autodir, clientdir, serverdir
250
251
Scott Zawalski91493c82013-01-25 16:15:20 -0500252 def _find_resultdir(self, resultdir, *args, **dargs):
mbligh0d0f67d2009-11-06 03:15:03 +0000253 """
254 Determine the location of resultdir. For server jobs we expect one to
255 always be explicitly passed in to __init__, so just return that.
256 """
257 if resultdir:
258 return os.path.normpath(resultdir)
259 else:
260 return None
261
jadmanski550fdc22008-11-20 16:32:08 +0000262
    def _get_status_logger(self):
        """Return a reference to the status logger.

        @return The base_job.status_logger instance created in __init__.
        """
        return self._logger
266
267
jadmanskie432dd22009-01-30 15:04:51 +0000268 @staticmethod
269 def _load_control_file(path):
270 f = open(path)
271 try:
272 control_file = f.read()
273 finally:
274 f.close()
275 return re.sub('\r', '', control_file)
276
277
jadmanski550fdc22008-11-20 16:32:08 +0000278 def _register_subcommand_hooks(self):
mbligh2b92b862008-11-22 13:25:32 +0000279 """
280 Register some hooks into the subcommand modules that allow us
281 to properly clean up self.hosts created in forked subprocesses.
282 """
jadmanski550fdc22008-11-20 16:32:08 +0000283 def on_fork(cmd):
284 self._existing_hosts_on_fork = set(self.hosts)
285 def on_join(cmd):
286 new_hosts = self.hosts - self._existing_hosts_on_fork
287 for host in new_hosts:
288 host.close()
289 subcommand.subcommand.register_fork_hook(on_fork)
290 subcommand.subcommand.register_join_hook(on_join)
291
jadmanski10646442008-08-13 14:05:21 +0000292
    def init_parser(self):
        """
        Start the continuous parsing of self.resultdir. This sets up
        the database connection and inserts the basic job object into
        the database if necessary.

        No-op unless continuous parsing was enabled in __init__
        (self._using_parser).
        """
        if not self._using_parser:
            return
        # redirect parser debugging to .parse.log
        parse_log = os.path.join(self.resultdir, '.parse.log')
        # buffering=0: unbuffered (Python 2), so debug output is never lost
        parse_log = open(parse_log, 'w', 0)
        tko_utils.redirect_parser_debugging(parse_log)
        # create a job model object and set up the db
        self.results_db = tko_db.db(autocommit=True)
        self.parser = status_lib.parser(self._STATUS_VERSION)
        self.job_model = self.parser.make_job(self.resultdir)
        self.parser.start(self.job_model)
        # check if a job already exists in the db and insert it if
        # it does not
        job_idx = self.results_db.find_job(self._parse_job)
        if job_idx is None:
            self.results_db.insert_job(self._parse_job, self.job_model)
        else:
            # job already present: attach the existing db indices instead
            machine_idx = self.results_db.lookup_machine(self.job_model.machine)
            self.job_model.index = job_idx
            self.job_model.machine_idx = machine_idx
320
321 def cleanup_parser(self):
mbligh2b92b862008-11-22 13:25:32 +0000322 """
323 This should be called after the server job is finished
jadmanski10646442008-08-13 14:05:21 +0000324 to carry out any remaining cleanup (e.g. flushing any
mbligh2b92b862008-11-22 13:25:32 +0000325 remaining test results to the results db)
326 """
mbligh0d0f67d2009-11-06 03:15:03 +0000327 if not self._using_parser:
jadmanski10646442008-08-13 14:05:21 +0000328 return
329 final_tests = self.parser.end()
330 for test in final_tests:
331 self.__insert_test(test)
mbligh0d0f67d2009-11-06 03:15:03 +0000332 self._using_parser = False
jadmanski10646442008-08-13 14:05:21 +0000333
334
    def verify(self):
        """Run the verify control segment against all of the job's machines.

        @raises error.AutoservError: If no machines are specified.
        Any exception escaping the verify control segment is recorded as an
        ABORT in the status log and then re-raised.
        """
        if not self.machines:
            raise error.AutoservError('No machines specified to verify')
        if self.resultdir:
            os.chdir(self.resultdir)
        try:
            namespace = {'machines' : self.machines, 'job' : self,
                         'ssh_user' : self._ssh_user,
                         'ssh_port' : self._ssh_port,
                         'ssh_pass' : self._ssh_pass}
            self._execute_code(VERIFY_CONTROL_FILE, namespace, protect=False)
        except Exception, e:
            msg = ('Verify failed\n' + str(e) + '\n' + traceback.format_exc())
            self.record('ABORT', None, None, msg)
            raise
350
351
352 def repair(self, host_protection):
353 if not self.machines:
354 raise error.AutoservError('No machines specified to repair')
mbligh0fce4112008-11-27 00:37:17 +0000355 if self.resultdir:
356 os.chdir(self.resultdir)
jadmanski10646442008-08-13 14:05:21 +0000357 namespace = {'machines': self.machines, 'job': self,
mbligh0d0f67d2009-11-06 03:15:03 +0000358 'ssh_user': self._ssh_user, 'ssh_port': self._ssh_port,
359 'ssh_pass': self._ssh_pass,
jadmanski10646442008-08-13 14:05:21 +0000360 'protection_level': host_protection}
mbligh25c0b8c2009-01-24 01:44:17 +0000361
mbligh0931b0a2009-04-08 17:44:48 +0000362 self._execute_code(REPAIR_CONTROL_FILE, namespace, protect=False)
jadmanski10646442008-08-13 14:05:21 +0000363
364
Alex Millercb79ba72013-05-29 14:43:00 -0700365 def provision(self, labels):
366 """
367 Provision all hosts to match |labels|.
368
369 @param labels: A comma seperated string of labels to provision the
370 host to.
371
372 """
373 namespace = {'provision_labels': labels}
374 control = self._load_control_file(PROVISION_CONTROL_FILE)
375 self.run(control=control, namespace=namespace)
376
377
    def precheck(self):
        """
        perform any additional checks in derived classes.

        Base implementation is a no-op hook; subclasses may override.
        """
        pass
383
384
    def enable_external_logging(self):
        """
        Start or restart external logging mechanism.

        No-op hook; subclasses with an external logging mechanism override.
        """
        pass
390
391
    def disable_external_logging(self):
        """
        Pause or stop external logging mechanism.

        No-op hook; subclasses with an external logging mechanism override.
        """
        pass
397
398
    def use_external_logging(self):
        """
        Return True if external logging should be used.

        Base implementation always returns False; subclasses override.
        """
        return False
404
405
    def _make_parallel_wrapper(self, function, machines, log):
        """Wrap function as appropriate for calling by parallel_simple.

        @param function: Callable taking a single machine name.
        @param machines: The list of machines the caller will fan out over.
        @param log: Whether per-machine output/keyvals should be written.
        @return A callable suitable for subcommand.parallel_simple.
        """
        # forking happens unless this is a single-machine job run on its
        # own machine list
        is_forking = not (len(machines) == 1 and self.machines == machines)
        if self._parse_job and is_forking and log:
            def wrapper(machine):
                # NOTE(review): these self mutations look safe only because
                # the wrapper runs in a forked subprocess -- confirm.
                self._parse_job += "/" + machine
                self._using_parser = True
                self.machines = [machine]
                self.push_execution_context(machine)
                os.chdir(self.resultdir)
                utils.write_keyval(self.resultdir, {"hostname": machine})
                self.init_parser()
                result = function(machine)
                self.cleanup_parser()
                return result
        elif len(machines) > 1 and log:
            def wrapper(machine):
                # multi-machine, no parsing: just set up per-machine results
                self.push_execution_context(machine)
                os.chdir(self.resultdir)
                machine_data = {'hostname' : machine,
                                'status_version' : str(self._STATUS_VERSION)}
                utils.write_keyval(self.resultdir, machine_data)
                result = function(machine)
                return result
        else:
            # nothing extra to do; call the function directly
            wrapper = function
        return wrapper
433
434
435 def parallel_simple(self, function, machines, log=True, timeout=None,
436 return_results=False):
437 """
438 Run 'function' using parallel_simple, with an extra wrapper to handle
439 the necessary setup for continuous parsing, if possible. If continuous
440 parsing is already properly initialized then this should just work.
441
442 @param function: A callable to run in parallel given each machine.
443 @param machines: A list of machine names to be passed one per subcommand
444 invocation of function.
445 @param log: If True, output will be written to output in a subdirectory
446 named after each machine.
447 @param timeout: Seconds after which the function call should timeout.
448 @param return_results: If True instead of an AutoServError being raised
449 on any error a list of the results|exceptions from the function
450 called on each arg is returned. [default: False]
451
452 @raises error.AutotestError: If any of the functions failed.
453 """
454 wrapper = self._make_parallel_wrapper(function, machines, log)
455 return subcommand.parallel_simple(wrapper, machines,
456 log=log, timeout=timeout,
457 return_results=return_results)
458
459
460 def parallel_on_machines(self, function, machines, timeout=None):
461 """
showardcd5fac42009-07-06 20:19:43 +0000462 @param function: Called in parallel with one machine as its argument.
mbligh415dc212009-06-15 21:53:34 +0000463 @param machines: A list of machines to call function(machine) on.
464 @param timeout: Seconds after which the function call should timeout.
465
466 @returns A list of machines on which function(machine) returned
467 without raising an exception.
468 """
showardcd5fac42009-07-06 20:19:43 +0000469 results = self.parallel_simple(function, machines, timeout=timeout,
mbligh415dc212009-06-15 21:53:34 +0000470 return_results=True)
471 success_machines = []
472 for result, machine in itertools.izip(results, machines):
473 if not isinstance(result, Exception):
474 success_machines.append(machine)
475 return success_machines
jadmanski10646442008-08-13 14:05:21 +0000476
477
    # sentinel value: "write control file copies into a fresh temp dir"
    _USE_TEMP_DIR = object()
    def run(self, cleanup=False, install_before=False, install_after=False,
            collect_crashdumps=True, namespace={}, control=None,
            control_file_dir=None, only_collect_crashinfo=False):
        """Execute the job's control file plus surrounding control segments.

        @param cleanup: If True, run the cleanup segment afterwards.
        @param install_before: If True, run the install segment on the
                machines before the control file.
        @param install_after: If True, run the install segment afterwards.
        @param collect_crashdumps: If True, collect crashdumps afterwards.
        @param namespace: Extra names exposed to the control file. The
                mutable default is safe here because it is copied below
                before being modified.
        @param control: Control file text; when None, self.control is
                loaded from disk (or '' if unset).
        @param control_file_dir: Where to write the control file copies;
                None/_USE_TEMP_DIR means use a temporary directory.
        @param only_collect_crashinfo: If True, skip running the control
                file and only perform crash info collection.
        """
        # for a normal job, make sure the uncollected logs file exists
        # for a crashinfo-only run it should already exist, bail out otherwise
        created_uncollected_logs = False
        if self.resultdir and not os.path.exists(self._uncollected_log_file):
            if only_collect_crashinfo:
                # if this is a crashinfo-only run, and there were no existing
                # uncollected logs, just bail out early
                logging.info("No existing uncollected logs, "
                             "skipping crashinfo collection")
                return
            else:
                log_file = open(self._uncollected_log_file, "w")
                pickle.dump([], log_file)
                log_file.close()
                created_uncollected_logs = True

        # use a copy so changes don't affect the original dictionary
        namespace = namespace.copy()
        machines = self.machines
        if control is None:
            if self.control is None:
                control = ''
            else:
                control = self._load_control_file(self.control)
        if control_file_dir is None:
            control_file_dir = self.resultdir

        self.aborted = False
        # expose the standard job environment to the control file
        namespace['machines'] = machines
        namespace['args'] = self.args
        namespace['job'] = self
        namespace['ssh_user'] = self._ssh_user
        namespace['ssh_port'] = self._ssh_port
        namespace['ssh_pass'] = self._ssh_pass
        test_start_time = int(time.time())

        if self.resultdir:
            os.chdir(self.resultdir)
            # touch status.log so that the parser knows a job is running here
            open(self.get_status_log_path(), 'a').close()
            self.enable_external_logging()

        # assume crashinfo collection is needed until the control file
        # completes without an exception
        collect_crashinfo = True
        temp_control_file_dir = None
        try:
            try:
                if install_before and machines:
                    self._execute_code(INSTALL_CONTROL_FILE, namespace)

                if only_collect_crashinfo:
                    return

                # determine the dir to write the control files to
                cfd_specified = (control_file_dir
                                 and control_file_dir is not self._USE_TEMP_DIR)
                if cfd_specified:
                    temp_control_file_dir = None
                else:
                    temp_control_file_dir = tempfile.mkdtemp(
                            suffix='temp_control_file_dir')
                    control_file_dir = temp_control_file_dir
                server_control_file = os.path.join(control_file_dir,
                                                   self._control_filename)
                client_control_file = os.path.join(control_file_dir,
                                                   CLIENT_CONTROL_FILENAME)
                if self._client:
                    # client-side job: the server runs a wrapper that ships
                    # the real control file to the client
                    namespace['control'] = control
                    utils.open_write_close(client_control_file, control)
                    shutil.copyfile(CLIENT_WRAPPER_CONTROL_FILE,
                                    server_control_file)
                else:
                    utils.open_write_close(server_control_file, control)
                logging.info("Processing control file")
                self._execute_code(server_control_file, namespace)
                logging.info("Finished processing control file")

                # no error occured, so we don't need to collect crashinfo
                collect_crashinfo = False
            except Exception, e:
                try:
                    logging.exception(
                            'Exception escaped control file, job aborting:')
                    self.record('INFO', None, None, str(e),
                                {'job_abort_reason': str(e)})
                except:
                    pass # don't let logging exceptions here interfere
                raise
        finally:
            if temp_control_file_dir:
                # Clean up temp directory used for copies of the control files
                try:
                    shutil.rmtree(temp_control_file_dir)
                except Exception, e:
                    logging.warn('Could not remove temp directory %s: %s',
                                 temp_control_file_dir, e)

            if machines and (collect_crashdumps or collect_crashinfo):
                namespace['test_start_time'] = test_start_time
                if collect_crashinfo:
                    # includes crashdumps
                    self._execute_code(CRASHINFO_CONTROL_FILE, namespace)
                else:
                    self._execute_code(CRASHDUMPS_CONTROL_FILE, namespace)
            self.disable_external_logging()
            if cleanup and machines:
                self._execute_code(CLEANUP_CONTROL_FILE, namespace)
            # only delete the marker file if this invocation created it
            if self._uncollected_log_file and created_uncollected_logs:
                os.remove(self._uncollected_log_file)
            if install_after and machines:
                self._execute_code(INSTALL_CONTROL_FILE, namespace)
jadmanski10646442008-08-13 14:05:21 +0000592
593
    def run_test(self, url, *args, **dargs):
        """
        Summon a test object and run it.

        @param url: url of the test to run.
        Keyword argument 'tag' (inside dargs) is a tag to add to testname.

        @return True on success; False if the test raised a
                TestBaseException (already recorded in the status log).
        Any other exception is re-raised to the caller.
        """
        group, testname = self.pkgmgr.get_package_name(url, 'test')
        testname, subdir, tag = self._build_tagged_test_name(testname, dargs)
        outputdir = self._make_test_outputdir(subdir)

        def group_func():
            try:
                test.runtest(self, url, tag, args, dargs)
            except error.TestBaseException, e:
                # a test-level failure: record with the test's own status
                self.record(e.exit_status, subdir, testname, str(e))
                raise
            except Exception, e:
                # any other exception counts as an outright FAIL
                info = str(e) + "\n" + traceback.format_exc()
                self.record('FAIL', subdir, testname, info)
                raise
            else:
                self.record('GOOD', subdir, testname, 'completed successfully')

        result, exc_info = self._run_group(testname, subdir, group_func)
        if exc_info and isinstance(exc_info[1], error.TestBaseException):
            return False
        elif exc_info:
            # re-raise with the original traceback (Python 2 3-arg raise)
            raise exc_info[0], exc_info[1], exc_info[2]
        else:
            return True
jadmanski10646442008-08-13 14:05:21 +0000627
628
    def _run_group(self, name, subdir, function, *args, **dargs):
        """\
        Underlying method for running something inside of a group.

        Records START, calls function, then records END GOOD / END <status>
        / END ABORT depending on the outcome.

        @return (result, exc_info): result is function's return value (or
                None), exc_info is sys.exc_info() when a TestBaseException
                was caught, else None.
        @raises error.JobError: If function raised a non-test exception.
        """
        result, exc_info = None, None
        try:
            self.record('START', subdir, name)
            result = function(*args, **dargs)
        except error.TestBaseException, e:
            # test failures end the group but are returned, not re-raised
            self.record("END %s" % e.exit_status, subdir, name)
            exc_info = sys.exc_info()
        except Exception, e:
            # anything else aborts the group and escalates to a JobError
            err_msg = str(e) + '\n'
            err_msg += traceback.format_exc()
            self.record('END ABORT', subdir, name, err_msg)
            raise error.JobError(name + ' failed\n' + traceback.format_exc())
        else:
            self.record('END GOOD', subdir, name)

        return result, exc_info
jadmanski10646442008-08-13 14:05:21 +0000649
650
651 def run_group(self, function, *args, **dargs):
652 """\
653 function:
654 subroutine to run
655 *args:
656 arguments for the function
657 """
658
659 name = function.__name__
660
661 # Allow the tag for the group to be specified.
662 tag = dargs.pop('tag', None)
663 if tag:
664 name = tag
665
jadmanskide292df2008-08-26 20:51:14 +0000666 return self._run_group(name, None, function, *args, **dargs)[0]
jadmanski10646442008-08-13 14:05:21 +0000667
668
669 def run_reboot(self, reboot_func, get_kernel_func):
670 """\
671 A specialization of run_group meant specifically for handling
672 a reboot. Includes support for capturing the kernel version
673 after the reboot.
674
675 reboot_func: a function that carries out the reboot
676
677 get_kernel_func: a function that returns a string
678 representing the kernel version.
679 """
jadmanski10646442008-08-13 14:05:21 +0000680 try:
681 self.record('START', None, 'reboot')
jadmanski10646442008-08-13 14:05:21 +0000682 reboot_func()
683 except Exception, e:
jadmanski10646442008-08-13 14:05:21 +0000684 err_msg = str(e) + '\n' + traceback.format_exc()
685 self.record('END FAIL', None, 'reboot', err_msg)
jadmanski4b51d542009-04-08 14:17:16 +0000686 raise
jadmanski10646442008-08-13 14:05:21 +0000687 else:
688 kernel = get_kernel_func()
jadmanski10646442008-08-13 14:05:21 +0000689 self.record('END GOOD', None, 'reboot',
Dale Curtis74a314b2011-06-23 14:55:46 -0700690 optional_fields={"kernel": kernel})
jadmanski10646442008-08-13 14:05:21 +0000691
692
jadmanskie432dd22009-01-30 15:04:51 +0000693 def run_control(self, path):
694 """Execute a control file found at path (relative to the autotest
695 path). Intended for executing a control file within a control file,
696 not for running the top-level job control file."""
697 path = os.path.join(self.autodir, path)
698 control_file = self._load_control_file(path)
mbligh0d0f67d2009-11-06 03:15:03 +0000699 self.run(control=control_file, control_file_dir=self._USE_TEMP_DIR)
jadmanskie432dd22009-01-30 15:04:51 +0000700
701
jadmanskic09fc152008-10-15 17:56:59 +0000702 def add_sysinfo_command(self, command, logfile=None, on_every_test=False):
mbligh4395bbd2009-03-25 19:34:17 +0000703 self._add_sysinfo_loggable(sysinfo.command(command, logf=logfile),
jadmanskic09fc152008-10-15 17:56:59 +0000704 on_every_test)
705
706
707 def add_sysinfo_logfile(self, file, on_every_test=False):
708 self._add_sysinfo_loggable(sysinfo.logfile(file), on_every_test)
709
710
711 def _add_sysinfo_loggable(self, loggable, on_every_test):
712 if on_every_test:
713 self.sysinfo.test_loggables.add(loggable)
714 else:
715 self.sysinfo.boot_loggables.add(loggable)
716
717
jadmanski10646442008-08-13 14:05:21 +0000718 def _read_warnings(self):
jadmanskif37df842009-02-11 00:03:26 +0000719 """Poll all the warning loggers and extract any new warnings that have
720 been logged. If the warnings belong to a category that is currently
721 disabled, this method will discard them and they will no longer be
722 retrievable.
723
724 Returns a list of (timestamp, message) tuples, where timestamp is an
725 integer epoch timestamp."""
jadmanski10646442008-08-13 14:05:21 +0000726 warnings = []
727 while True:
728 # pull in a line of output from every logger that has
729 # output ready to be read
mbligh2b92b862008-11-22 13:25:32 +0000730 loggers, _, _ = select.select(self.warning_loggers, [], [], 0)
jadmanski10646442008-08-13 14:05:21 +0000731 closed_loggers = set()
732 for logger in loggers:
733 line = logger.readline()
734 # record any broken pipes (aka line == empty)
735 if len(line) == 0:
736 closed_loggers.add(logger)
737 continue
jadmanskif37df842009-02-11 00:03:26 +0000738 # parse out the warning
739 timestamp, msgtype, msg = line.split('\t', 2)
740 timestamp = int(timestamp)
741 # if the warning is valid, add it to the results
742 if self.warning_manager.is_valid(timestamp, msgtype):
743 warnings.append((timestamp, msg.strip()))
jadmanski10646442008-08-13 14:05:21 +0000744
745 # stop listening to loggers that are closed
746 self.warning_loggers -= closed_loggers
747
748 # stop if none of the loggers have any output left
749 if not loggers:
750 break
751
752 # sort into timestamp order
753 warnings.sort()
754 return warnings
755
756
showardcc929362010-01-25 21:20:41 +0000757 def _unique_subdirectory(self, base_subdirectory_name):
758 """Compute a unique results subdirectory based on the given name.
759
760 Appends base_subdirectory_name with a number as necessary to find a
761 directory name that doesn't already exist.
762 """
763 subdirectory = base_subdirectory_name
764 counter = 1
765 while os.path.exists(os.path.join(self.resultdir, subdirectory)):
766 subdirectory = base_subdirectory_name + '.' + str(counter)
767 counter += 1
768 return subdirectory
769
770
jadmanski52053632010-06-11 21:08:10 +0000771 def get_record_context(self):
772 """Returns an object representing the current job.record context.
773
774 The object returned is an opaque object with a 0-arg restore method
775 which can be called to restore the job.record context (i.e. indentation)
776 to the current level. The intention is that it should be used when
777 something external which generate job.record calls (e.g. an autotest
778 client) can fail catastrophically and the server job record state
779 needs to be reset to its original "known good" state.
780
781 @return: A context object with a 0-arg restore() method."""
782 return self._indenter.get_context()
783
784
showardcc929362010-01-25 21:20:41 +0000785 def record_summary(self, status_code, test_name, reason='', attributes=None,
786 distinguishing_attributes=(), child_test_ids=None):
787 """Record a summary test result.
788
789 @param status_code: status code string, see
790 common_lib.log.is_valid_status()
791 @param test_name: name of the test
792 @param reason: (optional) string providing detailed reason for test
793 outcome
794 @param attributes: (optional) dict of string keyvals to associate with
795 this result
796 @param distinguishing_attributes: (optional) list of attribute names
797 that should be used to distinguish identically-named test
798 results. These attributes should be present in the attributes
799 parameter. This is used to generate user-friendly subdirectory
800 names.
801 @param child_test_ids: (optional) list of test indices for test results
802 used in generating this result.
803 """
804 subdirectory_name_parts = [test_name]
805 for attribute in distinguishing_attributes:
806 assert attributes
807 assert attribute in attributes, '%s not in %s' % (attribute,
808 attributes)
809 subdirectory_name_parts.append(attributes[attribute])
810 base_subdirectory_name = '.'.join(subdirectory_name_parts)
811
812 subdirectory = self._unique_subdirectory(base_subdirectory_name)
813 subdirectory_path = os.path.join(self.resultdir, subdirectory)
814 os.mkdir(subdirectory_path)
815
816 self.record(status_code, subdirectory, test_name,
817 status=reason, optional_fields={'is_summary': True})
818
819 if attributes:
820 utils.write_keyval(subdirectory_path, attributes)
821
822 if child_test_ids:
823 ids_string = ','.join(str(test_id) for test_id in child_test_ids)
824 summary_data = {'child_test_ids': ids_string}
825 utils.write_keyval(os.path.join(subdirectory_path, 'summary_data'),
826 summary_data)
827
828
jadmanski16a7ff72009-04-01 18:19:53 +0000829 def disable_warnings(self, warning_type):
jadmanskif37df842009-02-11 00:03:26 +0000830 self.warning_manager.disable_warnings(warning_type)
jadmanski16a7ff72009-04-01 18:19:53 +0000831 self.record("INFO", None, None,
832 "disabling %s warnings" % warning_type,
833 {"warnings.disable": warning_type})
jadmanskif37df842009-02-11 00:03:26 +0000834
835
jadmanski16a7ff72009-04-01 18:19:53 +0000836 def enable_warnings(self, warning_type):
jadmanskif37df842009-02-11 00:03:26 +0000837 self.warning_manager.enable_warnings(warning_type)
jadmanski16a7ff72009-04-01 18:19:53 +0000838 self.record("INFO", None, None,
839 "enabling %s warnings" % warning_type,
840 {"warnings.enable": warning_type})
jadmanskif37df842009-02-11 00:03:26 +0000841
842
jadmanski779bd292009-03-19 17:33:33 +0000843 def get_status_log_path(self, subdir=None):
844 """Return the path to the job status log.
845
846 @param subdir - Optional paramter indicating that you want the path
847 to a subdirectory status log.
848
849 @returns The path where the status log should be.
850 """
mbligh210bae62009-04-01 18:33:13 +0000851 if self.resultdir:
852 if subdir:
853 return os.path.join(self.resultdir, subdir, "status.log")
854 else:
855 return os.path.join(self.resultdir, "status.log")
jadmanski779bd292009-03-19 17:33:33 +0000856 else:
mbligh210bae62009-04-01 18:33:13 +0000857 return None
jadmanski779bd292009-03-19 17:33:33 +0000858
859
jadmanski6bb32d72009-03-19 20:25:24 +0000860 def _update_uncollected_logs_list(self, update_func):
861 """Updates the uncollected logs list in a multi-process safe manner.
862
863 @param update_func - a function that updates the list of uncollected
864 logs. Should take one parameter, the list to be updated.
865 """
mbligh0d0f67d2009-11-06 03:15:03 +0000866 if self._uncollected_log_file:
867 log_file = open(self._uncollected_log_file, "r+")
mbligha788dc42009-03-26 21:10:16 +0000868 fcntl.flock(log_file, fcntl.LOCK_EX)
jadmanski6bb32d72009-03-19 20:25:24 +0000869 try:
870 uncollected_logs = pickle.load(log_file)
871 update_func(uncollected_logs)
872 log_file.seek(0)
873 log_file.truncate()
874 pickle.dump(uncollected_logs, log_file)
jadmanski3bff9092009-04-22 18:09:47 +0000875 log_file.flush()
jadmanski6bb32d72009-03-19 20:25:24 +0000876 finally:
877 fcntl.flock(log_file, fcntl.LOCK_UN)
878 log_file.close()
879
880
881 def add_client_log(self, hostname, remote_path, local_path):
882 """Adds a new set of client logs to the list of uncollected logs,
883 to allow for future log recovery.
884
885 @param host - the hostname of the machine holding the logs
886 @param remote_path - the directory on the remote machine holding logs
887 @param local_path - the local directory to copy the logs into
888 """
889 def update_func(logs_list):
890 logs_list.append((hostname, remote_path, local_path))
891 self._update_uncollected_logs_list(update_func)
892
893
894 def remove_client_log(self, hostname, remote_path, local_path):
895 """Removes a set of client logs from the list of uncollected logs,
896 to allow for future log recovery.
897
898 @param host - the hostname of the machine holding the logs
899 @param remote_path - the directory on the remote machine holding logs
900 @param local_path - the local directory to copy the logs into
901 """
902 def update_func(logs_list):
903 logs_list.remove((hostname, remote_path, local_path))
904 self._update_uncollected_logs_list(update_func)
905
906
mbligh0d0f67d2009-11-06 03:15:03 +0000907 def get_client_logs(self):
908 """Retrieves the list of uncollected logs, if it exists.
909
910 @returns A list of (host, remote_path, local_path) tuples. Returns
911 an empty list if no uncollected logs file exists.
912 """
913 log_exists = (self._uncollected_log_file and
914 os.path.exists(self._uncollected_log_file))
915 if log_exists:
916 return pickle.load(open(self._uncollected_log_file))
917 else:
918 return []
919
920
    def _fill_server_control_namespace(self, namespace, protect=True):
        """
        Prepare a namespace to be used when executing server control files.

        This sets up the control file API by importing modules and making them
        available under the appropriate names within namespace.

        For use by _execute_code().

        Args:
            namespace: The namespace dictionary to fill in.
            protect: Boolean.  If True (the default) any operation that would
                clobber an existing entry in namespace will cause an error.
        Raises:
            error.AutoservError: When a name would be clobbered by import.
        """
        def _import_names(module_name, names=()):
            """
            Import a module and assign named attributes into namespace.

            Closes over the outer namespace and protect arguments.

            Args:
                module_name: The string module name.
                names: A limiting list of names to import from module_name.  If
                    empty (the default), all names are imported from the module
                    similar to a "from foo.bar import *" statement.
            Raises:
                error.AutoservError: When a name being imported would clobber
                    a name already in namespace.
            """
            # __import__ with a non-empty fromlist returns the leaf module
            module = __import__(module_name, {}, {}, names)

            # No names supplied?  Import * from the lowest level module.
            # (Ugh, why do I have to implement this part myself?)
            if not names:
                # an empty fromlist made __import__ return the top-level
                # package, so walk down to the leaf submodule by hand
                for submodule_name in module_name.split('.')[1:]:
                    module = getattr(module, submodule_name)
                if hasattr(module, '__all__'):
                    names = getattr(module, '__all__')
                else:
                    names = dir(module)

            # Install each name into namespace, checking to make sure it
            # doesn't override anything that already exists.
            for name in names:
                # Check for conflicts to help prevent future problems.
                if name in namespace and protect:
                    if namespace[name] is not getattr(module, name):
                        raise error.AutoservError('importing name '
                                '%s from %s %r would override %r' %
                                (name, module_name, getattr(module, name),
                                 namespace[name]))
                    else:
                        # Encourage cleanliness and the use of __all__ for a
                        # more concrete API with less surprises on '*' imports.
                        warnings.warn('%s (%r) being imported from %s for use '
                                      'in server control files is not the '
                                      'first occurrance of that import.' %
                                      (name, namespace[name], module_name))

                namespace[name] = getattr(module, name)


        # This is the equivalent of prepending a bunch of import statements to
        # the front of the control script.
        namespace.update(os=os, sys=sys, logging=logging)
        _import_names('autotest_lib.server',
                      ('hosts', 'autotest', 'kvm', 'git', 'standalone_profiler',
                       'source_kernel', 'rpm_kernel', 'deb_kernel',
                       'git_kernel'))
        _import_names('autotest_lib.server.subcommand',
                      ('parallel', 'parallel_simple', 'subcommand'))
        _import_names('autotest_lib.server.utils',
                      ('run', 'get_tmp_dir', 'sh_escape', 'parse_machine'))
        _import_names('autotest_lib.client.common_lib.error')
        _import_names('autotest_lib.client.common_lib.barrier', ('barrier',))

        # Inject ourself as the job object into other classes within the API.
        # (Yuck, this injection is a gross thing be part of a public API. -gps)
        #
        # XXX Base & SiteAutotest do not appear to use .job.  Who does?
        namespace['autotest'].Autotest.job = self
        # server.hosts.base_classes.Host uses .job.
        namespace['hosts'].Host.job = self
        # propagate this job's ssh settings to any hosts created by the
        # control file through the hosts factory
        namespace['hosts'].factory.ssh_user = self._ssh_user
        namespace['hosts'].factory.ssh_port = self._ssh_port
        namespace['hosts'].factory.ssh_pass = self._ssh_pass
mbligh084bc172008-10-18 14:02:45 +00001006
1007
    def _execute_code(self, code_file, namespace, protect=True):
        """
        Execute code using a copy of namespace as a server control script.

        Unless protect_namespace is explicitly set to False, the dict will not
        be modified.

        Args:
            code_file: The filename of the control file to execute.
            namespace: A dict containing names to make available during execution.
            protect: Boolean.  If True (the default) a copy of the namespace dict
                is used during execution to prevent the code from modifying its
                contents outside of this function.  If False the raw dict is
                passed in and modifications will be allowed.
        """
        if protect:
            namespace = namespace.copy()
        # populate the namespace with the server control file API
        self._fill_server_control_namespace(namespace, protect=protect)
        # TODO: Simplify and get rid of the special cases for only 1 machine.
        if len(self.machines) > 1:
            # multi-machine jobs expose the machine list through the
            # .machines file in the current working directory
            machines_text = '\n'.join(self.machines) + '\n'
            # Only rewrite the file if it does not match our machine list.
            try:
                machines_f = open(MACHINES_FILENAME, 'r')
                existing_machines_text = machines_f.read()
                machines_f.close()
            except EnvironmentError:
                # missing/unreadable file is treated as "does not match"
                existing_machines_text = None
            if machines_text != existing_machines_text:
                utils.open_write_close(MACHINES_FILENAME, machines_text)
        # run the control file in the prepared namespace
        execfile(code_file, namespace, namespace)
jadmanski10646442008-08-13 14:05:21 +00001039
1040
jadmanskie29d0e42010-06-17 16:06:52 +00001041 def _parse_status(self, new_line):
mbligh0d0f67d2009-11-06 03:15:03 +00001042 if not self._using_parser:
jadmanski10646442008-08-13 14:05:21 +00001043 return
jadmanskie29d0e42010-06-17 16:06:52 +00001044 new_tests = self.parser.process_lines([new_line])
jadmanski10646442008-08-13 14:05:21 +00001045 for test in new_tests:
1046 self.__insert_test(test)
1047
1048
    def __insert_test(self, test):
        """
        An internal method to insert a new test result into the
        database. This method will not raise an exception, even if an
        error occurs during the insert, to avoid failing a test
        simply because of unexpected database issues.

        @param test: a parsed test object as produced by the status parser.
        """
        # the pass/fail counters are updated regardless of whether the
        # database insert below succeeds
        self.num_tests_run += 1
        if status_lib.is_worse_than_or_equal_to(test.status, 'FAIL'):
            self.num_tests_failed += 1
        try:
            self.results_db.insert_test(self.job_model, test)
        except Exception:
            # deliberately swallow any insert failure (see docstring); just
            # report it on stderr
            msg = ("WARNING: An unexpected error occured while "
                   "inserting test results into the database. "
                   "Ignoring error.\n" + traceback.format_exc())
            print >> sys.stderr, msg
1065
mblighcaa62c22008-04-07 21:51:17 +00001066
mblighfc3da5b2010-01-06 18:37:22 +00001067 def preprocess_client_state(self):
1068 """
1069 Produce a state file for initializing the state of a client job.
1070
1071 Creates a new client state file with all the current server state, as
1072 well as some pre-set client state.
1073
1074 @returns The path of the file the state was written into.
1075 """
1076 # initialize the sysinfo state
1077 self._state.set('client', 'sysinfo', self.sysinfo.serialize())
1078
1079 # dump the state out to a tempfile
1080 fd, file_path = tempfile.mkstemp(dir=self.tmpdir)
1081 os.close(fd)
mbligha2c99492010-01-27 22:59:50 +00001082
1083 # write_to_file doesn't need locking, we exclusively own file_path
mblighfc3da5b2010-01-06 18:37:22 +00001084 self._state.write_to_file(file_path)
1085 return file_path
1086
1087
    def postprocess_client_state(self, state_path):
        """
        Update the state of this job with the state from a client job.

        Updates the state of the server side of a job with the final state
        of a client job that was run. Updates the non-client-specific state,
        pulls in some specific bits from the client-specific state, and then
        discards the rest. Removes the state file afterwards

        @param state_path A path to the state file from the client.
        """
        # update the on-disk state
        try:
            self._state.read_from_file(state_path)
            os.remove(state_path)
        except OSError, e:
            # ignore file-not-found errors
            if e.errno != errno.ENOENT:
                raise
            else:
                # a missing state file is expected when the client failed
                # before writing it out; just note it and carry on
                logging.debug('Client state file %s not found', state_path)

        # update the sysinfo state
        if self._state.has('client', 'sysinfo'):
            self.sysinfo.deserialize(self._state.get('client', 'sysinfo'))

        # drop all the client-specific state
        self._state.discard_namespace('client')
1116
1117
mbligh0a883702010-04-21 01:58:34 +00001118 def clear_all_known_hosts(self):
1119 """Clears known hosts files for all AbstractSSHHosts."""
1120 for host in self.hosts:
1121 if isinstance(host, abstract_ssh.AbstractSSHHost):
1122 host.clear_known_hosts()
1123
1124
class warning_manager(object):
    """Class for controlling warning logs. Manages the enabling and disabling
    of warnings."""
    def __init__(self):
        # maps a warning type to a list of (start, end) epoch-second
        # intervals during which the type was disabled; end is None while
        # the type is still disabled
        self.disabled_warnings = {}


    def is_valid(self, timestamp, warning_type):
        """Indicates if a warning (based on the time it occured and its type)
        is a valid warning. A warning is considered "invalid" if this type of
        warning was marked as "disabled" at the time the warning occured."""
        for start, end in self.disabled_warnings.get(warning_type, []):
            in_disabled_interval = (timestamp >= start and
                                    (end is None or timestamp < end))
            if in_disabled_interval:
                return False
        return True


    def disable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, disables all further warnings of this type."""
        intervals = self.disabled_warnings.setdefault(warning_type, [])
        currently_disabled = intervals and intervals[-1][1] is None
        if not currently_disabled:
            # open a new disabled interval starting now
            intervals.append((int(current_time_func()), None))


    def enable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, enables all further warnings of this type."""
        intervals = self.disabled_warnings.get(warning_type, [])
        currently_disabled = intervals and intervals[-1][1] is None
        if currently_disabled:
            # close the open disabled interval as of now
            intervals[-1] = (intervals[-1][0], int(current_time_func()))
Paul Pendlebury57593562011-06-15 10:45:49 -07001156
1157
# load up site-specific code for generating site-specific job data
get_site_job_data = utils.import_site_function(__file__,
    "autotest_lib.server.site_server_job", "get_site_job_data",
    _get_site_job_data_dummy)


# load up the site-specific server_job class, falling back to the generic
# base_server_job when no site module is installed
site_server_job = utils.import_site_class(
    __file__, "autotest_lib.server.site_server_job", "site_server_job",
    base_server_job)
1167
1168
class server_job(site_server_job):
    """The server job class actually used: the base job implementation plus
    whatever site-specific extensions were loaded above."""
    pass