blob: e3872a4d8b5ed1214dd7d726bff80adb17dd66b2 [file] [log] [blame]
Paul Pendlebury7c1fdcf2011-05-04 12:39:15 -07001# Copyright (c) 2011 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
mbligh57e78662008-06-17 19:53:49 +00004"""
5The main job wrapper for the server side.
6
7This is the core infrastructure. Derived from the client side job.py
8
9Copyright Martin J. Bligh, Andy Whitcroft 2007
10"""
11
Eric Li861b2d52011-02-04 14:50:35 -080012import getpass, os, sys, re, stat, tempfile, time, select, subprocess, platform
Paul Pendleburye4afc772011-04-26 11:20:15 -070013import multiprocessing
mblighfc3da5b2010-01-06 18:37:22 +000014import traceback, shutil, warnings, fcntl, pickle, logging, itertools, errno
showard75cdfee2009-06-10 17:40:41 +000015from autotest_lib.client.bin import sysinfo
mbligh0d0f67d2009-11-06 03:15:03 +000016from autotest_lib.client.common_lib import base_job
mbligh09108442008-10-15 16:27:38 +000017from autotest_lib.client.common_lib import error, log, utils, packages
showard75cdfee2009-06-10 17:40:41 +000018from autotest_lib.client.common_lib import logging_manager
Paul Pendleburyf807c182011-04-05 11:24:34 -070019from autotest_lib.server import test, subcommand, profilers, server_job_utils
Paul Pendlebury7c1fdcf2011-05-04 12:39:15 -070020from autotest_lib.server import gtest_runner
mbligh0a883702010-04-21 01:58:34 +000021from autotest_lib.server.hosts import abstract_ssh
jadmanski10646442008-08-13 14:05:21 +000022from autotest_lib.tko import db as tko_db, status_lib, utils as tko_utils
jadmanski10646442008-08-13 14:05:21 +000023
24
mbligh084bc172008-10-18 14:02:45 +000025def _control_segment_path(name):
26 """Get the pathname of the named control segment file."""
jadmanski10646442008-08-13 14:05:21 +000027 server_dir = os.path.dirname(os.path.abspath(__file__))
mbligh084bc172008-10-18 14:02:45 +000028 return os.path.join(server_dir, "control_segments", name)
jadmanski10646442008-08-13 14:05:21 +000029
30
# Filenames used inside a job's control/results directories.
CLIENT_CONTROL_FILENAME = 'control'
SERVER_CONTROL_FILENAME = 'control.srv'
MACHINES_FILENAME = '.machines'

# Pre-resolved absolute paths to the standard control segment files.
CLIENT_WRAPPER_CONTROL_FILE = _control_segment_path('client_wrapper')
CRASHDUMPS_CONTROL_FILE = _control_segment_path('crashdumps')
CRASHINFO_CONTROL_FILE = _control_segment_path('crashinfo')
INSTALL_CONTROL_FILE = _control_segment_path('install')
CLEANUP_CONTROL_FILE = _control_segment_path('cleanup')

VERIFY_CONTROL_FILE = _control_segment_path('verify')
REPAIR_CONTROL_FILE = _control_segment_path('repair')
jadmanski10646442008-08-13 14:05:21 +000043
44
mbligh062ed152009-01-13 00:57:14 +000045# by default provide a stub that generates no site data
46def _get_site_job_data_dummy(job):
47 return {}
48
49
# Load the site-specific hook for generating extra job data, falling back
# to the no-op stub when no site implementation is installed.
get_site_job_data = utils.import_site_function(
        __file__, "autotest_lib.server.site_server_job", "get_site_job_data",
        _get_site_job_data_dummy)
jadmanski10646442008-08-13 14:05:21 +000054
55
class status_indenter(base_job.status_indenter):
    """Status indenter backed by a plain integer counter."""
    def __init__(self):
        self._indent = 0


    @property
    def indent(self):
        """The current indentation level."""
        return self._indent


    def increment(self):
        """Go one level deeper."""
        self._indent = self._indent + 1


    def decrement(self):
        """Come back up one level."""
        self._indent = self._indent - 1


    def get_context(self):
        """Returns a context object for use by job.get_record_context."""
        class context(object):
            def __init__(self, indenter, level):
                self._indenter = indenter
                self._indent = level
            def restore(self):
                # put the saved level back on the owning indenter
                self._indenter._indent = self._indent
        return context(self, self._indent)
84
85
class server_job_record_hook(object):
    """Record hook installed on a server job's status logger.

    Whenever a status entry is recorded, this hook injects WARN messages
    gathered from the job's warning loggers and echoes rendered log lines
    at INFO level. It is a class (rather than a plain function) so it can
    keep a flag that blocks infinite recursion, since the hook itself may
    call job.record to log the WARN messages.

    Depends on job._read_warnings and job._logger.
    """
    def __init__(self, job):
        self._job = job
        self._being_called = False


    def __call__(self, entry):
        """Run the real hook (_hook) unless we are already inside it.

        No attempt is made at thread safety; this only guards against the
        job.record -> _hook -> job.record -> _hook ... recursion chain.
        """
        if self._being_called:
            return
        self._being_called = True
        try:
            self._hook(self._job, entry)
        finally:
            self._being_called = False


    @staticmethod
    def _hook(job, entry):
        """The core hook body; this part may safely call job.record."""
        to_echo = []
        # drain freshly produced warnings and record each one as WARN
        for timestamp, msg in job._read_warnings():
            warn_entry = base_job.status_log_entry(
                'WARN', None, None, msg, {}, timestamp=timestamp)
            to_echo.append(warn_entry)
            job.record_entry(warn_entry)
        # echo the warnings plus the new entry to the INFO log, feeding
        # each rendered line through the status parser as well
        to_echo.append(entry)
        for log_entry in to_echo:
            rendered = job._logger.render_entry(log_entry)
            logging.info(rendered)
            job._parse_status(rendered)
jadmanski2a89dac2010-06-11 14:32:58 +0000130
131
mbligh0d0f67d2009-11-06 03:15:03 +0000132class base_server_job(base_job.base_job):
133 """The server-side concrete implementation of base_job.
jadmanski10646442008-08-13 14:05:21 +0000134
mbligh0d0f67d2009-11-06 03:15:03 +0000135 Optional properties provided by this implementation:
136 serverdir
137 conmuxdir
138
139 num_tests_run
140 num_tests_failed
141
142 warning_manager
143 warning_loggers
jadmanski10646442008-08-13 14:05:21 +0000144 """
145
mbligh0d0f67d2009-11-06 03:15:03 +0000146 _STATUS_VERSION = 1
jadmanski10646442008-08-13 14:05:21 +0000147
148 def __init__(self, control, args, resultdir, label, user, machines,
149 client=False, parse_job='',
mbligh374f3412009-05-13 21:29:45 +0000150 ssh_user='root', ssh_port=22, ssh_pass='',
mblighe0cbc912010-03-11 18:03:07 +0000151 group_name='', tag='',
152 control_filename=SERVER_CONTROL_FILENAME):
jadmanski10646442008-08-13 14:05:21 +0000153 """
mbligh374f3412009-05-13 21:29:45 +0000154 Create a server side job object.
mblighb5dac432008-11-27 00:38:44 +0000155
mblighe7d9c602009-07-02 19:02:33 +0000156 @param control: The pathname of the control file.
157 @param args: Passed to the control file.
158 @param resultdir: Where to throw the results.
159 @param label: Description of the job.
160 @param user: Username for the job (email address).
161 @param client: True if this is a client-side control file.
162 @param parse_job: string, if supplied it is the job execution tag that
163 the results will be passed through to the TKO parser with.
164 @param ssh_user: The SSH username. [root]
165 @param ssh_port: The SSH port number. [22]
166 @param ssh_pass: The SSH passphrase, if needed.
167 @param group_name: If supplied, this will be written out as
mbligh374f3412009-05-13 21:29:45 +0000168 host_group_name in the keyvals file for the parser.
mblighe7d9c602009-07-02 19:02:33 +0000169 @param tag: The job execution tag from the scheduler. [optional]
mblighe0cbc912010-03-11 18:03:07 +0000170 @param control_filename: The filename where the server control file
171 should be written in the results directory.
jadmanski10646442008-08-13 14:05:21 +0000172 """
mbligh0d0f67d2009-11-06 03:15:03 +0000173 super(base_server_job, self).__init__(resultdir=resultdir)
mbligha788dc42009-03-26 21:10:16 +0000174
mbligh0d0f67d2009-11-06 03:15:03 +0000175 path = os.path.dirname(__file__)
176 self.control = control
177 self._uncollected_log_file = os.path.join(self.resultdir,
178 'uncollected_logs')
179 debugdir = os.path.join(self.resultdir, 'debug')
180 if not os.path.exists(debugdir):
181 os.mkdir(debugdir)
182
183 if user:
184 self.user = user
185 else:
186 self.user = getpass.getuser()
187
jadmanski808f4b12010-04-09 22:30:31 +0000188 self.args = args
jadmanski10646442008-08-13 14:05:21 +0000189 self.machines = machines
mbligh0d0f67d2009-11-06 03:15:03 +0000190 self._client = client
jadmanski10646442008-08-13 14:05:21 +0000191 self.warning_loggers = set()
jadmanskif37df842009-02-11 00:03:26 +0000192 self.warning_manager = warning_manager()
mbligh0d0f67d2009-11-06 03:15:03 +0000193 self._ssh_user = ssh_user
194 self._ssh_port = ssh_port
195 self._ssh_pass = ssh_pass
mblighe7d9c602009-07-02 19:02:33 +0000196 self.tag = tag
mbligh09108442008-10-15 16:27:38 +0000197 self.last_boot_tag = None
jadmanski53aaf382008-11-17 16:22:31 +0000198 self.hosts = set()
mbligh0d0f67d2009-11-06 03:15:03 +0000199 self.drop_caches = False
mblighb5dac432008-11-27 00:38:44 +0000200 self.drop_caches_between_iterations = False
mblighe0cbc912010-03-11 18:03:07 +0000201 self._control_filename = control_filename
jadmanski10646442008-08-13 14:05:21 +0000202
showard75cdfee2009-06-10 17:40:41 +0000203 self.logging = logging_manager.get_logging_manager(
204 manage_stdout_and_stderr=True, redirect_fds=True)
205 subcommand.logging_manager_object = self.logging
jadmanski10646442008-08-13 14:05:21 +0000206
mbligh0d0f67d2009-11-06 03:15:03 +0000207 self.sysinfo = sysinfo.sysinfo(self.resultdir)
jadmanski043e1132008-11-19 17:10:32 +0000208 self.profilers = profilers.profilers(self)
jadmanskic09fc152008-10-15 17:56:59 +0000209
jadmanski10646442008-08-13 14:05:21 +0000210 job_data = {'label' : label, 'user' : user,
211 'hostname' : ','.join(machines),
Eric Li861b2d52011-02-04 14:50:35 -0800212 'drone' : platform.node(),
mbligh0d0f67d2009-11-06 03:15:03 +0000213 'status_version' : str(self._STATUS_VERSION),
showard170873e2009-01-07 00:22:26 +0000214 'job_started' : str(int(time.time()))}
mbligh374f3412009-05-13 21:29:45 +0000215 if group_name:
216 job_data['host_group_name'] = group_name
jadmanski10646442008-08-13 14:05:21 +0000217
mbligh0d0f67d2009-11-06 03:15:03 +0000218 # only write these keyvals out on the first job in a resultdir
219 if 'job_started' not in utils.read_keyval(self.resultdir):
220 job_data.update(get_site_job_data(self))
221 utils.write_keyval(self.resultdir, job_data)
222
223 self._parse_job = parse_job
showardcc929362010-01-25 21:20:41 +0000224 self._using_parser = (self._parse_job and len(machines) <= 1)
mbligh0d0f67d2009-11-06 03:15:03 +0000225 self.pkgmgr = packages.PackageManager(
226 self.autodir, run_function_dargs={'timeout':600})
showard21baa452008-10-21 00:08:39 +0000227 self.num_tests_run = 0
228 self.num_tests_failed = 0
229
jadmanski550fdc22008-11-20 16:32:08 +0000230 self._register_subcommand_hooks()
231
mbligh0d0f67d2009-11-06 03:15:03 +0000232 # these components aren't usable on the server
233 self.bootloader = None
234 self.harness = None
235
jadmanski2a89dac2010-06-11 14:32:58 +0000236 # set up the status logger
jadmanski52053632010-06-11 21:08:10 +0000237 self._indenter = status_indenter()
jadmanski2a89dac2010-06-11 14:32:58 +0000238 self._logger = base_job.status_logger(
jadmanski52053632010-06-11 21:08:10 +0000239 self, self._indenter, 'status.log', 'status.log',
jadmanski2a89dac2010-06-11 14:32:58 +0000240 record_hook=server_job_record_hook(self))
241
mbligh0d0f67d2009-11-06 03:15:03 +0000242
243 @classmethod
244 def _find_base_directories(cls):
245 """
246 Determine locations of autodir, clientdir and serverdir. Assumes
247 that this file is located within serverdir and uses __file__ along
248 with relative paths to resolve the location.
249 """
250 serverdir = os.path.abspath(os.path.dirname(__file__))
251 autodir = os.path.normpath(os.path.join(serverdir, '..'))
252 clientdir = os.path.join(autodir, 'client')
253 return autodir, clientdir, serverdir
254
255
256 def _find_resultdir(self, resultdir):
257 """
258 Determine the location of resultdir. For server jobs we expect one to
259 always be explicitly passed in to __init__, so just return that.
260 """
261 if resultdir:
262 return os.path.normpath(resultdir)
263 else:
264 return None
265
jadmanski550fdc22008-11-20 16:32:08 +0000266
jadmanski2a89dac2010-06-11 14:32:58 +0000267 def _get_status_logger(self):
268 """Return a reference to the status logger."""
269 return self._logger
270
271
jadmanskie432dd22009-01-30 15:04:51 +0000272 @staticmethod
273 def _load_control_file(path):
274 f = open(path)
275 try:
276 control_file = f.read()
277 finally:
278 f.close()
279 return re.sub('\r', '', control_file)
280
281
jadmanski550fdc22008-11-20 16:32:08 +0000282 def _register_subcommand_hooks(self):
mbligh2b92b862008-11-22 13:25:32 +0000283 """
284 Register some hooks into the subcommand modules that allow us
285 to properly clean up self.hosts created in forked subprocesses.
286 """
jadmanski550fdc22008-11-20 16:32:08 +0000287 def on_fork(cmd):
288 self._existing_hosts_on_fork = set(self.hosts)
289 def on_join(cmd):
290 new_hosts = self.hosts - self._existing_hosts_on_fork
291 for host in new_hosts:
292 host.close()
293 subcommand.subcommand.register_fork_hook(on_fork)
294 subcommand.subcommand.register_join_hook(on_join)
295
jadmanski10646442008-08-13 14:05:21 +0000296
mbligh4608b002010-01-05 18:22:35 +0000297 def init_parser(self):
mbligh2b92b862008-11-22 13:25:32 +0000298 """
mbligh4608b002010-01-05 18:22:35 +0000299 Start the continuous parsing of self.resultdir. This sets up
jadmanski10646442008-08-13 14:05:21 +0000300 the database connection and inserts the basic job object into
mbligh2b92b862008-11-22 13:25:32 +0000301 the database if necessary.
302 """
mbligh4608b002010-01-05 18:22:35 +0000303 if not self._using_parser:
304 return
jadmanski10646442008-08-13 14:05:21 +0000305 # redirect parser debugging to .parse.log
mbligh4608b002010-01-05 18:22:35 +0000306 parse_log = os.path.join(self.resultdir, '.parse.log')
jadmanski10646442008-08-13 14:05:21 +0000307 parse_log = open(parse_log, 'w', 0)
308 tko_utils.redirect_parser_debugging(parse_log)
309 # create a job model object and set up the db
310 self.results_db = tko_db.db(autocommit=True)
mbligh0d0f67d2009-11-06 03:15:03 +0000311 self.parser = status_lib.parser(self._STATUS_VERSION)
mbligh4608b002010-01-05 18:22:35 +0000312 self.job_model = self.parser.make_job(self.resultdir)
jadmanski10646442008-08-13 14:05:21 +0000313 self.parser.start(self.job_model)
314 # check if a job already exists in the db and insert it if
315 # it does not
mbligh0d0f67d2009-11-06 03:15:03 +0000316 job_idx = self.results_db.find_job(self._parse_job)
jadmanski10646442008-08-13 14:05:21 +0000317 if job_idx is None:
mbligh0d0f67d2009-11-06 03:15:03 +0000318 self.results_db.insert_job(self._parse_job, self.job_model)
jadmanski10646442008-08-13 14:05:21 +0000319 else:
mbligh2b92b862008-11-22 13:25:32 +0000320 machine_idx = self.results_db.lookup_machine(self.job_model.machine)
jadmanski10646442008-08-13 14:05:21 +0000321 self.job_model.index = job_idx
322 self.job_model.machine_idx = machine_idx
323
324
325 def cleanup_parser(self):
mbligh2b92b862008-11-22 13:25:32 +0000326 """
327 This should be called after the server job is finished
jadmanski10646442008-08-13 14:05:21 +0000328 to carry out any remaining cleanup (e.g. flushing any
mbligh2b92b862008-11-22 13:25:32 +0000329 remaining test results to the results db)
330 """
mbligh0d0f67d2009-11-06 03:15:03 +0000331 if not self._using_parser:
jadmanski10646442008-08-13 14:05:21 +0000332 return
333 final_tests = self.parser.end()
334 for test in final_tests:
335 self.__insert_test(test)
mbligh0d0f67d2009-11-06 03:15:03 +0000336 self._using_parser = False
jadmanski10646442008-08-13 14:05:21 +0000337
338
339 def verify(self):
340 if not self.machines:
mbligh084bc172008-10-18 14:02:45 +0000341 raise error.AutoservError('No machines specified to verify')
mbligh0fce4112008-11-27 00:37:17 +0000342 if self.resultdir:
343 os.chdir(self.resultdir)
jadmanski10646442008-08-13 14:05:21 +0000344 try:
jadmanskicdd0c402008-09-19 21:21:31 +0000345 namespace = {'machines' : self.machines, 'job' : self,
mbligh0d0f67d2009-11-06 03:15:03 +0000346 'ssh_user' : self._ssh_user,
347 'ssh_port' : self._ssh_port,
348 'ssh_pass' : self._ssh_pass}
mbligh084bc172008-10-18 14:02:45 +0000349 self._execute_code(VERIFY_CONTROL_FILE, namespace, protect=False)
jadmanski10646442008-08-13 14:05:21 +0000350 except Exception, e:
mbligh2b92b862008-11-22 13:25:32 +0000351 msg = ('Verify failed\n' + str(e) + '\n' + traceback.format_exc())
jadmanski10646442008-08-13 14:05:21 +0000352 self.record('ABORT', None, None, msg)
353 raise
354
355
356 def repair(self, host_protection):
357 if not self.machines:
358 raise error.AutoservError('No machines specified to repair')
mbligh0fce4112008-11-27 00:37:17 +0000359 if self.resultdir:
360 os.chdir(self.resultdir)
jadmanski10646442008-08-13 14:05:21 +0000361 namespace = {'machines': self.machines, 'job': self,
mbligh0d0f67d2009-11-06 03:15:03 +0000362 'ssh_user': self._ssh_user, 'ssh_port': self._ssh_port,
363 'ssh_pass': self._ssh_pass,
jadmanski10646442008-08-13 14:05:21 +0000364 'protection_level': host_protection}
mbligh25c0b8c2009-01-24 01:44:17 +0000365
mbligh0931b0a2009-04-08 17:44:48 +0000366 self._execute_code(REPAIR_CONTROL_FILE, namespace, protect=False)
jadmanski10646442008-08-13 14:05:21 +0000367
368
369 def precheck(self):
370 """
371 perform any additional checks in derived classes.
372 """
373 pass
374
375
376 def enable_external_logging(self):
mbligh2b92b862008-11-22 13:25:32 +0000377 """
378 Start or restart external logging mechanism.
jadmanski10646442008-08-13 14:05:21 +0000379 """
380 pass
381
382
383 def disable_external_logging(self):
mbligh2b92b862008-11-22 13:25:32 +0000384 """
385 Pause or stop external logging mechanism.
jadmanski10646442008-08-13 14:05:21 +0000386 """
387 pass
388
389
390 def use_external_logging(self):
mbligh2b92b862008-11-22 13:25:32 +0000391 """
392 Return True if external logging should be used.
jadmanski10646442008-08-13 14:05:21 +0000393 """
394 return False
395
396
mbligh415dc212009-06-15 21:53:34 +0000397 def _make_parallel_wrapper(self, function, machines, log):
398 """Wrap function as appropriate for calling by parallel_simple."""
mbligh2b92b862008-11-22 13:25:32 +0000399 is_forking = not (len(machines) == 1 and self.machines == machines)
mbligh0d0f67d2009-11-06 03:15:03 +0000400 if self._parse_job and is_forking and log:
jadmanski10646442008-08-13 14:05:21 +0000401 def wrapper(machine):
mbligh0d0f67d2009-11-06 03:15:03 +0000402 self._parse_job += "/" + machine
403 self._using_parser = True
jadmanski10646442008-08-13 14:05:21 +0000404 self.machines = [machine]
mbligh0d0f67d2009-11-06 03:15:03 +0000405 self.push_execution_context(machine)
jadmanski609a5f42008-08-26 20:52:42 +0000406 os.chdir(self.resultdir)
showard2bab8f42008-11-12 18:15:22 +0000407 utils.write_keyval(self.resultdir, {"hostname": machine})
mbligh4608b002010-01-05 18:22:35 +0000408 self.init_parser()
jadmanski10646442008-08-13 14:05:21 +0000409 result = function(machine)
410 self.cleanup_parser()
411 return result
jadmanski4dd1a002008-09-05 20:27:30 +0000412 elif len(machines) > 1 and log:
jadmanski10646442008-08-13 14:05:21 +0000413 def wrapper(machine):
mbligh0d0f67d2009-11-06 03:15:03 +0000414 self.push_execution_context(machine)
jadmanski609a5f42008-08-26 20:52:42 +0000415 os.chdir(self.resultdir)
mbligh838d82d2009-03-11 17:14:31 +0000416 machine_data = {'hostname' : machine,
mbligh0d0f67d2009-11-06 03:15:03 +0000417 'status_version' : str(self._STATUS_VERSION)}
mbligh838d82d2009-03-11 17:14:31 +0000418 utils.write_keyval(self.resultdir, machine_data)
jadmanski10646442008-08-13 14:05:21 +0000419 result = function(machine)
420 return result
421 else:
422 wrapper = function
mbligh415dc212009-06-15 21:53:34 +0000423 return wrapper
424
425
426 def parallel_simple(self, function, machines, log=True, timeout=None,
427 return_results=False):
428 """
429 Run 'function' using parallel_simple, with an extra wrapper to handle
430 the necessary setup for continuous parsing, if possible. If continuous
431 parsing is already properly initialized then this should just work.
432
433 @param function: A callable to run in parallel given each machine.
434 @param machines: A list of machine names to be passed one per subcommand
435 invocation of function.
436 @param log: If True, output will be written to output in a subdirectory
437 named after each machine.
438 @param timeout: Seconds after which the function call should timeout.
439 @param return_results: If True instead of an AutoServError being raised
440 on any error a list of the results|exceptions from the function
441 called on each arg is returned. [default: False]
442
443 @raises error.AutotestError: If any of the functions failed.
444 """
445 wrapper = self._make_parallel_wrapper(function, machines, log)
446 return subcommand.parallel_simple(wrapper, machines,
447 log=log, timeout=timeout,
448 return_results=return_results)
449
450
451 def parallel_on_machines(self, function, machines, timeout=None):
452 """
showardcd5fac42009-07-06 20:19:43 +0000453 @param function: Called in parallel with one machine as its argument.
mbligh415dc212009-06-15 21:53:34 +0000454 @param machines: A list of machines to call function(machine) on.
455 @param timeout: Seconds after which the function call should timeout.
456
457 @returns A list of machines on which function(machine) returned
458 without raising an exception.
459 """
showardcd5fac42009-07-06 20:19:43 +0000460 results = self.parallel_simple(function, machines, timeout=timeout,
mbligh415dc212009-06-15 21:53:34 +0000461 return_results=True)
462 success_machines = []
463 for result, machine in itertools.izip(results, machines):
464 if not isinstance(result, Exception):
465 success_machines.append(machine)
466 return success_machines
jadmanski10646442008-08-13 14:05:21 +0000467
468
Paul Pendleburye4afc772011-04-26 11:20:15 -0700469 def distribute_across_machines(self, tests, machines,
470 continuous_parsing=False):
Paul Pendleburyff1076d2011-03-31 14:45:32 -0700471 """Run each test in tests once using machines.
472
473 Instead of running each test on each machine like parallel_on_machines,
474 run each test once across all machines. Put another way, the total
475 number of tests run by parallel_on_machines is len(tests) *
476 len(machines). The number of tests run by distribute_across_machines is
477 len(tests).
478
479 Args:
480 tests: List of tests to run.
Paul Pendleburye4afc772011-04-26 11:20:15 -0700481 machines: List of machines to use.
482 continuous_parsing: Bool, if true parse job while running.
Paul Pendleburyff1076d2011-03-31 14:45:32 -0700483 """
484 # The Queue is thread safe, but since a machine may have to search
485 # through the queue to find a valid test the lock provides exclusive
486 # queue access for more than just the get call.
Paul Pendleburye4afc772011-04-26 11:20:15 -0700487 test_queue = multiprocessing.JoinableQueue()
488 test_queue_lock = multiprocessing.Lock()
Paul Pendleburyff1076d2011-03-31 14:45:32 -0700489
Paul Pendlebury1f6f3e72011-04-13 11:16:44 -0700490 machine_workers = [server_job_utils.machine_worker(self,
491 machine,
Paul Pendleburyf807c182011-04-05 11:24:34 -0700492 self.resultdir,
493 test_queue,
Paul Pendleburye4afc772011-04-26 11:20:15 -0700494 test_queue_lock,
495 continuous_parsing)
Paul Pendleburyff1076d2011-03-31 14:45:32 -0700496 for machine in machines]
497
498 # To (potentially) speed up searching for valid tests create a list of
499 # unique attribute sets present in the machines for this job. If sets
500 # were hashable we could just use a dictionary for fast verification.
501 # This at least reduces the search space from the number of machines to
502 # the number of unique machines.
503 unique_machine_attributes = []
504 for mw in machine_workers:
505 if not mw.attribute_set in unique_machine_attributes:
506 unique_machine_attributes.append(mw.attribute_set)
507
508 # Only queue tests which are valid on at least one machine. Record
509 # skipped tests in the status.log file using record_skipped_test().
510 for test_entry in tests:
Paul Pendleburyf807c182011-04-05 11:24:34 -0700511 ti = server_job_utils.test_item(*test_entry)
Paul Pendleburyff1076d2011-03-31 14:45:32 -0700512 machine_found = False
513 for ma in unique_machine_attributes:
514 if ti.validate(ma):
515 test_queue.put(ti)
516 machine_found = True
517 break
518 if not machine_found:
519 self.record_skipped_test(ti)
520
521 # Run valid tests and wait for completion.
522 for worker in machine_workers:
523 worker.start()
524 test_queue.join()
525
526
527 def record_skipped_test(self, skipped_test, message=None):
528 """Insert a failure record into status.log for this test."""
529 msg = message
530 if msg is None:
531 msg = 'No valid machines found for test %s.' % skipped_test
532 logging.info(msg)
533 self.record('START', None, skipped_test.test_name)
534 self.record('INFO', None, skipped_test.test_name, msg)
535 self.record('END TEST_NA', None, skipped_test.test_name, msg)
536
537
mbligh0d0f67d2009-11-06 03:15:03 +0000538 _USE_TEMP_DIR = object()
mbligh2b92b862008-11-22 13:25:32 +0000539 def run(self, cleanup=False, install_before=False, install_after=False,
jadmanskie432dd22009-01-30 15:04:51 +0000540 collect_crashdumps=True, namespace={}, control=None,
jadmanskidef0c3c2009-03-25 20:07:10 +0000541 control_file_dir=None, only_collect_crashinfo=False):
jadmanskifb9c0fa2009-04-29 17:39:16 +0000542 # for a normal job, make sure the uncollected logs file exists
543 # for a crashinfo-only run it should already exist, bail out otherwise
jadmanski648c39f2010-03-19 17:38:01 +0000544 created_uncollected_logs = False
mbligh0d0f67d2009-11-06 03:15:03 +0000545 if self.resultdir and not os.path.exists(self._uncollected_log_file):
jadmanskifb9c0fa2009-04-29 17:39:16 +0000546 if only_collect_crashinfo:
547 # if this is a crashinfo-only run, and there were no existing
548 # uncollected logs, just bail out early
549 logging.info("No existing uncollected logs, "
550 "skipping crashinfo collection")
551 return
552 else:
mbligh0d0f67d2009-11-06 03:15:03 +0000553 log_file = open(self._uncollected_log_file, "w")
jadmanskifb9c0fa2009-04-29 17:39:16 +0000554 pickle.dump([], log_file)
555 log_file.close()
jadmanski648c39f2010-03-19 17:38:01 +0000556 created_uncollected_logs = True
jadmanskifb9c0fa2009-04-29 17:39:16 +0000557
jadmanski10646442008-08-13 14:05:21 +0000558 # use a copy so changes don't affect the original dictionary
559 namespace = namespace.copy()
560 machines = self.machines
jadmanskie432dd22009-01-30 15:04:51 +0000561 if control is None:
jadmanski02a3ba22009-11-13 20:47:27 +0000562 if self.control is None:
563 control = ''
564 else:
565 control = self._load_control_file(self.control)
jadmanskie432dd22009-01-30 15:04:51 +0000566 if control_file_dir is None:
567 control_file_dir = self.resultdir
jadmanski10646442008-08-13 14:05:21 +0000568
569 self.aborted = False
Paul Pendlebury7c1fdcf2011-05-04 12:39:15 -0700570 namespace['gtest_runner'] = gtest_runner.gtest_runner()
jadmanski10646442008-08-13 14:05:21 +0000571 namespace['machines'] = machines
jadmanski808f4b12010-04-09 22:30:31 +0000572 namespace['args'] = self.args
jadmanski10646442008-08-13 14:05:21 +0000573 namespace['job'] = self
mbligh0d0f67d2009-11-06 03:15:03 +0000574 namespace['ssh_user'] = self._ssh_user
575 namespace['ssh_port'] = self._ssh_port
576 namespace['ssh_pass'] = self._ssh_pass
jadmanski10646442008-08-13 14:05:21 +0000577 test_start_time = int(time.time())
578
mbligh80e1eba2008-11-19 00:26:18 +0000579 if self.resultdir:
580 os.chdir(self.resultdir)
jadmanski779bd292009-03-19 17:33:33 +0000581 # touch status.log so that the parser knows a job is running here
jadmanski382303a2009-04-21 19:53:39 +0000582 open(self.get_status_log_path(), 'a').close()
mbligh80e1eba2008-11-19 00:26:18 +0000583 self.enable_external_logging()
jadmanskie432dd22009-01-30 15:04:51 +0000584
jadmanskicdd0c402008-09-19 21:21:31 +0000585 collect_crashinfo = True
mblighaebe3b62008-12-22 14:45:40 +0000586 temp_control_file_dir = None
jadmanski10646442008-08-13 14:05:21 +0000587 try:
showardcf8d4922009-10-14 16:08:39 +0000588 try:
589 if install_before and machines:
590 self._execute_code(INSTALL_CONTROL_FILE, namespace)
jadmanskie432dd22009-01-30 15:04:51 +0000591
showardcf8d4922009-10-14 16:08:39 +0000592 if only_collect_crashinfo:
593 return
594
jadmanskidef0c3c2009-03-25 20:07:10 +0000595 # determine the dir to write the control files to
596 cfd_specified = (control_file_dir
mbligh0d0f67d2009-11-06 03:15:03 +0000597 and control_file_dir is not self._USE_TEMP_DIR)
jadmanskidef0c3c2009-03-25 20:07:10 +0000598 if cfd_specified:
599 temp_control_file_dir = None
600 else:
601 temp_control_file_dir = tempfile.mkdtemp(
602 suffix='temp_control_file_dir')
603 control_file_dir = temp_control_file_dir
604 server_control_file = os.path.join(control_file_dir,
mblighe0cbc912010-03-11 18:03:07 +0000605 self._control_filename)
jadmanskidef0c3c2009-03-25 20:07:10 +0000606 client_control_file = os.path.join(control_file_dir,
607 CLIENT_CONTROL_FILENAME)
mbligh0d0f67d2009-11-06 03:15:03 +0000608 if self._client:
jadmanskidef0c3c2009-03-25 20:07:10 +0000609 namespace['control'] = control
610 utils.open_write_close(client_control_file, control)
mblighfeac0102009-04-28 18:31:12 +0000611 shutil.copyfile(CLIENT_WRAPPER_CONTROL_FILE,
612 server_control_file)
jadmanskidef0c3c2009-03-25 20:07:10 +0000613 else:
614 utils.open_write_close(server_control_file, control)
mbligh26f0d882009-06-22 18:30:01 +0000615 logging.info("Processing control file")
jadmanskidef0c3c2009-03-25 20:07:10 +0000616 self._execute_code(server_control_file, namespace)
mbligh26f0d882009-06-22 18:30:01 +0000617 logging.info("Finished processing control file")
jadmanski10646442008-08-13 14:05:21 +0000618
jadmanskidef0c3c2009-03-25 20:07:10 +0000619 # no error occured, so we don't need to collect crashinfo
620 collect_crashinfo = False
Eric Li6f27d4f2010-09-29 10:55:17 -0700621 except Exception, e:
showardcf8d4922009-10-14 16:08:39 +0000622 try:
623 logging.exception(
624 'Exception escaped control file, job aborting:')
Eric Li6f27d4f2010-09-29 10:55:17 -0700625 self.record('INFO', None, None, str(e),
626 {'job_abort_reason': str(e)})
showardcf8d4922009-10-14 16:08:39 +0000627 except:
628 pass # don't let logging exceptions here interfere
629 raise
jadmanski10646442008-08-13 14:05:21 +0000630 finally:
mblighaebe3b62008-12-22 14:45:40 +0000631 if temp_control_file_dir:
jadmanskie432dd22009-01-30 15:04:51 +0000632 # Clean up temp directory used for copies of the control files
mblighaebe3b62008-12-22 14:45:40 +0000633 try:
634 shutil.rmtree(temp_control_file_dir)
635 except Exception, e:
mblighe7d9c602009-07-02 19:02:33 +0000636 logging.warn('Could not remove temp directory %s: %s',
637 temp_control_file_dir, e)
jadmanskie432dd22009-01-30 15:04:51 +0000638
jadmanskicdd0c402008-09-19 21:21:31 +0000639 if machines and (collect_crashdumps or collect_crashinfo):
jadmanski10646442008-08-13 14:05:21 +0000640 namespace['test_start_time'] = test_start_time
jadmanskicdd0c402008-09-19 21:21:31 +0000641 if collect_crashinfo:
mbligh084bc172008-10-18 14:02:45 +0000642 # includes crashdumps
643 self._execute_code(CRASHINFO_CONTROL_FILE, namespace)
jadmanskicdd0c402008-09-19 21:21:31 +0000644 else:
mbligh084bc172008-10-18 14:02:45 +0000645 self._execute_code(CRASHDUMPS_CONTROL_FILE, namespace)
jadmanski648c39f2010-03-19 17:38:01 +0000646 if self._uncollected_log_file and created_uncollected_logs:
mbligh0d0f67d2009-11-06 03:15:03 +0000647 os.remove(self._uncollected_log_file)
jadmanski10646442008-08-13 14:05:21 +0000648 self.disable_external_logging()
showard45ae8192008-11-05 19:32:53 +0000649 if cleanup and machines:
650 self._execute_code(CLEANUP_CONTROL_FILE, namespace)
jadmanski10646442008-08-13 14:05:21 +0000651 if install_after and machines:
mbligh084bc172008-10-18 14:02:45 +0000652 self._execute_code(INSTALL_CONTROL_FILE, namespace)
jadmanski10646442008-08-13 14:05:21 +0000653
654
655 def run_test(self, url, *args, **dargs):
mbligh2b92b862008-11-22 13:25:32 +0000656 """
657 Summon a test object and run it.
jadmanski10646442008-08-13 14:05:21 +0000658
659 tag
660 tag to add to testname
661 url
662 url of the test to run
663 """
mblighfc3da5b2010-01-06 18:37:22 +0000664 group, testname = self.pkgmgr.get_package_name(url, 'test')
665 testname, subdir, tag = self._build_tagged_test_name(testname, dargs)
666 outputdir = self._make_test_outputdir(subdir)
jadmanski10646442008-08-13 14:05:21 +0000667
668 def group_func():
669 try:
670 test.runtest(self, url, tag, args, dargs)
671 except error.TestBaseException, e:
672 self.record(e.exit_status, subdir, testname, str(e))
673 raise
674 except Exception, e:
675 info = str(e) + "\n" + traceback.format_exc()
676 self.record('FAIL', subdir, testname, info)
677 raise
678 else:
mbligh2b92b862008-11-22 13:25:32 +0000679 self.record('GOOD', subdir, testname, 'completed successfully')
jadmanskide292df2008-08-26 20:51:14 +0000680
681 result, exc_info = self._run_group(testname, subdir, group_func)
682 if exc_info and isinstance(exc_info[1], error.TestBaseException):
683 return False
684 elif exc_info:
685 raise exc_info[0], exc_info[1], exc_info[2]
686 else:
687 return True
jadmanski10646442008-08-13 14:05:21 +0000688
689
690 def _run_group(self, name, subdir, function, *args, **dargs):
691 """\
692 Underlying method for running something inside of a group.
693 """
jadmanskide292df2008-08-26 20:51:14 +0000694 result, exc_info = None, None
jadmanski10646442008-08-13 14:05:21 +0000695 try:
696 self.record('START', subdir, name)
jadmanski52053632010-06-11 21:08:10 +0000697 result = function(*args, **dargs)
jadmanski10646442008-08-13 14:05:21 +0000698 except error.TestBaseException, e:
jadmanskib88d6dc2009-01-10 00:33:18 +0000699 self.record("END %s" % e.exit_status, subdir, name)
jadmanskide292df2008-08-26 20:51:14 +0000700 exc_info = sys.exc_info()
jadmanski10646442008-08-13 14:05:21 +0000701 except Exception, e:
702 err_msg = str(e) + '\n'
703 err_msg += traceback.format_exc()
704 self.record('END ABORT', subdir, name, err_msg)
705 raise error.JobError(name + ' failed\n' + traceback.format_exc())
706 else:
707 self.record('END GOOD', subdir, name)
708
jadmanskide292df2008-08-26 20:51:14 +0000709 return result, exc_info
jadmanski10646442008-08-13 14:05:21 +0000710
711
712 def run_group(self, function, *args, **dargs):
713 """\
714 function:
715 subroutine to run
716 *args:
717 arguments for the function
718 """
719
720 name = function.__name__
721
722 # Allow the tag for the group to be specified.
723 tag = dargs.pop('tag', None)
724 if tag:
725 name = tag
726
jadmanskide292df2008-08-26 20:51:14 +0000727 return self._run_group(name, None, function, *args, **dargs)[0]
jadmanski10646442008-08-13 14:05:21 +0000728
729
730 def run_reboot(self, reboot_func, get_kernel_func):
731 """\
732 A specialization of run_group meant specifically for handling
733 a reboot. Includes support for capturing the kernel version
734 after the reboot.
735
736 reboot_func: a function that carries out the reboot
737
738 get_kernel_func: a function that returns a string
739 representing the kernel version.
740 """
jadmanski10646442008-08-13 14:05:21 +0000741 try:
742 self.record('START', None, 'reboot')
jadmanski10646442008-08-13 14:05:21 +0000743 reboot_func()
744 except Exception, e:
jadmanski10646442008-08-13 14:05:21 +0000745 err_msg = str(e) + '\n' + traceback.format_exc()
746 self.record('END FAIL', None, 'reboot', err_msg)
jadmanski4b51d542009-04-08 14:17:16 +0000747 raise
jadmanski10646442008-08-13 14:05:21 +0000748 else:
749 kernel = get_kernel_func()
jadmanski10646442008-08-13 14:05:21 +0000750 self.record('END GOOD', None, 'reboot',
Paul Pendlebury92a90742011-05-25 11:54:34 -0700751 optional_fields={"kernel": kernel},
752 status='completed successfully')
jadmanski10646442008-08-13 14:05:21 +0000753
754
jadmanskie432dd22009-01-30 15:04:51 +0000755 def run_control(self, path):
756 """Execute a control file found at path (relative to the autotest
757 path). Intended for executing a control file within a control file,
758 not for running the top-level job control file."""
759 path = os.path.join(self.autodir, path)
760 control_file = self._load_control_file(path)
mbligh0d0f67d2009-11-06 03:15:03 +0000761 self.run(control=control_file, control_file_dir=self._USE_TEMP_DIR)
jadmanskie432dd22009-01-30 15:04:51 +0000762
763
jadmanskic09fc152008-10-15 17:56:59 +0000764 def add_sysinfo_command(self, command, logfile=None, on_every_test=False):
mbligh4395bbd2009-03-25 19:34:17 +0000765 self._add_sysinfo_loggable(sysinfo.command(command, logf=logfile),
jadmanskic09fc152008-10-15 17:56:59 +0000766 on_every_test)
767
768
769 def add_sysinfo_logfile(self, file, on_every_test=False):
770 self._add_sysinfo_loggable(sysinfo.logfile(file), on_every_test)
771
772
773 def _add_sysinfo_loggable(self, loggable, on_every_test):
774 if on_every_test:
775 self.sysinfo.test_loggables.add(loggable)
776 else:
777 self.sysinfo.boot_loggables.add(loggable)
778
779
jadmanski10646442008-08-13 14:05:21 +0000780 def _read_warnings(self):
jadmanskif37df842009-02-11 00:03:26 +0000781 """Poll all the warning loggers and extract any new warnings that have
782 been logged. If the warnings belong to a category that is currently
783 disabled, this method will discard them and they will no longer be
784 retrievable.
785
786 Returns a list of (timestamp, message) tuples, where timestamp is an
787 integer epoch timestamp."""
jadmanski10646442008-08-13 14:05:21 +0000788 warnings = []
789 while True:
790 # pull in a line of output from every logger that has
791 # output ready to be read
mbligh2b92b862008-11-22 13:25:32 +0000792 loggers, _, _ = select.select(self.warning_loggers, [], [], 0)
jadmanski10646442008-08-13 14:05:21 +0000793 closed_loggers = set()
794 for logger in loggers:
795 line = logger.readline()
796 # record any broken pipes (aka line == empty)
797 if len(line) == 0:
798 closed_loggers.add(logger)
799 continue
jadmanskif37df842009-02-11 00:03:26 +0000800 # parse out the warning
801 timestamp, msgtype, msg = line.split('\t', 2)
802 timestamp = int(timestamp)
803 # if the warning is valid, add it to the results
804 if self.warning_manager.is_valid(timestamp, msgtype):
805 warnings.append((timestamp, msg.strip()))
jadmanski10646442008-08-13 14:05:21 +0000806
807 # stop listening to loggers that are closed
808 self.warning_loggers -= closed_loggers
809
810 # stop if none of the loggers have any output left
811 if not loggers:
812 break
813
814 # sort into timestamp order
815 warnings.sort()
816 return warnings
817
818
showardcc929362010-01-25 21:20:41 +0000819 def _unique_subdirectory(self, base_subdirectory_name):
820 """Compute a unique results subdirectory based on the given name.
821
822 Appends base_subdirectory_name with a number as necessary to find a
823 directory name that doesn't already exist.
824 """
825 subdirectory = base_subdirectory_name
826 counter = 1
827 while os.path.exists(os.path.join(self.resultdir, subdirectory)):
828 subdirectory = base_subdirectory_name + '.' + str(counter)
829 counter += 1
830 return subdirectory
831
832
jadmanski52053632010-06-11 21:08:10 +0000833 def get_record_context(self):
834 """Returns an object representing the current job.record context.
835
836 The object returned is an opaque object with a 0-arg restore method
837 which can be called to restore the job.record context (i.e. indentation)
838 to the current level. The intention is that it should be used when
839 something external which generate job.record calls (e.g. an autotest
840 client) can fail catastrophically and the server job record state
841 needs to be reset to its original "known good" state.
842
843 @return: A context object with a 0-arg restore() method."""
844 return self._indenter.get_context()
845
846
showardcc929362010-01-25 21:20:41 +0000847 def record_summary(self, status_code, test_name, reason='', attributes=None,
848 distinguishing_attributes=(), child_test_ids=None):
849 """Record a summary test result.
850
851 @param status_code: status code string, see
852 common_lib.log.is_valid_status()
853 @param test_name: name of the test
854 @param reason: (optional) string providing detailed reason for test
855 outcome
856 @param attributes: (optional) dict of string keyvals to associate with
857 this result
858 @param distinguishing_attributes: (optional) list of attribute names
859 that should be used to distinguish identically-named test
860 results. These attributes should be present in the attributes
861 parameter. This is used to generate user-friendly subdirectory
862 names.
863 @param child_test_ids: (optional) list of test indices for test results
864 used in generating this result.
865 """
866 subdirectory_name_parts = [test_name]
867 for attribute in distinguishing_attributes:
868 assert attributes
869 assert attribute in attributes, '%s not in %s' % (attribute,
870 attributes)
871 subdirectory_name_parts.append(attributes[attribute])
872 base_subdirectory_name = '.'.join(subdirectory_name_parts)
873
874 subdirectory = self._unique_subdirectory(base_subdirectory_name)
875 subdirectory_path = os.path.join(self.resultdir, subdirectory)
876 os.mkdir(subdirectory_path)
877
878 self.record(status_code, subdirectory, test_name,
879 status=reason, optional_fields={'is_summary': True})
880
881 if attributes:
882 utils.write_keyval(subdirectory_path, attributes)
883
884 if child_test_ids:
885 ids_string = ','.join(str(test_id) for test_id in child_test_ids)
886 summary_data = {'child_test_ids': ids_string}
887 utils.write_keyval(os.path.join(subdirectory_path, 'summary_data'),
888 summary_data)
889
890
jadmanski16a7ff72009-04-01 18:19:53 +0000891 def disable_warnings(self, warning_type):
jadmanskif37df842009-02-11 00:03:26 +0000892 self.warning_manager.disable_warnings(warning_type)
jadmanski16a7ff72009-04-01 18:19:53 +0000893 self.record("INFO", None, None,
894 "disabling %s warnings" % warning_type,
895 {"warnings.disable": warning_type})
jadmanskif37df842009-02-11 00:03:26 +0000896
897
jadmanski16a7ff72009-04-01 18:19:53 +0000898 def enable_warnings(self, warning_type):
jadmanskif37df842009-02-11 00:03:26 +0000899 self.warning_manager.enable_warnings(warning_type)
jadmanski16a7ff72009-04-01 18:19:53 +0000900 self.record("INFO", None, None,
901 "enabling %s warnings" % warning_type,
902 {"warnings.enable": warning_type})
jadmanskif37df842009-02-11 00:03:26 +0000903
904
jadmanski779bd292009-03-19 17:33:33 +0000905 def get_status_log_path(self, subdir=None):
906 """Return the path to the job status log.
907
908 @param subdir - Optional paramter indicating that you want the path
909 to a subdirectory status log.
910
911 @returns The path where the status log should be.
912 """
mbligh210bae62009-04-01 18:33:13 +0000913 if self.resultdir:
914 if subdir:
915 return os.path.join(self.resultdir, subdir, "status.log")
916 else:
917 return os.path.join(self.resultdir, "status.log")
jadmanski779bd292009-03-19 17:33:33 +0000918 else:
mbligh210bae62009-04-01 18:33:13 +0000919 return None
jadmanski779bd292009-03-19 17:33:33 +0000920
921
jadmanski6bb32d72009-03-19 20:25:24 +0000922 def _update_uncollected_logs_list(self, update_func):
923 """Updates the uncollected logs list in a multi-process safe manner.
924
925 @param update_func - a function that updates the list of uncollected
926 logs. Should take one parameter, the list to be updated.
927 """
mbligh0d0f67d2009-11-06 03:15:03 +0000928 if self._uncollected_log_file:
929 log_file = open(self._uncollected_log_file, "r+")
mbligha788dc42009-03-26 21:10:16 +0000930 fcntl.flock(log_file, fcntl.LOCK_EX)
jadmanski6bb32d72009-03-19 20:25:24 +0000931 try:
932 uncollected_logs = pickle.load(log_file)
933 update_func(uncollected_logs)
934 log_file.seek(0)
935 log_file.truncate()
936 pickle.dump(uncollected_logs, log_file)
jadmanski3bff9092009-04-22 18:09:47 +0000937 log_file.flush()
jadmanski6bb32d72009-03-19 20:25:24 +0000938 finally:
939 fcntl.flock(log_file, fcntl.LOCK_UN)
940 log_file.close()
941
942
943 def add_client_log(self, hostname, remote_path, local_path):
944 """Adds a new set of client logs to the list of uncollected logs,
945 to allow for future log recovery.
946
947 @param host - the hostname of the machine holding the logs
948 @param remote_path - the directory on the remote machine holding logs
949 @param local_path - the local directory to copy the logs into
950 """
951 def update_func(logs_list):
952 logs_list.append((hostname, remote_path, local_path))
953 self._update_uncollected_logs_list(update_func)
954
955
956 def remove_client_log(self, hostname, remote_path, local_path):
957 """Removes a set of client logs from the list of uncollected logs,
958 to allow for future log recovery.
959
960 @param host - the hostname of the machine holding the logs
961 @param remote_path - the directory on the remote machine holding logs
962 @param local_path - the local directory to copy the logs into
963 """
964 def update_func(logs_list):
965 logs_list.remove((hostname, remote_path, local_path))
966 self._update_uncollected_logs_list(update_func)
967
968
mbligh0d0f67d2009-11-06 03:15:03 +0000969 def get_client_logs(self):
970 """Retrieves the list of uncollected logs, if it exists.
971
972 @returns A list of (host, remote_path, local_path) tuples. Returns
973 an empty list if no uncollected logs file exists.
974 """
975 log_exists = (self._uncollected_log_file and
976 os.path.exists(self._uncollected_log_file))
977 if log_exists:
978 return pickle.load(open(self._uncollected_log_file))
979 else:
980 return []
981
982
    def _fill_server_control_namespace(self, namespace, protect=True):
        """
        Prepare a namespace to be used when executing server control files.

        This sets up the control file API by importing modules and making them
        available under the appropriate names within namespace.

        For use by _execute_code().

        Args:
            namespace: The namespace dictionary to fill in.
            protect: Boolean. If True (the default) any operation that would
                clobber an existing entry in namespace will cause an error.
        Raises:
            error.AutoservError: When a name would be clobbered by import.
        """
        def _import_names(module_name, names=()):
            """
            Import a module and assign named attributes into namespace.

            Args:
                module_name: The string module name.
                names: A limiting list of names to import from module_name. If
                    empty (the default), all names are imported from the module
                    similar to a "from foo.bar import *" statement.
            Raises:
                error.AutoservError: When a name being imported would clobber
                    a name already in namespace.
            """
            module = __import__(module_name, {}, {}, names)

            # No names supplied? Import * from the lowest level module.
            # (Ugh, why do I have to implement this part myself?)
            # __import__ returns the top-level package, so walk down the
            # dotted path to reach the leaf module before enumerating names.
            if not names:
                for submodule_name in module_name.split('.')[1:]:
                    module = getattr(module, submodule_name)
                if hasattr(module, '__all__'):
                    names = getattr(module, '__all__')
                else:
                    names = dir(module)

            # Install each name into namespace, checking to make sure it
            # doesn't override anything that already exists.
            for name in names:
                # Check for conflicts to help prevent future problems.
                if name in namespace and protect:
                    if namespace[name] is not getattr(module, name):
                        raise error.AutoservError('importing name '
                                '%s from %s %r would override %r' %
                                (name, module_name, getattr(module, name),
                                 namespace[name]))
                    else:
                        # Encourage cleanliness and the use of __all__ for a
                        # more concrete API with less surprises on '*' imports.
                        warnings.warn('%s (%r) being imported from %s for use '
                                      'in server control files is not the '
                                      'first occurrance of that import.' %
                                      (name, namespace[name], module_name))

                namespace[name] = getattr(module, name)


        # This is the equivalent of prepending a bunch of import statements to
        # the front of the control script.
        namespace.update(os=os, sys=sys, logging=logging)
        _import_names('autotest_lib.server',
                      ('hosts', 'autotest', 'kvm', 'git', 'standalone_profiler',
                       'source_kernel', 'rpm_kernel', 'deb_kernel', 'git_kernel'))
        _import_names('autotest_lib.server.subcommand',
                      ('parallel', 'parallel_simple', 'subcommand'))
        _import_names('autotest_lib.server.utils',
                      ('run', 'get_tmp_dir', 'sh_escape', 'parse_machine'))
        _import_names('autotest_lib.client.common_lib.error')
        _import_names('autotest_lib.client.common_lib.barrier', ('barrier',))

        # Inject ourself as the job object into other classes within the API.
        # (Yuck, this injection is a gross thing be part of a public API. -gps)
        #
        # XXX Base & SiteAutotest do not appear to use .job. Who does?
        namespace['autotest'].Autotest.job = self
        # server.hosts.base_classes.Host uses .job.
        namespace['hosts'].Host.job = self
        # Seed the host factory with this job's ssh connection settings.
        namespace['hosts'].factory.ssh_user = self._ssh_user
        namespace['hosts'].factory.ssh_port = self._ssh_port
        namespace['hosts'].factory.ssh_pass = self._ssh_pass
mbligh084bc172008-10-18 14:02:45 +00001068
1069
1070 def _execute_code(self, code_file, namespace, protect=True):
mbligh2b92b862008-11-22 13:25:32 +00001071 """
1072 Execute code using a copy of namespace as a server control script.
mbligh084bc172008-10-18 14:02:45 +00001073
1074 Unless protect_namespace is explicitly set to False, the dict will not
1075 be modified.
1076
1077 Args:
1078 code_file: The filename of the control file to execute.
1079 namespace: A dict containing names to make available during execution.
1080 protect: Boolean. If True (the default) a copy of the namespace dict
1081 is used during execution to prevent the code from modifying its
1082 contents outside of this function. If False the raw dict is
1083 passed in and modifications will be allowed.
1084 """
1085 if protect:
1086 namespace = namespace.copy()
1087 self._fill_server_control_namespace(namespace, protect=protect)
1088 # TODO: Simplify and get rid of the special cases for only 1 machine.
showard3e66e8c2008-10-27 19:20:51 +00001089 if len(self.machines) > 1:
mbligh084bc172008-10-18 14:02:45 +00001090 machines_text = '\n'.join(self.machines) + '\n'
1091 # Only rewrite the file if it does not match our machine list.
1092 try:
1093 machines_f = open(MACHINES_FILENAME, 'r')
1094 existing_machines_text = machines_f.read()
1095 machines_f.close()
1096 except EnvironmentError:
1097 existing_machines_text = None
1098 if machines_text != existing_machines_text:
1099 utils.open_write_close(MACHINES_FILENAME, machines_text)
1100 execfile(code_file, namespace, namespace)
jadmanski10646442008-08-13 14:05:21 +00001101
1102
jadmanskie29d0e42010-06-17 16:06:52 +00001103 def _parse_status(self, new_line):
mbligh0d0f67d2009-11-06 03:15:03 +00001104 if not self._using_parser:
jadmanski10646442008-08-13 14:05:21 +00001105 return
jadmanskie29d0e42010-06-17 16:06:52 +00001106 new_tests = self.parser.process_lines([new_line])
jadmanski10646442008-08-13 14:05:21 +00001107 for test in new_tests:
1108 self.__insert_test(test)
1109
1110
1111 def __insert_test(self, test):
mbligh2b92b862008-11-22 13:25:32 +00001112 """
1113 An internal method to insert a new test result into the
jadmanski10646442008-08-13 14:05:21 +00001114 database. This method will not raise an exception, even if an
1115 error occurs during the insert, to avoid failing a test
1116 simply because of unexpected database issues."""
showard21baa452008-10-21 00:08:39 +00001117 self.num_tests_run += 1
1118 if status_lib.is_worse_than_or_equal_to(test.status, 'FAIL'):
1119 self.num_tests_failed += 1
jadmanski10646442008-08-13 14:05:21 +00001120 try:
1121 self.results_db.insert_test(self.job_model, test)
1122 except Exception:
1123 msg = ("WARNING: An unexpected error occured while "
1124 "inserting test results into the database. "
1125 "Ignoring error.\n" + traceback.format_exc())
1126 print >> sys.stderr, msg
1127
mblighcaa62c22008-04-07 21:51:17 +00001128
mblighfc3da5b2010-01-06 18:37:22 +00001129 def preprocess_client_state(self):
1130 """
1131 Produce a state file for initializing the state of a client job.
1132
1133 Creates a new client state file with all the current server state, as
1134 well as some pre-set client state.
1135
1136 @returns The path of the file the state was written into.
1137 """
1138 # initialize the sysinfo state
1139 self._state.set('client', 'sysinfo', self.sysinfo.serialize())
1140
1141 # dump the state out to a tempfile
1142 fd, file_path = tempfile.mkstemp(dir=self.tmpdir)
1143 os.close(fd)
mbligha2c99492010-01-27 22:59:50 +00001144
1145 # write_to_file doesn't need locking, we exclusively own file_path
mblighfc3da5b2010-01-06 18:37:22 +00001146 self._state.write_to_file(file_path)
1147 return file_path
1148
1149
1150 def postprocess_client_state(self, state_path):
1151 """
1152 Update the state of this job with the state from a client job.
1153
1154 Updates the state of the server side of a job with the final state
1155 of a client job that was run. Updates the non-client-specific state,
1156 pulls in some specific bits from the client-specific state, and then
1157 discards the rest. Removes the state file afterwards
1158
1159 @param state_file A path to the state file from the client.
1160 """
1161 # update the on-disk state
mblighfc3da5b2010-01-06 18:37:22 +00001162 try:
jadmanskib6e7bdb2010-04-13 16:00:39 +00001163 self._state.read_from_file(state_path)
mblighfc3da5b2010-01-06 18:37:22 +00001164 os.remove(state_path)
mbligha2c99492010-01-27 22:59:50 +00001165 except OSError, e:
mblighfc3da5b2010-01-06 18:37:22 +00001166 # ignore file-not-found errors
1167 if e.errno != errno.ENOENT:
1168 raise
jadmanskib6e7bdb2010-04-13 16:00:39 +00001169 else:
1170 logging.debug('Client state file %s not found', state_path)
mblighfc3da5b2010-01-06 18:37:22 +00001171
1172 # update the sysinfo state
1173 if self._state.has('client', 'sysinfo'):
1174 self.sysinfo.deserialize(self._state.get('client', 'sysinfo'))
1175
1176 # drop all the client-specific state
1177 self._state.discard_namespace('client')
1178
1179
mbligh0a883702010-04-21 01:58:34 +00001180 def clear_all_known_hosts(self):
1181 """Clears known hosts files for all AbstractSSHHosts."""
1182 for host in self.hosts:
1183 if isinstance(host, abstract_ssh.AbstractSSHHost):
1184 host.clear_known_hosts()
1185
1186
# Pick up the site-specific server_job extension when one is available;
# falls back to base_server_job otherwise.
site_server_job = utils.import_site_class(
    __file__, "autotest_lib.server.site_server_job", "site_server_job",
    base_server_job)
jadmanski0afbb632008-06-06 21:10:57 +00001190
class server_job(site_server_job):
    # The public server_job class: base_server_job plus any site-specific
    # behavior mixed in via site_server_job.
    pass
jadmanskif37df842009-02-11 00:03:26 +00001193
1194
class warning_manager(object):
    """Class for controlling warning logs. Manages the enabling and disabling
    of warnings."""
    def __init__(self):
        # Maps warning type -> list of (start, end) disabled intervals,
        # where end is None for an interval that is still open.
        self.disabled_warnings = {}


    def is_valid(self, timestamp, warning_type):
        """Indicates if a warning (based on the time it occured and its type)
        is a valid warning. A warning is considered "invalid" if this type of
        warning was marked as "disabled" at the time the warning occured."""
        for begin, finish in self.disabled_warnings.get(warning_type, []):
            disabled = timestamp >= begin and (finish is None or
                                               timestamp < finish)
            if disabled:
                return False
        return True


    def disable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, disables all further warnings of this type."""
        intervals = self.disabled_warnings.setdefault(warning_type, [])
        # Only open a new interval when the last one has been closed.
        if not intervals or intervals[-1][1] is not None:
            intervals.append((int(current_time_func()), None))


    def enable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, enables all further warnings of this type."""
        intervals = self.disabled_warnings.get(warning_type, [])
        if intervals and intervals[-1][1] is None:
            # Close the currently open interval at the present time.
            start = intervals[-1][0]
            intervals[-1] = (start, int(current_time_func()))