blob: 0e9a96df7261fea6670422c07123f88c339e33f9 [file] [log] [blame]
mbligh57e78662008-06-17 19:53:49 +00001"""
2The main job wrapper for the server side.
3
4This is the core infrastructure. Derived from the client side job.py
5
6Copyright Martin J. Bligh, Andy Whitcroft 2007
7"""
8
jadmanski6bb32d72009-03-19 20:25:24 +00009import getpass, os, sys, re, stat, tempfile, time, select, subprocess
mblighfc3da5b2010-01-06 18:37:22 +000010import traceback, shutil, warnings, fcntl, pickle, logging, itertools, errno
showard75cdfee2009-06-10 17:40:41 +000011from autotest_lib.client.bin import sysinfo
mbligh0d0f67d2009-11-06 03:15:03 +000012from autotest_lib.client.common_lib import base_job
mbligh09108442008-10-15 16:27:38 +000013from autotest_lib.client.common_lib import error, log, utils, packages
showard75cdfee2009-06-10 17:40:41 +000014from autotest_lib.client.common_lib import logging_manager
jadmanski043e1132008-11-19 17:10:32 +000015from autotest_lib.server import test, subcommand, profilers
mbligh0a883702010-04-21 01:58:34 +000016from autotest_lib.server.hosts import abstract_ssh
jadmanski10646442008-08-13 14:05:21 +000017from autotest_lib.tko import db as tko_db, status_lib, utils as tko_utils
jadmanski10646442008-08-13 14:05:21 +000018
19
mbligh084bc172008-10-18 14:02:45 +000020def _control_segment_path(name):
21 """Get the pathname of the named control segment file."""
jadmanski10646442008-08-13 14:05:21 +000022 server_dir = os.path.dirname(os.path.abspath(__file__))
mbligh084bc172008-10-18 14:02:45 +000023 return os.path.join(server_dir, "control_segments", name)
jadmanski10646442008-08-13 14:05:21 +000024
25
# Names of the control files written into a job's results directory.
CLIENT_CONTROL_FILENAME = 'control'
SERVER_CONTROL_FILENAME = 'control.srv'
MACHINES_FILENAME = '.machines'

# Pre-canned control segments shipped alongside this module (in the
# control_segments/ directory); they wrap or augment the user-supplied
# control file at the various stages of a server job.
CLIENT_WRAPPER_CONTROL_FILE = _control_segment_path('client_wrapper')
CRASHDUMPS_CONTROL_FILE = _control_segment_path('crashdumps')
CRASHINFO_CONTROL_FILE = _control_segment_path('crashinfo')
INSTALL_CONTROL_FILE = _control_segment_path('install')
CLEANUP_CONTROL_FILE = _control_segment_path('cleanup')

VERIFY_CONTROL_FILE = _control_segment_path('verify')
REPAIR_CONTROL_FILE = _control_segment_path('repair')
jadmanski10646442008-08-13 14:05:21 +000038
39
mbligh062ed152009-01-13 00:57:14 +000040# by default provide a stub that generates no site data
41def _get_site_job_data_dummy(job):
42 return {}
43
44
# load up site-specific code for generating site-specific job data;
# falls back to the dummy above when no site module is installed
get_site_job_data = utils.import_site_function(__file__,
    "autotest_lib.server.site_server_job", "get_site_job_data",
    _get_site_job_data_dummy)
jadmanski10646442008-08-13 14:05:21 +000049
50
class status_indenter(base_job.status_indenter):
    """A status indenter that tracks indentation as a plain integer."""
    def __init__(self):
        self._indent = 0


    @property
    def indent(self):
        """The current indentation level."""
        return self._indent


    def increment(self):
        """Deepen the indentation by one level."""
        self._indent += 1


    def decrement(self):
        """Shallow the indentation by one level."""
        self._indent -= 1


    def get_context(self):
        """Return a snapshot object whose restore() rewinds the indent.

        Returned objects are consumed by job.get_record_context.
        """
        class context(object):
            def __init__(self, indenter, level):
                self._indenter = indenter
                self._level = level
            def restore(self):
                # rewind the indenter to the level captured at snapshot time
                self._indenter._indent = self._level
        return context(self, self._indent)
79
80
class server_job_record_hook(object):
    """The job.record hook for server jobs.

    Injects WARN entries gathered from the warning loggers whenever a new
    status log entry is written, and echoes every rendered entry to INFO
    level logging. Implemented as a class so it can keep a re-entrancy
    flag: the hook itself calls job.record to log WARN messages, and the
    flag stops that from recursing forever.

    Depends on job._read_warnings and job._logger.
    """
    def __init__(self, job):
        self._job = job
        self._being_called = False


    def __call__(self, entry):
        """Guarded entry point around the _hook method.

        Makes no attempt at thread safety; the only goal is to break the
        potential job.record -> _hook -> job.record -> _hook -> ...
        infinite recursion outright.
        """
        if self._being_called:
            return
        self._being_called = True
        try:
            self._hook(self._job, entry)
        finally:
            self._being_called = False


    @staticmethod
    def _hook(job, entry):
        """The core hook body; safe to call job.record from in here."""
        to_echo = []
        # drain every warning logger of any newly logged warnings
        for timestamp, msg in job._read_warnings():
            warning = base_job.status_log_entry(
                'WARN', None, None, msg, {}, timestamp=timestamp)
            to_echo.append(warning)
            job.record_entry(warning)
        to_echo.append(entry)
        # echo rendered versions of all the status logs to INFO
        for log_entry in to_echo:
            logging.info(job._logger.render_entry(log_entry))
124
125
class base_server_job(base_job.base_job):
    """The server-side concrete implementation of base_job.

    Optional properties provided by this implementation:
        serverdir
        conmuxdir

        num_tests_run
        num_tests_failed

        warning_manager
        warning_loggers
    """

    # version of the status log format this job writes and parses
    _STATUS_VERSION = 1
jadmanski10646442008-08-13 14:05:21 +0000141
    def __init__(self, control, args, resultdir, label, user, machines,
                 client=False, parse_job='',
                 ssh_user='root', ssh_port=22, ssh_pass='',
                 group_name='', tag='',
                 control_filename=SERVER_CONTROL_FILENAME):
        """
        Create a server side job object.

        @param control: The pathname of the control file.
        @param args: Passed to the control file.
        @param resultdir: Where to throw the results.
        @param label: Description of the job.
        @param user: Username for the job (email address).
        @param machines: A list of hostnames of machines the job runs on.
        @param client: True if this is a client-side control file.
        @param parse_job: string, if supplied it is the job execution tag that
                the results will be passed through to the TKO parser with.
        @param ssh_user: The SSH username.  [root]
        @param ssh_port: The SSH port number.  [22]
        @param ssh_pass: The SSH passphrase, if needed.
        @param group_name: If supplied, this will be written out as
                host_group_name in the keyvals file for the parser.
        @param tag: The job execution tag from the scheduler.  [optional]
        @param control_filename: The filename where the server control file
                should be written in the results directory.
        """
        super(base_server_job, self).__init__(resultdir=resultdir)

        # NOTE(review): 'path' appears to be unused below -- confirm
        path = os.path.dirname(__file__)
        self.control = control
        self._uncollected_log_file = os.path.join(self.resultdir,
                                                  'uncollected_logs')
        debugdir = os.path.join(self.resultdir, 'debug')
        if not os.path.exists(debugdir):
            os.mkdir(debugdir)

        # fall back to the local username when no job user was supplied
        if user:
            self.user = user
        else:
            self.user = getpass.getuser()

        self.args = args
        self.machines = machines
        self._client = client
        self.warning_loggers = set()
        self.warning_manager = warning_manager()
        self._ssh_user = ssh_user
        self._ssh_port = ssh_port
        self._ssh_pass = ssh_pass
        self.tag = tag
        self.last_boot_tag = None
        self.hosts = set()
        self.drop_caches = False
        self.drop_caches_between_iterations = False
        self._control_filename = control_filename

        # take over stdout/stderr so all job output is captured and logged
        self.logging = logging_manager.get_logging_manager(
                manage_stdout_and_stderr=True, redirect_fds=True)
        subcommand.logging_manager_object = self.logging

        self.sysinfo = sysinfo.sysinfo(self.resultdir)
        self.profilers = profilers.profilers(self)

        job_data = {'label' : label, 'user' : user,
                    'hostname' : ','.join(machines),
                    'status_version' : str(self._STATUS_VERSION),
                    'job_started' : str(int(time.time()))}
        if group_name:
            job_data['host_group_name'] = group_name

        # only write these keyvals out on the first job in a resultdir
        if 'job_started' not in utils.read_keyval(self.resultdir):
            job_data.update(get_site_job_data(self))
            utils.write_keyval(self.resultdir, job_data)

        self._parse_job = parse_job
        # continuous parsing only works for single-machine jobs
        self._using_parser = (self._parse_job and len(machines) <= 1)
        self.pkgmgr = packages.PackageManager(
            self.autodir, run_function_dargs={'timeout':600})
        self.num_tests_run = 0
        self.num_tests_failed = 0

        self._register_subcommand_hooks()

        # these components aren't usable on the server
        self.bootloader = None
        self.harness = None

        # set up the status logger
        self._indenter = status_indenter()
        self._logger = base_job.status_logger(
            self, self._indenter, 'status.log', 'status.log',
            record_hook=server_job_record_hook(self))
234
mbligh0d0f67d2009-11-06 03:15:03 +0000235
236 @classmethod
237 def _find_base_directories(cls):
238 """
239 Determine locations of autodir, clientdir and serverdir. Assumes
240 that this file is located within serverdir and uses __file__ along
241 with relative paths to resolve the location.
242 """
243 serverdir = os.path.abspath(os.path.dirname(__file__))
244 autodir = os.path.normpath(os.path.join(serverdir, '..'))
245 clientdir = os.path.join(autodir, 'client')
246 return autodir, clientdir, serverdir
247
248
249 def _find_resultdir(self, resultdir):
250 """
251 Determine the location of resultdir. For server jobs we expect one to
252 always be explicitly passed in to __init__, so just return that.
253 """
254 if resultdir:
255 return os.path.normpath(resultdir)
256 else:
257 return None
258
jadmanski550fdc22008-11-20 16:32:08 +0000259
    def _get_status_logger(self):
        """Return a reference to the job's status_logger instance."""
        return self._logger
263
264
jadmanskie432dd22009-01-30 15:04:51 +0000265 @staticmethod
266 def _load_control_file(path):
267 f = open(path)
268 try:
269 control_file = f.read()
270 finally:
271 f.close()
272 return re.sub('\r', '', control_file)
273
274
jadmanski550fdc22008-11-20 16:32:08 +0000275 def _register_subcommand_hooks(self):
mbligh2b92b862008-11-22 13:25:32 +0000276 """
277 Register some hooks into the subcommand modules that allow us
278 to properly clean up self.hosts created in forked subprocesses.
279 """
jadmanski550fdc22008-11-20 16:32:08 +0000280 def on_fork(cmd):
281 self._existing_hosts_on_fork = set(self.hosts)
282 def on_join(cmd):
283 new_hosts = self.hosts - self._existing_hosts_on_fork
284 for host in new_hosts:
285 host.close()
286 subcommand.subcommand.register_fork_hook(on_fork)
287 subcommand.subcommand.register_join_hook(on_join)
288
jadmanski10646442008-08-13 14:05:21 +0000289
    def init_parser(self):
        """
        Start the continuous parsing of self.resultdir. This sets up
        the database connection and inserts the basic job object into
        the database if necessary.

        No-op unless self._using_parser was enabled at construction time
        (i.e. a parse_job tag was supplied for a single-machine job).
        """
        if not self._using_parser:
            return
        # redirect parser debugging to .parse.log
        parse_log = os.path.join(self.resultdir, '.parse.log')
        # opened unbuffered (third arg 0) so debug output lands immediately
        parse_log = open(parse_log, 'w', 0)
        tko_utils.redirect_parser_debugging(parse_log)
        # create a job model object and set up the db
        self.results_db = tko_db.db(autocommit=True)
        self.parser = status_lib.parser(self._STATUS_VERSION)
        self.job_model = self.parser.make_job(self.resultdir)
        self.parser.start(self.job_model)
        # check if a job already exists in the db and insert it if
        # it does not
        job_idx = self.results_db.find_job(self._parse_job)
        if job_idx is None:
            self.results_db.insert_job(self._parse_job, self.job_model)
        else:
            # job already present: attach the model to the existing rows
            machine_idx = self.results_db.lookup_machine(self.job_model.machine)
            self.job_model.index = job_idx
            self.job_model.machine_idx = machine_idx
316
317
318 def cleanup_parser(self):
mbligh2b92b862008-11-22 13:25:32 +0000319 """
320 This should be called after the server job is finished
jadmanski10646442008-08-13 14:05:21 +0000321 to carry out any remaining cleanup (e.g. flushing any
mbligh2b92b862008-11-22 13:25:32 +0000322 remaining test results to the results db)
323 """
mbligh0d0f67d2009-11-06 03:15:03 +0000324 if not self._using_parser:
jadmanski10646442008-08-13 14:05:21 +0000325 return
326 final_tests = self.parser.end()
327 for test in final_tests:
328 self.__insert_test(test)
mbligh0d0f67d2009-11-06 03:15:03 +0000329 self._using_parser = False
jadmanski10646442008-08-13 14:05:21 +0000330
331
332 def verify(self):
333 if not self.machines:
mbligh084bc172008-10-18 14:02:45 +0000334 raise error.AutoservError('No machines specified to verify')
mbligh0fce4112008-11-27 00:37:17 +0000335 if self.resultdir:
336 os.chdir(self.resultdir)
jadmanski10646442008-08-13 14:05:21 +0000337 try:
jadmanskicdd0c402008-09-19 21:21:31 +0000338 namespace = {'machines' : self.machines, 'job' : self,
mbligh0d0f67d2009-11-06 03:15:03 +0000339 'ssh_user' : self._ssh_user,
340 'ssh_port' : self._ssh_port,
341 'ssh_pass' : self._ssh_pass}
mbligh084bc172008-10-18 14:02:45 +0000342 self._execute_code(VERIFY_CONTROL_FILE, namespace, protect=False)
jadmanski10646442008-08-13 14:05:21 +0000343 except Exception, e:
mbligh2b92b862008-11-22 13:25:32 +0000344 msg = ('Verify failed\n' + str(e) + '\n' + traceback.format_exc())
jadmanski10646442008-08-13 14:05:21 +0000345 self.record('ABORT', None, None, msg)
346 raise
347
348
349 def repair(self, host_protection):
350 if not self.machines:
351 raise error.AutoservError('No machines specified to repair')
mbligh0fce4112008-11-27 00:37:17 +0000352 if self.resultdir:
353 os.chdir(self.resultdir)
jadmanski10646442008-08-13 14:05:21 +0000354 namespace = {'machines': self.machines, 'job': self,
mbligh0d0f67d2009-11-06 03:15:03 +0000355 'ssh_user': self._ssh_user, 'ssh_port': self._ssh_port,
356 'ssh_pass': self._ssh_pass,
jadmanski10646442008-08-13 14:05:21 +0000357 'protection_level': host_protection}
mbligh25c0b8c2009-01-24 01:44:17 +0000358
mbligh0931b0a2009-04-08 17:44:48 +0000359 self._execute_code(REPAIR_CONTROL_FILE, namespace, protect=False)
jadmanski10646442008-08-13 14:05:21 +0000360
361
    def precheck(self):
        """
        Perform any additional checks in derived classes.

        No-op in the base implementation.
        """
        pass
367
368
    def enable_external_logging(self):
        """
        Start or restart external logging mechanism.

        No-op in the base implementation; subclasses may override.
        """
        pass
374
375
    def disable_external_logging(self):
        """
        Pause or stop external logging mechanism.

        No-op in the base implementation; subclasses may override.
        """
        pass
381
382
    def use_external_logging(self):
        """
        Return True if external logging should be used.

        Always False in the base implementation; subclasses may override.
        """
        return False
388
389
    def _make_parallel_wrapper(self, function, machines, log):
        """Wrap function as appropriate for calling by parallel_simple.

        @param function: The per-machine callable to wrap.
        @param machines: The list of machines the call will fan out over.
        @param log: Whether per-machine output logging was requested.

        @returns Either function itself, or a wrapper that sets up
                per-machine execution context (and, when parsing is
                enabled, a per-machine continuous parser) before calling it.
        """
        # forking happens unless this is a single-machine job running on
        # exactly its own machine list
        is_forking = not (len(machines) == 1 and self.machines == machines)
        if self._parse_job and is_forking and log:
            def wrapper(machine):
                # runs in the forked child: scope all job state (parse tag,
                # machine list, execution context, parser) to this machine
                self._parse_job += "/" + machine
                self._using_parser = True
                self.machines = [machine]
                self.push_execution_context(machine)
                os.chdir(self.resultdir)
                utils.write_keyval(self.resultdir, {"hostname": machine})
                self.init_parser()
                result = function(machine)
                self.cleanup_parser()
                return result
        elif len(machines) > 1 and log:
            def wrapper(machine):
                # no parsing: just set up the per-machine result directory
                self.push_execution_context(machine)
                os.chdir(self.resultdir)
                machine_data = {'hostname' : machine,
                                'status_version' : str(self._STATUS_VERSION)}
                utils.write_keyval(self.resultdir, machine_data)
                result = function(machine)
                return result
        else:
            # nothing to set up -- call the function directly
            wrapper = function
        return wrapper
417
418
419 def parallel_simple(self, function, machines, log=True, timeout=None,
420 return_results=False):
421 """
422 Run 'function' using parallel_simple, with an extra wrapper to handle
423 the necessary setup for continuous parsing, if possible. If continuous
424 parsing is already properly initialized then this should just work.
425
426 @param function: A callable to run in parallel given each machine.
427 @param machines: A list of machine names to be passed one per subcommand
428 invocation of function.
429 @param log: If True, output will be written to output in a subdirectory
430 named after each machine.
431 @param timeout: Seconds after which the function call should timeout.
432 @param return_results: If True instead of an AutoServError being raised
433 on any error a list of the results|exceptions from the function
434 called on each arg is returned. [default: False]
435
436 @raises error.AutotestError: If any of the functions failed.
437 """
438 wrapper = self._make_parallel_wrapper(function, machines, log)
439 return subcommand.parallel_simple(wrapper, machines,
440 log=log, timeout=timeout,
441 return_results=return_results)
442
443
444 def parallel_on_machines(self, function, machines, timeout=None):
445 """
showardcd5fac42009-07-06 20:19:43 +0000446 @param function: Called in parallel with one machine as its argument.
mbligh415dc212009-06-15 21:53:34 +0000447 @param machines: A list of machines to call function(machine) on.
448 @param timeout: Seconds after which the function call should timeout.
449
450 @returns A list of machines on which function(machine) returned
451 without raising an exception.
452 """
showardcd5fac42009-07-06 20:19:43 +0000453 results = self.parallel_simple(function, machines, timeout=timeout,
mbligh415dc212009-06-15 21:53:34 +0000454 return_results=True)
455 success_machines = []
456 for result, machine in itertools.izip(results, machines):
457 if not isinstance(result, Exception):
458 success_machines.append(machine)
459 return success_machines
jadmanski10646442008-08-13 14:05:21 +0000460
461
    # sentinel: pass as control_file_dir to force use of a temp directory
    _USE_TEMP_DIR = object()
    def run(self, cleanup=False, install_before=False, install_after=False,
            collect_crashdumps=True, namespace={}, control=None,
            control_file_dir=None, only_collect_crashinfo=False):
        """Run the server job: execute the control file and collect results.

        @param cleanup: If True, run the cleanup control segment afterwards.
        @param install_before: If True, run the install control segment on
                the machines before executing the control file.
        @param install_after: If True, run the install control segment on
                the machines after the job finishes.
        @param collect_crashdumps: If True, collect crashdumps afterwards.
        @param namespace: Extra names to expose to the control file; copied,
                so the caller's dict is never mutated (which is why the
                mutable default is harmless here).
        @param control: Control file text; defaults to loading self.control.
        @param control_file_dir: Directory to write control file copies to,
                or _USE_TEMP_DIR for a throwaway temp directory.
        @param only_collect_crashinfo: If True, skip running the control
                file and only collect crash information.
        """
        # for a normal job, make sure the uncollected logs file exists
        # for a crashinfo-only run it should already exist, bail out otherwise
        created_uncollected_logs = False
        if self.resultdir and not os.path.exists(self._uncollected_log_file):
            if only_collect_crashinfo:
                # if this is a crashinfo-only run, and there were no existing
                # uncollected logs, just bail out early
                logging.info("No existing uncollected logs, "
                             "skipping crashinfo collection")
                return
            else:
                log_file = open(self._uncollected_log_file, "w")
                pickle.dump([], log_file)
                log_file.close()
                created_uncollected_logs = True

        # use a copy so changes don't affect the original dictionary
        namespace = namespace.copy()
        machines = self.machines
        if control is None:
            if self.control is None:
                control = ''
            else:
                control = self._load_control_file(self.control)
        if control_file_dir is None:
            control_file_dir = self.resultdir

        self.aborted = False
        namespace['machines'] = machines
        namespace['args'] = self.args
        namespace['job'] = self
        namespace['ssh_user'] = self._ssh_user
        namespace['ssh_port'] = self._ssh_port
        namespace['ssh_pass'] = self._ssh_pass
        test_start_time = int(time.time())

        if self.resultdir:
            os.chdir(self.resultdir)
            # touch status.log so that the parser knows a job is running here
            open(self.get_status_log_path(), 'a').close()
            self.enable_external_logging()

        # assume crashinfo collection is needed until the control file
        # completes without an exception
        collect_crashinfo = True
        temp_control_file_dir = None
        try:
            try:
                if install_before and machines:
                    self._execute_code(INSTALL_CONTROL_FILE, namespace)

                if only_collect_crashinfo:
                    return

                # determine the dir to write the control files to
                cfd_specified = (control_file_dir
                                 and control_file_dir is not self._USE_TEMP_DIR)
                if cfd_specified:
                    temp_control_file_dir = None
                else:
                    temp_control_file_dir = tempfile.mkdtemp(
                        suffix='temp_control_file_dir')
                    control_file_dir = temp_control_file_dir
                server_control_file = os.path.join(control_file_dir,
                                                   self._control_filename)
                client_control_file = os.path.join(control_file_dir,
                                                   CLIENT_CONTROL_FILENAME)
                if self._client:
                    # client-side job: write the user control file as the
                    # client control and run the canned wrapper as server
                    namespace['control'] = control
                    utils.open_write_close(client_control_file, control)
                    shutil.copyfile(CLIENT_WRAPPER_CONTROL_FILE,
                                    server_control_file)
                else:
                    utils.open_write_close(server_control_file, control)
                logging.info("Processing control file")
                self._execute_code(server_control_file, namespace)
                logging.info("Finished processing control file")

                # no error occured, so we don't need to collect crashinfo
                collect_crashinfo = False
            except:
                try:
                    logging.exception(
                        'Exception escaped control file, job aborting:')
                except:
                    pass # don't let logging exceptions here interfere
                raise
        finally:
            if temp_control_file_dir:
                # Clean up temp directory used for copies of the control files
                try:
                    shutil.rmtree(temp_control_file_dir)
                except Exception, e:
                    logging.warn('Could not remove temp directory %s: %s',
                                 temp_control_file_dir, e)

            if machines and (collect_crashdumps or collect_crashinfo):
                namespace['test_start_time'] = test_start_time
                if collect_crashinfo:
                    # includes crashdumps
                    self._execute_code(CRASHINFO_CONTROL_FILE, namespace)
                else:
                    self._execute_code(CRASHDUMPS_CONTROL_FILE, namespace)
            # only remove the uncollected logs file if this run created it
            if self._uncollected_log_file and created_uncollected_logs:
                os.remove(self._uncollected_log_file)
            self.disable_external_logging()
            if cleanup and machines:
                self._execute_code(CLEANUP_CONTROL_FILE, namespace)
            if install_after and machines:
                self._execute_code(INSTALL_CONTROL_FILE, namespace)
jadmanski10646442008-08-13 14:05:21 +0000574
575
    def run_test(self, url, *args, **dargs):
        """
        Summon a test object and run it.

        @param url: url of the test to run; also accepts a 'tag' entry in
                dargs, which is added to the test name.
        @param args: positional args passed through to the test.
        @param dargs: keyword args passed through to the test.

        @returns False if the test raised a TestBaseException, True if it
                completed successfully.
        @raises Any non-TestBaseException escaping the test is re-raised.
        """
        # 'group' is currently unused here; we only need the test name
        group, testname = self.pkgmgr.get_package_name(url, 'test')
        testname, subdir, tag = self._build_tagged_test_name(testname, dargs)
        # creates the test's output directory; return value unused here
        outputdir = self._make_test_outputdir(subdir)

        def group_func():
            # runs inside the status group: record the test's outcome
            try:
                test.runtest(self, url, tag, args, dargs)
            except error.TestBaseException, e:
                self.record(e.exit_status, subdir, testname, str(e))
                raise
            except Exception, e:
                info = str(e) + "\n" + traceback.format_exc()
                self.record('FAIL', subdir, testname, info)
                raise
            else:
                self.record('GOOD', subdir, testname, 'completed successfully')

        result, exc_info = self._run_group(testname, subdir, group_func)
        if exc_info and isinstance(exc_info[1], error.TestBaseException):
            # test-level failures are reported via the return value
            return False
        elif exc_info:
            # anything else is re-raised with its original traceback
            raise exc_info[0], exc_info[1], exc_info[2]
        else:
            return True
jadmanski10646442008-08-13 14:05:21 +0000609
610
611 def _run_group(self, name, subdir, function, *args, **dargs):
612 """\
613 Underlying method for running something inside of a group.
614 """
jadmanskide292df2008-08-26 20:51:14 +0000615 result, exc_info = None, None
jadmanski10646442008-08-13 14:05:21 +0000616 try:
617 self.record('START', subdir, name)
jadmanski52053632010-06-11 21:08:10 +0000618 result = function(*args, **dargs)
jadmanski10646442008-08-13 14:05:21 +0000619 except error.TestBaseException, e:
jadmanskib88d6dc2009-01-10 00:33:18 +0000620 self.record("END %s" % e.exit_status, subdir, name)
jadmanskide292df2008-08-26 20:51:14 +0000621 exc_info = sys.exc_info()
jadmanski10646442008-08-13 14:05:21 +0000622 except Exception, e:
623 err_msg = str(e) + '\n'
624 err_msg += traceback.format_exc()
625 self.record('END ABORT', subdir, name, err_msg)
626 raise error.JobError(name + ' failed\n' + traceback.format_exc())
627 else:
628 self.record('END GOOD', subdir, name)
629
jadmanskide292df2008-08-26 20:51:14 +0000630 return result, exc_info
jadmanski10646442008-08-13 14:05:21 +0000631
632
633 def run_group(self, function, *args, **dargs):
634 """\
635 function:
636 subroutine to run
637 *args:
638 arguments for the function
639 """
640
641 name = function.__name__
642
643 # Allow the tag for the group to be specified.
644 tag = dargs.pop('tag', None)
645 if tag:
646 name = tag
647
jadmanskide292df2008-08-26 20:51:14 +0000648 return self._run_group(name, None, function, *args, **dargs)[0]
jadmanski10646442008-08-13 14:05:21 +0000649
650
651 def run_reboot(self, reboot_func, get_kernel_func):
652 """\
653 A specialization of run_group meant specifically for handling
654 a reboot. Includes support for capturing the kernel version
655 after the reboot.
656
657 reboot_func: a function that carries out the reboot
658
659 get_kernel_func: a function that returns a string
660 representing the kernel version.
661 """
jadmanski10646442008-08-13 14:05:21 +0000662 try:
663 self.record('START', None, 'reboot')
jadmanski10646442008-08-13 14:05:21 +0000664 reboot_func()
665 except Exception, e:
jadmanski10646442008-08-13 14:05:21 +0000666 err_msg = str(e) + '\n' + traceback.format_exc()
667 self.record('END FAIL', None, 'reboot', err_msg)
jadmanski4b51d542009-04-08 14:17:16 +0000668 raise
jadmanski10646442008-08-13 14:05:21 +0000669 else:
670 kernel = get_kernel_func()
jadmanski10646442008-08-13 14:05:21 +0000671 self.record('END GOOD', None, 'reboot',
672 optional_fields={"kernel": kernel})
673
674
jadmanskie432dd22009-01-30 15:04:51 +0000675 def run_control(self, path):
676 """Execute a control file found at path (relative to the autotest
677 path). Intended for executing a control file within a control file,
678 not for running the top-level job control file."""
679 path = os.path.join(self.autodir, path)
680 control_file = self._load_control_file(path)
mbligh0d0f67d2009-11-06 03:15:03 +0000681 self.run(control=control_file, control_file_dir=self._USE_TEMP_DIR)
jadmanskie432dd22009-01-30 15:04:51 +0000682
683
jadmanskic09fc152008-10-15 17:56:59 +0000684 def add_sysinfo_command(self, command, logfile=None, on_every_test=False):
mbligh4395bbd2009-03-25 19:34:17 +0000685 self._add_sysinfo_loggable(sysinfo.command(command, logf=logfile),
jadmanskic09fc152008-10-15 17:56:59 +0000686 on_every_test)
687
688
689 def add_sysinfo_logfile(self, file, on_every_test=False):
690 self._add_sysinfo_loggable(sysinfo.logfile(file), on_every_test)
691
692
693 def _add_sysinfo_loggable(self, loggable, on_every_test):
694 if on_every_test:
695 self.sysinfo.test_loggables.add(loggable)
696 else:
697 self.sysinfo.boot_loggables.add(loggable)
698
699
jadmanski10646442008-08-13 14:05:21 +0000700 def _read_warnings(self):
jadmanskif37df842009-02-11 00:03:26 +0000701 """Poll all the warning loggers and extract any new warnings that have
702 been logged. If the warnings belong to a category that is currently
703 disabled, this method will discard them and they will no longer be
704 retrievable.
705
706 Returns a list of (timestamp, message) tuples, where timestamp is an
707 integer epoch timestamp."""
jadmanski10646442008-08-13 14:05:21 +0000708 warnings = []
709 while True:
710 # pull in a line of output from every logger that has
711 # output ready to be read
mbligh2b92b862008-11-22 13:25:32 +0000712 loggers, _, _ = select.select(self.warning_loggers, [], [], 0)
jadmanski10646442008-08-13 14:05:21 +0000713 closed_loggers = set()
714 for logger in loggers:
715 line = logger.readline()
716 # record any broken pipes (aka line == empty)
717 if len(line) == 0:
718 closed_loggers.add(logger)
719 continue
jadmanskif37df842009-02-11 00:03:26 +0000720 # parse out the warning
721 timestamp, msgtype, msg = line.split('\t', 2)
722 timestamp = int(timestamp)
723 # if the warning is valid, add it to the results
724 if self.warning_manager.is_valid(timestamp, msgtype):
725 warnings.append((timestamp, msg.strip()))
jadmanski10646442008-08-13 14:05:21 +0000726
727 # stop listening to loggers that are closed
728 self.warning_loggers -= closed_loggers
729
730 # stop if none of the loggers have any output left
731 if not loggers:
732 break
733
734 # sort into timestamp order
735 warnings.sort()
736 return warnings
737
738
showardcc929362010-01-25 21:20:41 +0000739 def _unique_subdirectory(self, base_subdirectory_name):
740 """Compute a unique results subdirectory based on the given name.
741
742 Appends base_subdirectory_name with a number as necessary to find a
743 directory name that doesn't already exist.
744 """
745 subdirectory = base_subdirectory_name
746 counter = 1
747 while os.path.exists(os.path.join(self.resultdir, subdirectory)):
748 subdirectory = base_subdirectory_name + '.' + str(counter)
749 counter += 1
750 return subdirectory
751
752
jadmanski52053632010-06-11 21:08:10 +0000753 def get_record_context(self):
754 """Returns an object representing the current job.record context.
755
756 The object returned is an opaque object with a 0-arg restore method
757 which can be called to restore the job.record context (i.e. indentation)
758 to the current level. The intention is that it should be used when
759 something external which generate job.record calls (e.g. an autotest
760 client) can fail catastrophically and the server job record state
761 needs to be reset to its original "known good" state.
762
763 @return: A context object with a 0-arg restore() method."""
764 return self._indenter.get_context()
765
766
showardcc929362010-01-25 21:20:41 +0000767 def record_summary(self, status_code, test_name, reason='', attributes=None,
768 distinguishing_attributes=(), child_test_ids=None):
769 """Record a summary test result.
770
771 @param status_code: status code string, see
772 common_lib.log.is_valid_status()
773 @param test_name: name of the test
774 @param reason: (optional) string providing detailed reason for test
775 outcome
776 @param attributes: (optional) dict of string keyvals to associate with
777 this result
778 @param distinguishing_attributes: (optional) list of attribute names
779 that should be used to distinguish identically-named test
780 results. These attributes should be present in the attributes
781 parameter. This is used to generate user-friendly subdirectory
782 names.
783 @param child_test_ids: (optional) list of test indices for test results
784 used in generating this result.
785 """
786 subdirectory_name_parts = [test_name]
787 for attribute in distinguishing_attributes:
788 assert attributes
789 assert attribute in attributes, '%s not in %s' % (attribute,
790 attributes)
791 subdirectory_name_parts.append(attributes[attribute])
792 base_subdirectory_name = '.'.join(subdirectory_name_parts)
793
794 subdirectory = self._unique_subdirectory(base_subdirectory_name)
795 subdirectory_path = os.path.join(self.resultdir, subdirectory)
796 os.mkdir(subdirectory_path)
797
798 self.record(status_code, subdirectory, test_name,
799 status=reason, optional_fields={'is_summary': True})
800
801 if attributes:
802 utils.write_keyval(subdirectory_path, attributes)
803
804 if child_test_ids:
805 ids_string = ','.join(str(test_id) for test_id in child_test_ids)
806 summary_data = {'child_test_ids': ids_string}
807 utils.write_keyval(os.path.join(subdirectory_path, 'summary_data'),
808 summary_data)
809
810
jadmanski16a7ff72009-04-01 18:19:53 +0000811 def disable_warnings(self, warning_type):
jadmanskif37df842009-02-11 00:03:26 +0000812 self.warning_manager.disable_warnings(warning_type)
jadmanski16a7ff72009-04-01 18:19:53 +0000813 self.record("INFO", None, None,
814 "disabling %s warnings" % warning_type,
815 {"warnings.disable": warning_type})
jadmanskif37df842009-02-11 00:03:26 +0000816
817
jadmanski16a7ff72009-04-01 18:19:53 +0000818 def enable_warnings(self, warning_type):
jadmanskif37df842009-02-11 00:03:26 +0000819 self.warning_manager.enable_warnings(warning_type)
jadmanski16a7ff72009-04-01 18:19:53 +0000820 self.record("INFO", None, None,
821 "enabling %s warnings" % warning_type,
822 {"warnings.enable": warning_type})
jadmanskif37df842009-02-11 00:03:26 +0000823
824
jadmanski779bd292009-03-19 17:33:33 +0000825 def get_status_log_path(self, subdir=None):
826 """Return the path to the job status log.
827
828 @param subdir - Optional paramter indicating that you want the path
829 to a subdirectory status log.
830
831 @returns The path where the status log should be.
832 """
mbligh210bae62009-04-01 18:33:13 +0000833 if self.resultdir:
834 if subdir:
835 return os.path.join(self.resultdir, subdir, "status.log")
836 else:
837 return os.path.join(self.resultdir, "status.log")
jadmanski779bd292009-03-19 17:33:33 +0000838 else:
mbligh210bae62009-04-01 18:33:13 +0000839 return None
jadmanski779bd292009-03-19 17:33:33 +0000840
841
    def _update_uncollected_logs_list(self, update_func):
        """Updates the uncollected logs list in a multi-process safe manner.

        The list is stored pickled in self._uncollected_log_file; an
        exclusive flock serializes the read-modify-write cycle against any
        other process sharing the file. No-op when the job was started
        without an uncollected-logs file.

        @param update_func - a function that updates the list of uncollected
            logs. Should take one parameter, the list to be updated.
        """
        if self._uncollected_log_file:
            log_file = open(self._uncollected_log_file, "r+")
            # block until we hold the exclusive lock on the file
            fcntl.flock(log_file, fcntl.LOCK_EX)
            try:
                uncollected_logs = pickle.load(log_file)
                update_func(uncollected_logs)
                # rewrite the file in place with the updated list
                log_file.seek(0)
                log_file.truncate()
                pickle.dump(uncollected_logs, log_file)
                # push the data out before the lock is released so other
                # processes never observe a partially-written file
                log_file.flush()
            finally:
                fcntl.flock(log_file, fcntl.LOCK_UN)
                log_file.close()
861
862
863 def add_client_log(self, hostname, remote_path, local_path):
864 """Adds a new set of client logs to the list of uncollected logs,
865 to allow for future log recovery.
866
867 @param host - the hostname of the machine holding the logs
868 @param remote_path - the directory on the remote machine holding logs
869 @param local_path - the local directory to copy the logs into
870 """
871 def update_func(logs_list):
872 logs_list.append((hostname, remote_path, local_path))
873 self._update_uncollected_logs_list(update_func)
874
875
876 def remove_client_log(self, hostname, remote_path, local_path):
877 """Removes a set of client logs from the list of uncollected logs,
878 to allow for future log recovery.
879
880 @param host - the hostname of the machine holding the logs
881 @param remote_path - the directory on the remote machine holding logs
882 @param local_path - the local directory to copy the logs into
883 """
884 def update_func(logs_list):
885 logs_list.remove((hostname, remote_path, local_path))
886 self._update_uncollected_logs_list(update_func)
887
888
mbligh0d0f67d2009-11-06 03:15:03 +0000889 def get_client_logs(self):
890 """Retrieves the list of uncollected logs, if it exists.
891
892 @returns A list of (host, remote_path, local_path) tuples. Returns
893 an empty list if no uncollected logs file exists.
894 """
895 log_exists = (self._uncollected_log_file and
896 os.path.exists(self._uncollected_log_file))
897 if log_exists:
898 return pickle.load(open(self._uncollected_log_file))
899 else:
900 return []
901
902
    def _fill_server_control_namespace(self, namespace, protect=True):
        """
        Prepare a namespace to be used when executing server control files.

        This sets up the control file API by importing modules and making them
        available under the appropriate names within namespace.

        For use by _execute_code().

        Args:
          namespace: The namespace dictionary to fill in.
          protect: Boolean.  If True (the default) any operation that would
              clobber an existing entry in namespace will cause an error.
        Raises:
          error.AutoservError: When a name would be clobbered by import.
        """
        def _import_names(module_name, names=()):
            """
            Import a module and assign named attributes into namespace.

            Args:
                module_name: The string module name.
                names: A limiting list of names to import from module_name.  If
                    empty (the default), all names are imported from the module
                    similar to a "from foo.bar import *" statement.
            Raises:
                error.AutoservError: When a name being imported would clobber
                    a name already in namespace.
            """
            module = __import__(module_name, {}, {}, names)

            # No names supplied?  Import * from the lowest level module.
            # (Ugh, why do I have to implement this part myself?)
            if not names:
                for submodule_name in module_name.split('.')[1:]:
                    module = getattr(module, submodule_name)
                if hasattr(module, '__all__'):
                    names = getattr(module, '__all__')
                else:
                    names = dir(module)

            # Install each name into namespace, checking to make sure it
            # doesn't override anything that already exists.
            for name in names:
                # Check for conflicts to help prevent future problems.
                if name in namespace and protect:
                    if namespace[name] is not getattr(module, name):
                        raise error.AutoservError('importing name '
                                '%s from %s %r would override %r' %
                                (name, module_name, getattr(module, name),
                                 namespace[name]))
                    else:
                        # Encourage cleanliness and the use of __all__ for a
                        # more concrete API with less surprises on '*' imports.
                        warnings.warn('%s (%r) being imported from %s for use '
                                      'in server control files is not the '
                                      'first occurrance of that import.' %
                                      (name, namespace[name], module_name))

                namespace[name] = getattr(module, name)


        # This is the equivalent of prepending a bunch of import statements to
        # the front of the control script.
        namespace.update(os=os, sys=sys, logging=logging)
        _import_names('autotest_lib.server',
                ('hosts', 'autotest', 'kvm', 'git', 'standalone_profiler',
                 'source_kernel', 'rpm_kernel', 'deb_kernel', 'git_kernel'))
        _import_names('autotest_lib.server.subcommand',
                      ('parallel', 'parallel_simple', 'subcommand'))
        _import_names('autotest_lib.server.utils',
                      ('run', 'get_tmp_dir', 'sh_escape', 'parse_machine'))
        _import_names('autotest_lib.client.common_lib.error')
        _import_names('autotest_lib.client.common_lib.barrier', ('barrier',))

        # Inject ourself as the job object into other classes within the API.
        # (Yuck, this injection is a gross thing be part of a public API. -gps)
        #
        # XXX Base & SiteAutotest do not appear to use .job.  Who does?
        namespace['autotest'].Autotest.job = self
        # server.hosts.base_classes.Host uses .job.
        namespace['hosts'].Host.job = self
985
986
    def _execute_code(self, code_file, namespace, protect=True):
        """
        Execute code using a copy of namespace as a server control script.

        Unless protect_namespace is explicitly set to False, the dict will not
        be modified.

        Args:
          code_file: The filename of the control file to execute.
          namespace: A dict containing names to make available during
              execution.
          protect: Boolean.  If True (the default) a copy of the namespace dict
              is used during execution to prevent the code from modifying its
              contents outside of this function.  If False the raw dict is
              passed in and modifications will be allowed.
        """
        if protect:
            namespace = namespace.copy()
        self._fill_server_control_namespace(namespace, protect=protect)
        # TODO: Simplify and get rid of the special cases for only 1 machine.
        if len(self.machines) > 1:
            machines_text = '\n'.join(self.machines) + '\n'
            # Only rewrite the file if it does not match our machine list.
            try:
                machines_f = open(MACHINES_FILENAME, 'r')
                existing_machines_text = machines_f.read()
                machines_f.close()
            except EnvironmentError:
                # the machines file probably doesn't exist yet; treat any
                # read failure as "no existing content"
                existing_machines_text = None
            if machines_text != existing_machines_text:
                utils.open_write_close(MACHINES_FILENAME, machines_text)
        # run the control file with the prepared namespace as both its
        # globals and locals
        execfile(code_file, namespace, namespace)
jadmanski10646442008-08-13 14:05:21 +00001018
1019
jadmanski10646442008-08-13 14:05:21 +00001020 def __parse_status(self, new_lines):
mbligh0d0f67d2009-11-06 03:15:03 +00001021 if not self._using_parser:
jadmanski10646442008-08-13 14:05:21 +00001022 return
1023 new_tests = self.parser.process_lines(new_lines)
1024 for test in new_tests:
1025 self.__insert_test(test)
1026
1027
    def __insert_test(self, test):
        """
        An internal method to insert a new test result into the
        database. This method will not raise an exception, even if an
        error occurs during the insert, to avoid failing a test
        simply because of unexpected database issues.

        @param test: a test result object produced by the TKO status
            parser; its status attribute is used for failure accounting.
        """
        self.num_tests_run += 1
        # anything at least as severe as FAIL counts as a failed test
        if status_lib.is_worse_than_or_equal_to(test.status, 'FAIL'):
            self.num_tests_failed += 1
        try:
            self.results_db.insert_test(self.job_model, test)
        except Exception:
            # deliberately swallow the error (see docstring); just warn
            msg = ("WARNING: An unexpected error occured while "
                   "inserting test results into the database. "
                   "Ignoring error.\n" + traceback.format_exc())
            print >> sys.stderr, msg
1044
mblighcaa62c22008-04-07 21:51:17 +00001045
mblighfc3da5b2010-01-06 18:37:22 +00001046 def preprocess_client_state(self):
1047 """
1048 Produce a state file for initializing the state of a client job.
1049
1050 Creates a new client state file with all the current server state, as
1051 well as some pre-set client state.
1052
1053 @returns The path of the file the state was written into.
1054 """
1055 # initialize the sysinfo state
1056 self._state.set('client', 'sysinfo', self.sysinfo.serialize())
1057
1058 # dump the state out to a tempfile
1059 fd, file_path = tempfile.mkstemp(dir=self.tmpdir)
1060 os.close(fd)
mbligha2c99492010-01-27 22:59:50 +00001061
1062 # write_to_file doesn't need locking, we exclusively own file_path
mblighfc3da5b2010-01-06 18:37:22 +00001063 self._state.write_to_file(file_path)
1064 return file_path
1065
1066
1067 def postprocess_client_state(self, state_path):
1068 """
1069 Update the state of this job with the state from a client job.
1070
1071 Updates the state of the server side of a job with the final state
1072 of a client job that was run. Updates the non-client-specific state,
1073 pulls in some specific bits from the client-specific state, and then
1074 discards the rest. Removes the state file afterwards
1075
1076 @param state_file A path to the state file from the client.
1077 """
1078 # update the on-disk state
mblighfc3da5b2010-01-06 18:37:22 +00001079 try:
jadmanskib6e7bdb2010-04-13 16:00:39 +00001080 self._state.read_from_file(state_path)
mblighfc3da5b2010-01-06 18:37:22 +00001081 os.remove(state_path)
mbligha2c99492010-01-27 22:59:50 +00001082 except OSError, e:
mblighfc3da5b2010-01-06 18:37:22 +00001083 # ignore file-not-found errors
1084 if e.errno != errno.ENOENT:
1085 raise
jadmanskib6e7bdb2010-04-13 16:00:39 +00001086 else:
1087 logging.debug('Client state file %s not found', state_path)
mblighfc3da5b2010-01-06 18:37:22 +00001088
1089 # update the sysinfo state
1090 if self._state.has('client', 'sysinfo'):
1091 self.sysinfo.deserialize(self._state.get('client', 'sysinfo'))
1092
1093 # drop all the client-specific state
1094 self._state.discard_namespace('client')
1095
1096
mbligh0a883702010-04-21 01:58:34 +00001097 def clear_all_known_hosts(self):
1098 """Clears known hosts files for all AbstractSSHHosts."""
1099 for host in self.hosts:
1100 if isinstance(host, abstract_ssh.AbstractSSHHost):
1101 host.clear_known_hosts()
1102
1103
# Allow a site-specific subclass to hook in here; falls back to
# base_server_job when no site_server_job module/class is available.
site_server_job = utils.import_site_class(
    __file__, "autotest_lib.server.site_server_job", "site_server_job",
    base_server_job)


class server_job(site_server_job):
    """The concrete server job class used by autoserv; all behavior is
    inherited from site_server_job (or base_server_job when no
    site-specific class is provided)."""
    pass
jadmanskif37df842009-02-11 00:03:26 +00001110
1111
class warning_manager(object):
    """Controls warning logs by tracking, per warning type, the time
    intervals during which that type of warning is disabled."""
    def __init__(self):
        # maps warning type -> list of (start, end) disabled intervals;
        # an end of None means the type is still disabled
        self.disabled_warnings = {}


    def is_valid(self, timestamp, warning_type):
        """Indicates if a warning (based on the time it occured and its type)
        is a valid warning. A warning is considered "invalid" if this type of
        warning was marked as "disabled" at the time the warning occured."""
        for start, end in self.disabled_warnings.get(warning_type, []):
            in_disabled_window = (timestamp >= start and
                                  (end is None or timestamp < end))
            if in_disabled_window:
                return False
        return True


    def disable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, disables all further warnings of this type."""
        intervals = self.disabled_warnings.setdefault(warning_type, [])
        # only open a new interval if one isn't already open
        currently_disabled = intervals and intervals[-1][1] is None
        if not currently_disabled:
            intervals.append((int(current_time_func()), None))


    def enable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, enables all further warnings of this type."""
        intervals = self.disabled_warnings.get(warning_type, [])
        # close the open interval, if there is one
        if intervals and intervals[-1][1] is None:
            intervals[-1] = (intervals[-1][0], int(current_time_func()))