blob: 6ef59debc57b8b2d9d2abc662c54711cc5b9fa38 [file] [log] [blame]
mbligh57e78662008-06-17 19:53:49 +00001"""
2The main job wrapper for the server side.
3
4This is the core infrastructure. Derived from the client side job.py
5
6Copyright Martin J. Bligh, Andy Whitcroft 2007
7"""
8
jadmanski6bb32d72009-03-19 20:25:24 +00009import getpass, os, sys, re, stat, tempfile, time, select, subprocess
mblighfc3da5b2010-01-06 18:37:22 +000010import traceback, shutil, warnings, fcntl, pickle, logging, itertools, errno
showard75cdfee2009-06-10 17:40:41 +000011from autotest_lib.client.bin import sysinfo
mbligh0d0f67d2009-11-06 03:15:03 +000012from autotest_lib.client.common_lib import base_job
mbligh09108442008-10-15 16:27:38 +000013from autotest_lib.client.common_lib import error, log, utils, packages
showard75cdfee2009-06-10 17:40:41 +000014from autotest_lib.client.common_lib import logging_manager
jadmanski043e1132008-11-19 17:10:32 +000015from autotest_lib.server import test, subcommand, profilers
mbligh0a883702010-04-21 01:58:34 +000016from autotest_lib.server.hosts import abstract_ssh
jadmanski10646442008-08-13 14:05:21 +000017from autotest_lib.tko import db as tko_db, status_lib, utils as tko_utils
jadmanski10646442008-08-13 14:05:21 +000018
19
mbligh084bc172008-10-18 14:02:45 +000020def _control_segment_path(name):
21 """Get the pathname of the named control segment file."""
jadmanski10646442008-08-13 14:05:21 +000022 server_dir = os.path.dirname(os.path.abspath(__file__))
mbligh084bc172008-10-18 14:02:45 +000023 return os.path.join(server_dir, "control_segments", name)
jadmanski10646442008-08-13 14:05:21 +000024
25
# Well-known filenames inside a job's control file directory.
CLIENT_CONTROL_FILENAME = 'control'
SERVER_CONTROL_FILENAME = 'control.srv'
MACHINES_FILENAME = '.machines'

# Pre-resolved paths to the control segment scripts implementing the
# standard job phases (see _control_segment_path above).
CLIENT_WRAPPER_CONTROL_FILE = _control_segment_path('client_wrapper')
CRASHDUMPS_CONTROL_FILE = _control_segment_path('crashdumps')
CRASHINFO_CONTROL_FILE = _control_segment_path('crashinfo')
INSTALL_CONTROL_FILE = _control_segment_path('install')
CLEANUP_CONTROL_FILE = _control_segment_path('cleanup')

VERIFY_CONTROL_FILE = _control_segment_path('verify')
REPAIR_CONTROL_FILE = _control_segment_path('repair')
jadmanski10646442008-08-13 14:05:21 +000038
39
mbligh062ed152009-01-13 00:57:14 +000040# by default provide a stub that generates no site data
41def _get_site_job_data_dummy(job):
42 return {}
43
44
# load up site-specific code for generating site-specific job data; falls
# back to the no-op dummy above when no site module is installed
get_site_job_data = utils.import_site_function(__file__,
    "autotest_lib.server.site_server_job", "get_site_job_data",
    _get_site_job_data_dummy)
jadmanski10646442008-08-13 14:05:21 +000049
50
class status_indenter(base_job.status_indenter):
    """Simple status indenter backed by a plain integer counter."""
    def __init__(self):
        self._indent = 0


    @property
    def indent(self):
        return self._indent


    def increment(self):
        self._indent += 1


    def decrement(self):
        self._indent -= 1


    def get_context(self):
        """Return a context object for use by job.get_record_context."""
        class _saved_level(object):
            def __init__(self, indenter, level):
                self._indenter = indenter
                self._level = level
            def restore(self):
                # reset the indenter back to the captured depth
                self._indenter._indent = self._level
        return _saved_level(self, self._indent)
79
80
class server_job_record_hook(object):
    """The job.record hook for server job. Used to inject WARN messages from
    the console or vlm whenever new logs are written, and to echo any logs
    to INFO level logging. Implemented as a class so that it can use state to
    block recursive calls, so that the hook can call job.record itself to
    log WARN messages.

    Depends on job._read_warnings and job._logger.
    """
    def __init__(self, job):
        self._job = job
        self._being_called = False


    def __call__(self, entry):
        """Guarded wrapper around the real hook (_hook) that blocks
        re-entry. Not threadsafe by design; the only goal is to stop an
        infinite job.record->_hook->job.record->... recursion."""
        if self._being_called:
            return
        self._being_called = True
        try:
            self._hook(self._job, entry)
        finally:
            self._being_called = False


    @staticmethod
    def _hook(job, entry):
        """The core hook, which can safely call job.record."""
        pending = []
        # drain all the warning loggers of any newly logged warnings
        for timestamp, msg in job._read_warnings():
            warning = base_job.status_log_entry(
                'WARN', None, None, msg, {}, timestamp=timestamp)
            job.record_entry(warning)
            pending.append(warning)
        pending.append(entry)
        # echo rendered versions of all the status logs at INFO level and
        # feed each rendered line to the status parser
        for log_entry in pending:
            rendered = job._logger.render_entry(log_entry)
            logging.info(rendered)
            job._parse_status(rendered)
jadmanski2a89dac2010-06-11 14:32:58 +0000125
126
class base_server_job(base_job.base_job):
    """The server-side concrete implementation of base_job.

    Optional properties provided by this implementation:
        serverdir
        conmuxdir

        num_tests_run
        num_tests_failed

        warning_manager
        warning_loggers
    """

    # Status log format version; written into the job keyvals and used to
    # select the matching TKO status parser (see init_parser).
    _STATUS_VERSION = 1
jadmanski10646442008-08-13 14:05:21 +0000142
    def __init__(self, control, args, resultdir, label, user, machines,
                 client=False, parse_job='',
                 ssh_user='root', ssh_port=22, ssh_pass='',
                 group_name='', tag='',
                 control_filename=SERVER_CONTROL_FILENAME):
        """
        Create a server side job object.

        @param control: The pathname of the control file.
        @param args: Passed to the control file.
        @param resultdir: Where to throw the results.
        @param label: Description of the job.
        @param user: Username for the job (email address).
        @param client: True if this is a client-side control file.
        @param parse_job: string, if supplied it is the job execution tag that
                the results will be passed through to the TKO parser with.
        @param ssh_user: The SSH username.  [root]
        @param ssh_port: The SSH port number.  [22]
        @param ssh_pass: The SSH passphrase, if needed.
        @param group_name: If supplied, this will be written out as
                host_group_name in the keyvals file for the parser.
        @param tag: The job execution tag from the scheduler.  [optional]
        @param control_filename: The filename where the server control file
                should be written in the results directory.
        """
        super(base_server_job, self).__init__(resultdir=resultdir)

        # NOTE(review): 'path' appears unused in this method -- confirm
        # before removing.
        path = os.path.dirname(__file__)
        self.control = control
        self._uncollected_log_file = os.path.join(self.resultdir,
                                                  'uncollected_logs')
        debugdir = os.path.join(self.resultdir, 'debug')
        if not os.path.exists(debugdir):
            os.mkdir(debugdir)

        # fall back to the invoking user when no username was supplied
        if user:
            self.user = user
        else:
            self.user = getpass.getuser()

        self.args = args
        self.machines = machines
        self._client = client
        self.warning_loggers = set()
        self.warning_manager = warning_manager()
        self._ssh_user = ssh_user
        self._ssh_port = ssh_port
        self._ssh_pass = ssh_pass
        self.tag = tag
        self.last_boot_tag = None
        self.hosts = set()
        self.drop_caches = False
        self.drop_caches_between_iterations = False
        self._control_filename = control_filename

        # capture stdout/stderr and all fds through the logging manager so
        # subcommands inherit the same log redirection
        self.logging = logging_manager.get_logging_manager(
                manage_stdout_and_stderr=True, redirect_fds=True)
        subcommand.logging_manager_object = self.logging

        self.sysinfo = sysinfo.sysinfo(self.resultdir)
        self.profilers = profilers.profilers(self)

        job_data = {'label' : label, 'user' : user,
                    'hostname' : ','.join(machines),
                    'status_version' : str(self._STATUS_VERSION),
                    'job_started' : str(int(time.time()))}
        if group_name:
            job_data['host_group_name'] = group_name

        # only write these keyvals out on the first job in a resultdir
        if 'job_started' not in utils.read_keyval(self.resultdir):
            job_data.update(get_site_job_data(self))
            utils.write_keyval(self.resultdir, job_data)

        self._parse_job = parse_job
        # continuous parsing is only supported for single-machine jobs
        self._using_parser = (self._parse_job and len(machines) <= 1)
        self.pkgmgr = packages.PackageManager(
            self.autodir, run_function_dargs={'timeout':600})
        self.num_tests_run = 0
        self.num_tests_failed = 0

        self._register_subcommand_hooks()

        # these components aren't usable on the server
        self.bootloader = None
        self.harness = None

        # set up the status logger
        self._indenter = status_indenter()
        self._logger = base_job.status_logger(
            self, self._indenter, 'status.log', 'status.log',
            record_hook=server_job_record_hook(self))
235
mbligh0d0f67d2009-11-06 03:15:03 +0000236
237 @classmethod
238 def _find_base_directories(cls):
239 """
240 Determine locations of autodir, clientdir and serverdir. Assumes
241 that this file is located within serverdir and uses __file__ along
242 with relative paths to resolve the location.
243 """
244 serverdir = os.path.abspath(os.path.dirname(__file__))
245 autodir = os.path.normpath(os.path.join(serverdir, '..'))
246 clientdir = os.path.join(autodir, 'client')
247 return autodir, clientdir, serverdir
248
249
250 def _find_resultdir(self, resultdir):
251 """
252 Determine the location of resultdir. For server jobs we expect one to
253 always be explicitly passed in to __init__, so just return that.
254 """
255 if resultdir:
256 return os.path.normpath(resultdir)
257 else:
258 return None
259
jadmanski550fdc22008-11-20 16:32:08 +0000260
    def _get_status_logger(self):
        """Return a reference to the status logger built in __init__."""
        return self._logger
264
265
jadmanskie432dd22009-01-30 15:04:51 +0000266 @staticmethod
267 def _load_control_file(path):
268 f = open(path)
269 try:
270 control_file = f.read()
271 finally:
272 f.close()
273 return re.sub('\r', '', control_file)
274
275
jadmanski550fdc22008-11-20 16:32:08 +0000276 def _register_subcommand_hooks(self):
mbligh2b92b862008-11-22 13:25:32 +0000277 """
278 Register some hooks into the subcommand modules that allow us
279 to properly clean up self.hosts created in forked subprocesses.
280 """
jadmanski550fdc22008-11-20 16:32:08 +0000281 def on_fork(cmd):
282 self._existing_hosts_on_fork = set(self.hosts)
283 def on_join(cmd):
284 new_hosts = self.hosts - self._existing_hosts_on_fork
285 for host in new_hosts:
286 host.close()
287 subcommand.subcommand.register_fork_hook(on_fork)
288 subcommand.subcommand.register_join_hook(on_join)
289
jadmanski10646442008-08-13 14:05:21 +0000290
    def init_parser(self):
        """
        Start the continuous parsing of self.resultdir. This sets up
        the database connection and inserts the basic job object into
        the database if necessary.
        """
        if not self._using_parser:
            return
        # redirect parser debugging to .parse.log
        parse_log = os.path.join(self.resultdir, '.parse.log')
        # buffering=0 keeps the debug log unbuffered so partial output
        # survives a crash (Python 2 open signature)
        parse_log = open(parse_log, 'w', 0)
        tko_utils.redirect_parser_debugging(parse_log)
        # create a job model object and set up the db
        self.results_db = tko_db.db(autocommit=True)
        self.parser = status_lib.parser(self._STATUS_VERSION)
        self.job_model = self.parser.make_job(self.resultdir)
        self.parser.start(self.job_model)
        # check if a job already exists in the db and insert it if
        # it does not
        job_idx = self.results_db.find_job(self._parse_job)
        if job_idx is None:
            self.results_db.insert_job(self._parse_job, self.job_model)
        else:
            # job already present: reuse its indices on the model
            machine_idx = self.results_db.lookup_machine(self.job_model.machine)
            self.job_model.index = job_idx
            self.job_model.machine_idx = machine_idx
317
318
319 def cleanup_parser(self):
mbligh2b92b862008-11-22 13:25:32 +0000320 """
321 This should be called after the server job is finished
jadmanski10646442008-08-13 14:05:21 +0000322 to carry out any remaining cleanup (e.g. flushing any
mbligh2b92b862008-11-22 13:25:32 +0000323 remaining test results to the results db)
324 """
mbligh0d0f67d2009-11-06 03:15:03 +0000325 if not self._using_parser:
jadmanski10646442008-08-13 14:05:21 +0000326 return
327 final_tests = self.parser.end()
328 for test in final_tests:
329 self.__insert_test(test)
mbligh0d0f67d2009-11-06 03:15:03 +0000330 self._using_parser = False
jadmanski10646442008-08-13 14:05:21 +0000331
332
333 def verify(self):
334 if not self.machines:
mbligh084bc172008-10-18 14:02:45 +0000335 raise error.AutoservError('No machines specified to verify')
mbligh0fce4112008-11-27 00:37:17 +0000336 if self.resultdir:
337 os.chdir(self.resultdir)
jadmanski10646442008-08-13 14:05:21 +0000338 try:
jadmanskicdd0c402008-09-19 21:21:31 +0000339 namespace = {'machines' : self.machines, 'job' : self,
mbligh0d0f67d2009-11-06 03:15:03 +0000340 'ssh_user' : self._ssh_user,
341 'ssh_port' : self._ssh_port,
342 'ssh_pass' : self._ssh_pass}
mbligh084bc172008-10-18 14:02:45 +0000343 self._execute_code(VERIFY_CONTROL_FILE, namespace, protect=False)
jadmanski10646442008-08-13 14:05:21 +0000344 except Exception, e:
mbligh2b92b862008-11-22 13:25:32 +0000345 msg = ('Verify failed\n' + str(e) + '\n' + traceback.format_exc())
jadmanski10646442008-08-13 14:05:21 +0000346 self.record('ABORT', None, None, msg)
347 raise
348
349
350 def repair(self, host_protection):
351 if not self.machines:
352 raise error.AutoservError('No machines specified to repair')
mbligh0fce4112008-11-27 00:37:17 +0000353 if self.resultdir:
354 os.chdir(self.resultdir)
jadmanski10646442008-08-13 14:05:21 +0000355 namespace = {'machines': self.machines, 'job': self,
mbligh0d0f67d2009-11-06 03:15:03 +0000356 'ssh_user': self._ssh_user, 'ssh_port': self._ssh_port,
357 'ssh_pass': self._ssh_pass,
jadmanski10646442008-08-13 14:05:21 +0000358 'protection_level': host_protection}
mbligh25c0b8c2009-01-24 01:44:17 +0000359
mbligh0931b0a2009-04-08 17:44:48 +0000360 self._execute_code(REPAIR_CONTROL_FILE, namespace, protect=False)
jadmanski10646442008-08-13 14:05:21 +0000361
362
    def precheck(self):
        """
        Perform any additional checks in derived classes.

        Base implementation is a deliberate no-op.
        """
        pass
368
369
    def enable_external_logging(self):
        """
        Start or restart external logging mechanism.

        Base implementation is a no-op hook; subclasses that support an
        external logging mechanism override it.
        """
        pass
375
376
    def disable_external_logging(self):
        """
        Pause or stop external logging mechanism.

        Base implementation is a no-op hook; subclasses that support an
        external logging mechanism override it.
        """
        pass
382
383
    def use_external_logging(self):
        """
        Return True if external logging should be used.

        Base implementation always returns False; subclasses enabling
        external logging override this.
        """
        return False
389
390
    def _make_parallel_wrapper(self, function, machines, log):
        """Wrap function as appropriate for calling by parallel_simple.

        Returns either 'function' unchanged, or a per-machine wrapper that
        sets up an execution context (and, when parsing is active, a
        per-machine results parser) before invoking it. The wrappers mutate
        self and are intended to run inside forked subcommand processes.
        """
        # forking happens unless this is a single-machine job run directly
        # on its own machine list
        is_forking = not (len(machines) == 1 and self.machines == machines)
        if self._parse_job and is_forking and log:
            def wrapper(machine):
                # narrow this (forked) job object down to one machine and
                # give it its own parser under a per-machine execution tag
                self._parse_job += "/" + machine
                self._using_parser = True
                self.machines = [machine]
                self.push_execution_context(machine)
                os.chdir(self.resultdir)
                utils.write_keyval(self.resultdir, {"hostname": machine})
                self.init_parser()
                result = function(machine)
                self.cleanup_parser()
                return result
        elif len(machines) > 1 and log:
            def wrapper(machine):
                # no parsing: just set up a per-machine results directory
                self.push_execution_context(machine)
                os.chdir(self.resultdir)
                machine_data = {'hostname' : machine,
                                'status_version' : str(self._STATUS_VERSION)}
                utils.write_keyval(self.resultdir, machine_data)
                result = function(machine)
                return result
        else:
            wrapper = function
        return wrapper
418
419
420 def parallel_simple(self, function, machines, log=True, timeout=None,
421 return_results=False):
422 """
423 Run 'function' using parallel_simple, with an extra wrapper to handle
424 the necessary setup for continuous parsing, if possible. If continuous
425 parsing is already properly initialized then this should just work.
426
427 @param function: A callable to run in parallel given each machine.
428 @param machines: A list of machine names to be passed one per subcommand
429 invocation of function.
430 @param log: If True, output will be written to output in a subdirectory
431 named after each machine.
432 @param timeout: Seconds after which the function call should timeout.
433 @param return_results: If True instead of an AutoServError being raised
434 on any error a list of the results|exceptions from the function
435 called on each arg is returned. [default: False]
436
437 @raises error.AutotestError: If any of the functions failed.
438 """
439 wrapper = self._make_parallel_wrapper(function, machines, log)
440 return subcommand.parallel_simple(wrapper, machines,
441 log=log, timeout=timeout,
442 return_results=return_results)
443
444
445 def parallel_on_machines(self, function, machines, timeout=None):
446 """
showardcd5fac42009-07-06 20:19:43 +0000447 @param function: Called in parallel with one machine as its argument.
mbligh415dc212009-06-15 21:53:34 +0000448 @param machines: A list of machines to call function(machine) on.
449 @param timeout: Seconds after which the function call should timeout.
450
451 @returns A list of machines on which function(machine) returned
452 without raising an exception.
453 """
showardcd5fac42009-07-06 20:19:43 +0000454 results = self.parallel_simple(function, machines, timeout=timeout,
mbligh415dc212009-06-15 21:53:34 +0000455 return_results=True)
456 success_machines = []
457 for result, machine in itertools.izip(results, machines):
458 if not isinstance(result, Exception):
459 success_machines.append(machine)
460 return success_machines
jadmanski10646442008-08-13 14:05:21 +0000461
462
    # sentinel for control_file_dir requesting a throwaway temp directory
    _USE_TEMP_DIR = object()
    def run(self, cleanup=False, install_before=False, install_after=False,
            collect_crashdumps=True, namespace={}, control=None,
            control_file_dir=None, only_collect_crashinfo=False):
        """Execute this job's control file, with install/cleanup/crash
        collection phases around it.

        @param cleanup: Run the cleanup control segment afterwards.
        @param install_before/install_after: Run the install control segment
                before/after the main control file.
        @param collect_crashdumps: Collect crashdumps in the finally phase.
        @param namespace: Extra names for the control file's namespace.
                NOTE: mutable default is safe here only because the method
                immediately copies it and never mutates the original.
        @param control: Control file text; defaults to loading self.control.
        @param control_file_dir: Where to write the control files, or
                _USE_TEMP_DIR for a temporary directory.
        @param only_collect_crashinfo: Skip the control file and only run
                crashinfo collection for a previous run.
        """
        # for a normal job, make sure the uncollected logs file exists
        # for a crashinfo-only run it should already exist, bail out otherwise
        created_uncollected_logs = False
        if self.resultdir and not os.path.exists(self._uncollected_log_file):
            if only_collect_crashinfo:
                # if this is a crashinfo-only run, and there were no existing
                # uncollected logs, just bail out early
                logging.info("No existing uncollected logs, "
                             "skipping crashinfo collection")
                return
            else:
                log_file = open(self._uncollected_log_file, "w")
                pickle.dump([], log_file)
                log_file.close()
                created_uncollected_logs = True

        # use a copy so changes don't affect the original dictionary
        namespace = namespace.copy()
        machines = self.machines
        if control is None:
            if self.control is None:
                control = ''
            else:
                control = self._load_control_file(self.control)
        if control_file_dir is None:
            control_file_dir = self.resultdir

        self.aborted = False
        namespace['machines'] = machines
        namespace['args'] = self.args
        namespace['job'] = self
        namespace['ssh_user'] = self._ssh_user
        namespace['ssh_port'] = self._ssh_port
        namespace['ssh_pass'] = self._ssh_pass
        test_start_time = int(time.time())

        if self.resultdir:
            os.chdir(self.resultdir)
            # touch status.log so that the parser knows a job is running here
            open(self.get_status_log_path(), 'a').close()
            self.enable_external_logging()

        # crashinfo is collected unless the control file finishes cleanly
        collect_crashinfo = True
        temp_control_file_dir = None
        try:
            try:
                if install_before and machines:
                    self._execute_code(INSTALL_CONTROL_FILE, namespace)

                if only_collect_crashinfo:
                    return

                # determine the dir to write the control files to
                cfd_specified = (control_file_dir
                                 and control_file_dir is not self._USE_TEMP_DIR)
                if cfd_specified:
                    temp_control_file_dir = None
                else:
                    temp_control_file_dir = tempfile.mkdtemp(
                        suffix='temp_control_file_dir')
                    control_file_dir = temp_control_file_dir
                server_control_file = os.path.join(control_file_dir,
                                                   self._control_filename)
                client_control_file = os.path.join(control_file_dir,
                                                   CLIENT_CONTROL_FILENAME)
                if self._client:
                    # client-side control: the server runs a wrapper which
                    # ships the real control file to the client
                    namespace['control'] = control
                    utils.open_write_close(client_control_file, control)
                    shutil.copyfile(CLIENT_WRAPPER_CONTROL_FILE,
                                    server_control_file)
                else:
                    utils.open_write_close(server_control_file, control)
                logging.info("Processing control file")
                self._execute_code(server_control_file, namespace)
                logging.info("Finished processing control file")

                # no error occurred, so we don't need to collect crashinfo
                collect_crashinfo = False
            except Exception, e:
                try:
                    logging.exception(
                        'Exception escaped control file, job aborting:')
                    self.record('INFO', None, None, str(e),
                                {'job_abort_reason': str(e)})
                except:
                    pass # don't let logging exceptions here interfere
                raise
        finally:
            if temp_control_file_dir:
                # Clean up temp directory used for copies of the control files
                try:
                    shutil.rmtree(temp_control_file_dir)
                except Exception, e:
                    logging.warn('Could not remove temp directory %s: %s',
                                 temp_control_file_dir, e)

            if machines and (collect_crashdumps or collect_crashinfo):
                namespace['test_start_time'] = test_start_time
                if collect_crashinfo:
                    # includes crashdumps
                    self._execute_code(CRASHINFO_CONTROL_FILE, namespace)
                else:
                    self._execute_code(CRASHDUMPS_CONTROL_FILE, namespace)
            if self._uncollected_log_file and created_uncollected_logs:
                os.remove(self._uncollected_log_file)
            self.disable_external_logging()
            if cleanup and machines:
                self._execute_code(CLEANUP_CONTROL_FILE, namespace)
            if install_after and machines:
                self._execute_code(INSTALL_CONTROL_FILE, namespace)
jadmanski10646442008-08-13 14:05:21 +0000577
578
    def run_test(self, url, *args, **dargs):
        """
        Summon a test object and run it.

        tag
                tag to add to testname
        url
                url of the test to run

        Returns True on success, False when the test raised a
        TestBaseException (already recorded); any other exception is
        re-raised with its original traceback.
        """
        group, testname = self.pkgmgr.get_package_name(url, 'test')
        testname, subdir, tag = self._build_tagged_test_name(testname, dargs)
        # NOTE(review): outputdir appears unused below; _make_test_outputdir
        # presumably creates the directory as a side effect -- confirm.
        outputdir = self._make_test_outputdir(subdir)

        def group_func():
            try:
                test.runtest(self, url, tag, args, dargs)
            except error.TestBaseException, e:
                # test-level failures record their own exit status
                self.record(e.exit_status, subdir, testname, str(e))
                raise
            except Exception, e:
                info = str(e) + "\n" + traceback.format_exc()
                self.record('FAIL', subdir, testname, info)
                raise
            else:
                self.record('GOOD', subdir, testname, 'completed successfully')

        result, exc_info = self._run_group(testname, subdir, group_func)
        if exc_info and isinstance(exc_info[1], error.TestBaseException):
            return False
        elif exc_info:
            # re-raise non-test exceptions with the original traceback
            # (Python 2 three-argument raise)
            raise exc_info[0], exc_info[1], exc_info[2]
        else:
            return True
jadmanski10646442008-08-13 14:05:21 +0000612
613
614 def _run_group(self, name, subdir, function, *args, **dargs):
615 """\
616 Underlying method for running something inside of a group.
617 """
jadmanskide292df2008-08-26 20:51:14 +0000618 result, exc_info = None, None
jadmanski10646442008-08-13 14:05:21 +0000619 try:
620 self.record('START', subdir, name)
jadmanski52053632010-06-11 21:08:10 +0000621 result = function(*args, **dargs)
jadmanski10646442008-08-13 14:05:21 +0000622 except error.TestBaseException, e:
jadmanskib88d6dc2009-01-10 00:33:18 +0000623 self.record("END %s" % e.exit_status, subdir, name)
jadmanskide292df2008-08-26 20:51:14 +0000624 exc_info = sys.exc_info()
jadmanski10646442008-08-13 14:05:21 +0000625 except Exception, e:
626 err_msg = str(e) + '\n'
627 err_msg += traceback.format_exc()
628 self.record('END ABORT', subdir, name, err_msg)
629 raise error.JobError(name + ' failed\n' + traceback.format_exc())
630 else:
631 self.record('END GOOD', subdir, name)
632
jadmanskide292df2008-08-26 20:51:14 +0000633 return result, exc_info
jadmanski10646442008-08-13 14:05:21 +0000634
635
636 def run_group(self, function, *args, **dargs):
637 """\
638 function:
639 subroutine to run
640 *args:
641 arguments for the function
642 """
643
644 name = function.__name__
645
646 # Allow the tag for the group to be specified.
647 tag = dargs.pop('tag', None)
648 if tag:
649 name = tag
650
jadmanskide292df2008-08-26 20:51:14 +0000651 return self._run_group(name, None, function, *args, **dargs)[0]
jadmanski10646442008-08-13 14:05:21 +0000652
653
    def run_reboot(self, reboot_func, get_kernel_func):
        """\
        A specialization of run_group meant specifically for handling
        a reboot. Includes support for capturing the kernel version
        after the reboot.

        Records a START/END pair around the reboot; on success the END
        GOOD entry carries the post-reboot kernel version as an optional
        field. A failed reboot is recorded as END FAIL and the exception
        is re-raised to the caller.

        reboot_func: a function that carries out the reboot

        get_kernel_func: a function that returns a string
                         representing the kernel version.
        """
        try:
            self.record('START', None, 'reboot')
            reboot_func()
        except Exception, e:
            # log the failure (with traceback) before propagating it
            err_msg = str(e) + '\n' + traceback.format_exc()
            self.record('END FAIL', None, 'reboot', err_msg)
            raise
        else:
            # only query the kernel version after a successful reboot
            kernel = get_kernel_func()
            self.record('END GOOD', None, 'reboot',
                        optional_fields={"kernel": kernel})
676
677
jadmanskie432dd22009-01-30 15:04:51 +0000678 def run_control(self, path):
679 """Execute a control file found at path (relative to the autotest
680 path). Intended for executing a control file within a control file,
681 not for running the top-level job control file."""
682 path = os.path.join(self.autodir, path)
683 control_file = self._load_control_file(path)
mbligh0d0f67d2009-11-06 03:15:03 +0000684 self.run(control=control_file, control_file_dir=self._USE_TEMP_DIR)
jadmanskie432dd22009-01-30 15:04:51 +0000685
686
jadmanskic09fc152008-10-15 17:56:59 +0000687 def add_sysinfo_command(self, command, logfile=None, on_every_test=False):
mbligh4395bbd2009-03-25 19:34:17 +0000688 self._add_sysinfo_loggable(sysinfo.command(command, logf=logfile),
jadmanskic09fc152008-10-15 17:56:59 +0000689 on_every_test)
690
691
692 def add_sysinfo_logfile(self, file, on_every_test=False):
693 self._add_sysinfo_loggable(sysinfo.logfile(file), on_every_test)
694
695
696 def _add_sysinfo_loggable(self, loggable, on_every_test):
697 if on_every_test:
698 self.sysinfo.test_loggables.add(loggable)
699 else:
700 self.sysinfo.boot_loggables.add(loggable)
701
702
    def _read_warnings(self):
        """Poll all the warning loggers and extract any new warnings that have
        been logged. If the warnings belong to a category that is currently
        disabled, this method will discard them and they will no longer be
        retrievable.

        Returns a list of (timestamp, message) tuples, where timestamp is an
        integer epoch timestamp."""
        warnings = []
        while True:
            # pull in a line of output from every logger that has
            # output ready to be read (0 timeout = non-blocking poll)
            loggers, _, _ = select.select(self.warning_loggers, [], [], 0)
            closed_loggers = set()
            for logger in loggers:
                line = logger.readline()
                # record any broken pipes (aka line == empty)
                if len(line) == 0:
                    closed_loggers.add(logger)
                    continue
                # parse out the warning; lines are expected to look like
                # "<epoch timestamp>\t<msgtype>\t<message>"
                timestamp, msgtype, msg = line.split('\t', 2)
                timestamp = int(timestamp)
                # if the warning is valid (its type was not disabled at
                # the time it was emitted), add it to the results
                if self.warning_manager.is_valid(timestamp, msgtype):
                    warnings.append((timestamp, msg.strip()))

            # stop listening to loggers that are closed
            self.warning_loggers -= closed_loggers

            # stop if none of the loggers have any output left
            if not loggers:
                break

        # sort into timestamp order
        warnings.sort()
        return warnings
740
741
showardcc929362010-01-25 21:20:41 +0000742 def _unique_subdirectory(self, base_subdirectory_name):
743 """Compute a unique results subdirectory based on the given name.
744
745 Appends base_subdirectory_name with a number as necessary to find a
746 directory name that doesn't already exist.
747 """
748 subdirectory = base_subdirectory_name
749 counter = 1
750 while os.path.exists(os.path.join(self.resultdir, subdirectory)):
751 subdirectory = base_subdirectory_name + '.' + str(counter)
752 counter += 1
753 return subdirectory
754
755
    def get_record_context(self):
        """Returns an object representing the current job.record context.

        The object returned is an opaque object with a 0-arg restore method
        which can be called to restore the job.record context (i.e. indentation)
        to the current level. The intention is that it should be used when
        something external which generates job.record calls (e.g. an autotest
        client) can fail catastrophically and the server job record state
        needs to be reset to its original "known good" state.

        @return: A context object with a 0-arg restore() method."""
        return self._indenter.get_context()
768
769
showardcc929362010-01-25 21:20:41 +0000770 def record_summary(self, status_code, test_name, reason='', attributes=None,
771 distinguishing_attributes=(), child_test_ids=None):
772 """Record a summary test result.
773
774 @param status_code: status code string, see
775 common_lib.log.is_valid_status()
776 @param test_name: name of the test
777 @param reason: (optional) string providing detailed reason for test
778 outcome
779 @param attributes: (optional) dict of string keyvals to associate with
780 this result
781 @param distinguishing_attributes: (optional) list of attribute names
782 that should be used to distinguish identically-named test
783 results. These attributes should be present in the attributes
784 parameter. This is used to generate user-friendly subdirectory
785 names.
786 @param child_test_ids: (optional) list of test indices for test results
787 used in generating this result.
788 """
789 subdirectory_name_parts = [test_name]
790 for attribute in distinguishing_attributes:
791 assert attributes
792 assert attribute in attributes, '%s not in %s' % (attribute,
793 attributes)
794 subdirectory_name_parts.append(attributes[attribute])
795 base_subdirectory_name = '.'.join(subdirectory_name_parts)
796
797 subdirectory = self._unique_subdirectory(base_subdirectory_name)
798 subdirectory_path = os.path.join(self.resultdir, subdirectory)
799 os.mkdir(subdirectory_path)
800
801 self.record(status_code, subdirectory, test_name,
802 status=reason, optional_fields={'is_summary': True})
803
804 if attributes:
805 utils.write_keyval(subdirectory_path, attributes)
806
807 if child_test_ids:
808 ids_string = ','.join(str(test_id) for test_id in child_test_ids)
809 summary_data = {'child_test_ids': ids_string}
810 utils.write_keyval(os.path.join(subdirectory_path, 'summary_data'),
811 summary_data)
812
813
jadmanski16a7ff72009-04-01 18:19:53 +0000814 def disable_warnings(self, warning_type):
jadmanskif37df842009-02-11 00:03:26 +0000815 self.warning_manager.disable_warnings(warning_type)
jadmanski16a7ff72009-04-01 18:19:53 +0000816 self.record("INFO", None, None,
817 "disabling %s warnings" % warning_type,
818 {"warnings.disable": warning_type})
jadmanskif37df842009-02-11 00:03:26 +0000819
820
jadmanski16a7ff72009-04-01 18:19:53 +0000821 def enable_warnings(self, warning_type):
jadmanskif37df842009-02-11 00:03:26 +0000822 self.warning_manager.enable_warnings(warning_type)
jadmanski16a7ff72009-04-01 18:19:53 +0000823 self.record("INFO", None, None,
824 "enabling %s warnings" % warning_type,
825 {"warnings.enable": warning_type})
jadmanskif37df842009-02-11 00:03:26 +0000826
827
jadmanski779bd292009-03-19 17:33:33 +0000828 def get_status_log_path(self, subdir=None):
829 """Return the path to the job status log.
830
831 @param subdir - Optional paramter indicating that you want the path
832 to a subdirectory status log.
833
834 @returns The path where the status log should be.
835 """
mbligh210bae62009-04-01 18:33:13 +0000836 if self.resultdir:
837 if subdir:
838 return os.path.join(self.resultdir, subdir, "status.log")
839 else:
840 return os.path.join(self.resultdir, "status.log")
jadmanski779bd292009-03-19 17:33:33 +0000841 else:
mbligh210bae62009-04-01 18:33:13 +0000842 return None
jadmanski779bd292009-03-19 17:33:33 +0000843
844
    def _update_uncollected_logs_list(self, update_func):
        """Updates the uncollected logs list in a multi-process safe manner.

        The list is stored pickled in self._uncollected_log_file; an
        exclusive flock on the open file serializes concurrent updates
        from multiple processes. If no uncollected logs file is
        configured this method is a no-op.

        @param update_func - a function that updates the list of uncollected
            logs. Should take one parameter, the list to be updated.
        """
        if self._uncollected_log_file:
            log_file = open(self._uncollected_log_file, "r+")
            fcntl.flock(log_file, fcntl.LOCK_EX)
            try:
                uncollected_logs = pickle.load(log_file)
                update_func(uncollected_logs)
                # rewrite the file in place with the updated list
                log_file.seek(0)
                log_file.truncate()
                pickle.dump(uncollected_logs, log_file)
                log_file.flush()
            finally:
                # always release the lock, even if the update failed
                fcntl.flock(log_file, fcntl.LOCK_UN)
                log_file.close()
864
865
866 def add_client_log(self, hostname, remote_path, local_path):
867 """Adds a new set of client logs to the list of uncollected logs,
868 to allow for future log recovery.
869
870 @param host - the hostname of the machine holding the logs
871 @param remote_path - the directory on the remote machine holding logs
872 @param local_path - the local directory to copy the logs into
873 """
874 def update_func(logs_list):
875 logs_list.append((hostname, remote_path, local_path))
876 self._update_uncollected_logs_list(update_func)
877
878
879 def remove_client_log(self, hostname, remote_path, local_path):
880 """Removes a set of client logs from the list of uncollected logs,
881 to allow for future log recovery.
882
883 @param host - the hostname of the machine holding the logs
884 @param remote_path - the directory on the remote machine holding logs
885 @param local_path - the local directory to copy the logs into
886 """
887 def update_func(logs_list):
888 logs_list.remove((hostname, remote_path, local_path))
889 self._update_uncollected_logs_list(update_func)
890
891
mbligh0d0f67d2009-11-06 03:15:03 +0000892 def get_client_logs(self):
893 """Retrieves the list of uncollected logs, if it exists.
894
895 @returns A list of (host, remote_path, local_path) tuples. Returns
896 an empty list if no uncollected logs file exists.
897 """
898 log_exists = (self._uncollected_log_file and
899 os.path.exists(self._uncollected_log_file))
900 if log_exists:
901 return pickle.load(open(self._uncollected_log_file))
902 else:
903 return []
904
905
    def _fill_server_control_namespace(self, namespace, protect=True):
        """
        Prepare a namespace to be used when executing server control files.

        This sets up the control file API by importing modules and making them
        available under the appropriate names within namespace.

        For use by _execute_code().

        Args:
          namespace: The namespace dictionary to fill in.
          protect: Boolean.  If True (the default) any operation that would
              clobber an existing entry in namespace will cause an error.
        Raises:
          error.AutoservError: When a name would be clobbered by import.
        """
        def _import_names(module_name, names=()):
            """
            Import a module and assign named attributes into namespace.

            Args:
                module_name: The string module name.
                names: A limiting list of names to import from module_name.  If
                    empty (the default), all names are imported from the module
                    similar to a "from foo.bar import *" statement.
            Raises:
                error.AutoservError: When a name being imported would clobber
                    a name already in namespace.
            """
            module = __import__(module_name, {}, {}, names)

            # No names supplied?  Import * from the lowest level module.
            # (Ugh, why do I have to implement this part myself?)
            if not names:
                for submodule_name in module_name.split('.')[1:]:
                    module = getattr(module, submodule_name)
                if hasattr(module, '__all__'):
                    names = getattr(module, '__all__')
                else:
                    names = dir(module)

            # Install each name into namespace, checking to make sure it
            # doesn't override anything that already exists.
            for name in names:
                # Check for conflicts to help prevent future problems.
                if name in namespace and protect:
                    if namespace[name] is not getattr(module, name):
                        raise error.AutoservError('importing name '
                                '%s from %s %r would override %r' %
                                (name, module_name, getattr(module, name),
                                 namespace[name]))
                    else:
                        # Encourage cleanliness and the use of __all__ for a
                        # more concrete API with less surprises on '*' imports.
                        warnings.warn('%s (%r) being imported from %s for use '
                                      'in server control files is not the '
                                      'first occurrance of that import.' %
                                      (name, namespace[name], module_name))

                namespace[name] = getattr(module, name)


        # This is the equivalent of prepending a bunch of import statements to
        # the front of the control script.
        namespace.update(os=os, sys=sys, logging=logging)
        _import_names('autotest_lib.server',
                ('hosts', 'autotest', 'kvm', 'git', 'standalone_profiler',
                 'source_kernel', 'rpm_kernel', 'deb_kernel', 'git_kernel'))
        _import_names('autotest_lib.server.subcommand',
                      ('parallel', 'parallel_simple', 'subcommand'))
        _import_names('autotest_lib.server.utils',
                      ('run', 'get_tmp_dir', 'sh_escape', 'parse_machine'))
        _import_names('autotest_lib.client.common_lib.error')
        _import_names('autotest_lib.client.common_lib.barrier', ('barrier',))

        # Inject ourself as the job object into other classes within the API.
        # (Yuck, this injection is a gross thing be part of a public API. -gps)
        #
        # XXX Base & SiteAutotest do not appear to use .job.  Who does?
        namespace['autotest'].Autotest.job = self
        # server.hosts.base_classes.Host uses .job.
        namespace['hosts'].Host.job = self
988
989
    def _execute_code(self, code_file, namespace, protect=True):
        """
        Execute code using a copy of namespace as a server control script.

        Unless protect_namespace is explicitly set to False, the dict will not
        be modified.

        Args:
          code_file: The filename of the control file to execute.
          namespace: A dict containing names to make available during execution.
          protect: Boolean.  If True (the default) a copy of the namespace dict
              is used during execution to prevent the code from modifying its
              contents outside of this function.  If False the raw dict is
              passed in and modifications will be allowed.
        """
        if protect:
            namespace = namespace.copy()
        self._fill_server_control_namespace(namespace, protect=protect)
        # TODO: Simplify and get rid of the special cases for only 1 machine.
        if len(self.machines) > 1:
            # for multi-machine jobs, maintain a .machines file listing the
            # job's machines, one hostname per line
            machines_text = '\n'.join(self.machines) + '\n'
            # Only rewrite the file if it does not match our machine list.
            try:
                machines_f = open(MACHINES_FILENAME, 'r')
                existing_machines_text = machines_f.read()
                machines_f.close()
            except EnvironmentError:
                # a missing/unreadable file means "no previous list"
                existing_machines_text = None
            if machines_text != existing_machines_text:
                utils.open_write_close(MACHINES_FILENAME, machines_text)
        execfile(code_file, namespace, namespace)
jadmanski10646442008-08-13 14:05:21 +00001021
1022
jadmanskie29d0e42010-06-17 16:06:52 +00001023 def _parse_status(self, new_line):
mbligh0d0f67d2009-11-06 03:15:03 +00001024 if not self._using_parser:
jadmanski10646442008-08-13 14:05:21 +00001025 return
jadmanskie29d0e42010-06-17 16:06:52 +00001026 new_tests = self.parser.process_lines([new_line])
jadmanski10646442008-08-13 14:05:21 +00001027 for test in new_tests:
1028 self.__insert_test(test)
1029
1030
    def __insert_test(self, test):
        """
        An internal method to insert a new test result into the
        database. This method will not raise an exception, even if an
        error occurs during the insert, to avoid failing a test
        simply because of unexpected database issues."""
        # keep the per-job run/fail counters up to date
        self.num_tests_run += 1
        if status_lib.is_worse_than_or_equal_to(test.status, 'FAIL'):
            self.num_tests_failed += 1
        try:
            self.results_db.insert_test(self.job_model, test)
        except Exception:
            # deliberately swallow the error (after reporting it on stderr)
            # so a database hiccup cannot fail the test itself
            msg = ("WARNING: An unexpected error occured while "
                   "inserting test results into the database. "
                   "Ignoring error.\n" + traceback.format_exc())
            print >> sys.stderr, msg
1047
mblighcaa62c22008-04-07 21:51:17 +00001048
mblighfc3da5b2010-01-06 18:37:22 +00001049 def preprocess_client_state(self):
1050 """
1051 Produce a state file for initializing the state of a client job.
1052
1053 Creates a new client state file with all the current server state, as
1054 well as some pre-set client state.
1055
1056 @returns The path of the file the state was written into.
1057 """
1058 # initialize the sysinfo state
1059 self._state.set('client', 'sysinfo', self.sysinfo.serialize())
1060
1061 # dump the state out to a tempfile
1062 fd, file_path = tempfile.mkstemp(dir=self.tmpdir)
1063 os.close(fd)
mbligha2c99492010-01-27 22:59:50 +00001064
1065 # write_to_file doesn't need locking, we exclusively own file_path
mblighfc3da5b2010-01-06 18:37:22 +00001066 self._state.write_to_file(file_path)
1067 return file_path
1068
1069
    def postprocess_client_state(self, state_path):
        """
        Update the state of this job with the state from a client job.

        Updates the state of the server side of a job with the final state
        of a client job that was run. Updates the non-client-specific state,
        pulls in some specific bits from the client-specific state, and then
        discards the rest. Removes the state file afterwards.

        @param state_path A path to the state file from the client.
        """
        # update the on-disk state
        try:
            self._state.read_from_file(state_path)
            os.remove(state_path)
        except OSError, e:
            # ignore file-not-found errors
            if e.errno != errno.ENOENT:
                raise
            else:
                logging.debug('Client state file %s not found', state_path)

        # update the sysinfo state
        if self._state.has('client', 'sysinfo'):
            self.sysinfo.deserialize(self._state.get('client', 'sysinfo'))

        # drop all the client-specific state
        self._state.discard_namespace('client')
1098
1099
mbligh0a883702010-04-21 01:58:34 +00001100 def clear_all_known_hosts(self):
1101 """Clears known hosts files for all AbstractSSHHosts."""
1102 for host in self.hosts:
1103 if isinstance(host, abstract_ssh.AbstractSSHHost):
1104 host.clear_known_hosts()
1105
1106
# Site-specific customization hook: import a site_server_job class if the
# site provides one, with base_server_job supplied as the base/default
# implementation.
site_server_job = utils.import_site_class(
    __file__, "autotest_lib.server.site_server_job", "site_server_job",
    base_server_job)
jadmanski0afbb632008-06-06 21:10:57 +00001110
class server_job(site_server_job):
    """The server job class used by callers; all behaviour comes from
    site_server_job (see utils.import_site_class above)."""
    pass
jadmanskif37df842009-02-11 00:03:26 +00001113
1114
class warning_manager(object):
    """Class for controlling warning logs. Manages the enabling and disabling
    of warnings.

    For each warning type a list of half-open [start, end) time intervals is
    kept, during which that type is considered disabled; an interval with
    end=None means the type is still disabled.
    """
    def __init__(self):
        # warning type -> list of (start, end) disabled-time intervals
        self.disabled_warnings = {}


    def is_valid(self, timestamp, warning_type):
        """Indicates if a warning (based on the time it occured and its type)
        is a valid warning. A warning is considered "invalid" if this type of
        warning was marked as "disabled" at the time the warning occured."""
        for begin, finish in self.disabled_warnings.get(warning_type, []):
            open_ended = finish is None
            if timestamp >= begin and (open_ended or timestamp < finish):
                return False
        return True


    def disable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, disables all further warnings of this type."""
        intervals = self.disabled_warnings.setdefault(warning_type, [])
        currently_disabled = intervals and intervals[-1][1] is None
        if not currently_disabled:
            intervals.append((int(current_time_func()), None))


    def enable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, enables all further warnings of this type."""
        intervals = self.disabled_warnings.get(warning_type, [])
        if intervals and intervals[-1][1] is None:
            begin = intervals[-1][0]
            intervals[-1] = (begin, int(current_time_func()))