blob: 757257fff1df866ca618dec268eea0838da12264 [file] [log] [blame]
mbligh57e78662008-06-17 19:53:49 +00001"""
2The main job wrapper for the server side.
3
4This is the core infrastructure. Derived from the client side job.py
5
6Copyright Martin J. Bligh, Andy Whitcroft 2007
7"""
8
jadmanski6bb32d72009-03-19 20:25:24 +00009import getpass, os, sys, re, stat, tempfile, time, select, subprocess
mblighfc3da5b2010-01-06 18:37:22 +000010import traceback, shutil, warnings, fcntl, pickle, logging, itertools, errno
showard75cdfee2009-06-10 17:40:41 +000011from autotest_lib.client.bin import sysinfo
mbligh0d0f67d2009-11-06 03:15:03 +000012from autotest_lib.client.common_lib import base_job
mbligh09108442008-10-15 16:27:38 +000013from autotest_lib.client.common_lib import error, log, utils, packages
showard75cdfee2009-06-10 17:40:41 +000014from autotest_lib.client.common_lib import logging_manager
jadmanski043e1132008-11-19 17:10:32 +000015from autotest_lib.server import test, subcommand, profilers
mbligh0a883702010-04-21 01:58:34 +000016from autotest_lib.server.hosts import abstract_ssh
jadmanski10646442008-08-13 14:05:21 +000017from autotest_lib.tko import db as tko_db, status_lib, utils as tko_utils
jadmanski10646442008-08-13 14:05:21 +000018
19
mbligh084bc172008-10-18 14:02:45 +000020def _control_segment_path(name):
21 """Get the pathname of the named control segment file."""
jadmanski10646442008-08-13 14:05:21 +000022 server_dir = os.path.dirname(os.path.abspath(__file__))
mbligh084bc172008-10-18 14:02:45 +000023 return os.path.join(server_dir, "control_segments", name)
jadmanski10646442008-08-13 14:05:21 +000024
25
# filenames written into each job's results directory
CLIENT_CONTROL_FILENAME = 'control'
SERVER_CONTROL_FILENAME = 'control.srv'
MACHINES_FILENAME = '.machines'

# canned control segments executed by job.run() and friends
CLIENT_WRAPPER_CONTROL_FILE = _control_segment_path('client_wrapper')
CRASHDUMPS_CONTROL_FILE = _control_segment_path('crashdumps')
CRASHINFO_CONTROL_FILE = _control_segment_path('crashinfo')
INSTALL_CONTROL_FILE = _control_segment_path('install')
CLEANUP_CONTROL_FILE = _control_segment_path('cleanup')

# control segments executed by job.verify() / job.repair()
VERIFY_CONTROL_FILE = _control_segment_path('verify')
REPAIR_CONTROL_FILE = _control_segment_path('repair')
jadmanski10646442008-08-13 14:05:21 +000038
39
mbligh062ed152009-01-13 00:57:14 +000040# by default provide a stub that generates no site data
41def _get_site_job_data_dummy(job):
42 return {}
43
44
# load up site-specific code for generating site-specific job data
# (falls back to the no-op dummy above when no site module is installed)
get_site_job_data = utils.import_site_function(__file__,
    "autotest_lib.server.site_server_job", "get_site_job_data",
    _get_site_job_data_dummy)
jadmanski10646442008-08-13 14:05:21 +000049
50
class status_indenter(base_job.status_indenter):
    """A minimal status indenter backed by a plain integer counter."""
    def __init__(self):
        self._indent = 0


    @property
    def indent(self):
        """The current indentation level."""
        return self._indent


    def increment(self):
        """Deepen indentation by one level."""
        self._indent += 1


    def decrement(self):
        """Reduce indentation by one level."""
        self._indent -= 1


    def get_context(self):
        """Returns a context object for use by job.get_record_context."""
        class _restore_point(object):
            """Snapshot of an indent level that can be restored later."""
            def __init__(self, indenter, level):
                self._indenter = indenter
                self._level = level
            def restore(self):
                self._indenter._indent = self._level
        return _restore_point(self, self._indent)
79
80
class server_job_record_hook(object):
    """The job.record hook installed by server jobs.

    Injects WARN messages gathered from the job's warning loggers whenever
    a new status log is written, echoes every log line to INFO-level
    logging, and feeds the rendered line to job._parse_status. Implemented
    as a class so it can keep a flag that blocks recursive invocation,
    since the hook itself calls job.record to log the WARN messages.

    Depends on job._read_warnings and job._logger.
    """
    def __init__(self, job):
        self._job = job
        self._being_called = False


    def __call__(self, entry):
        """Recursion-guarded entry point around the real hook (_hook).

        Not threadsafe; the sole purpose of the guard is to stop an
        infinite job.record -> _hook -> job.record -> ... chain."""
        if self._being_called:
            return
        self._being_called = True
        try:
            self._hook(self._job, entry)
        finally:
            self._being_called = False


    @staticmethod
    def _hook(job, entry):
        """The core hook, which can safely call job.record."""
        to_echo = []
        # drain every warning logger, recording each new warning as WARN
        for timestamp, msg in job._read_warnings():
            warning = base_job.status_log_entry(
                'WARN', None, None, msg, {}, timestamp=timestamp)
            to_echo.append(warning)
            job.record_entry(warning)
        to_echo.append(entry)
        # echo rendered versions of all the status logs to INFO
        for log_entry in to_echo:
            rendered = job._logger.render_entry(log_entry)
            logging.info(rendered)
            job._parse_status(rendered)
jadmanski2a89dac2010-06-11 14:32:58 +0000125
126
class base_server_job(base_job.base_job):
    """The server-side concrete implementation of base_job.

    Optional properties provided by this implementation:
        serverdir
        conmuxdir

        num_tests_run
        num_tests_failed

        warning_manager
        warning_loggers
    """

    # status log format version written into the job keyvals and used to
    # pick the matching TKO status parser
    _STATUS_VERSION = 1

    def __init__(self, control, args, resultdir, label, user, machines,
                 client=False, parse_job='',
                 ssh_user='root', ssh_port=22, ssh_pass='',
                 group_name='', tag='',
                 control_filename=SERVER_CONTROL_FILENAME):
        """
        Create a server side job object.

        @param control: The pathname of the control file.
        @param args: Passed to the control file.
        @param resultdir: Where to throw the results.
        @param label: Description of the job.
        @param user: Username for the job (email address).
        @param machines: A list of hostnames the job will run against.
        @param client: True if this is a client-side control file.
        @param parse_job: string, if supplied it is the job execution tag that
                the results will be passed through to the TKO parser with.
        @param ssh_user: The SSH username. [root]
        @param ssh_port: The SSH port number. [22]
        @param ssh_pass: The SSH passphrase, if needed.
        @param group_name: If supplied, this will be written out as
                host_group_name in the keyvals file for the parser.
        @param tag: The job execution tag from the scheduler. [optional]
        @param control_filename: The filename where the server control file
                should be written in the results directory.
        """
        super(base_server_job, self).__init__(resultdir=resultdir)

        # NOTE(review): 'path' is computed but never used below --
        # presumably historical; confirm before removing.
        path = os.path.dirname(__file__)
        self.control = control
        # pickle file tracking logs not yet pulled back from the machines
        self._uncollected_log_file = os.path.join(self.resultdir,
                                                  'uncollected_logs')
        debugdir = os.path.join(self.resultdir, 'debug')
        if not os.path.exists(debugdir):
            os.mkdir(debugdir)

        if user:
            self.user = user
        else:
            # default to the user running autoserv itself
            self.user = getpass.getuser()

        self.args = args
        self.machines = machines
        self._client = client
        self.warning_loggers = set()
        self.warning_manager = warning_manager()
        self._ssh_user = ssh_user
        self._ssh_port = ssh_port
        self._ssh_pass = ssh_pass
        self.tag = tag
        self.last_boot_tag = None
        self.hosts = set()
        self.drop_caches = False
        self.drop_caches_between_iterations = False
        self._control_filename = control_filename

        # take over stdout/stderr so everything is captured in the logs
        self.logging = logging_manager.get_logging_manager(
                manage_stdout_and_stderr=True, redirect_fds=True)
        subcommand.logging_manager_object = self.logging

        self.sysinfo = sysinfo.sysinfo(self.resultdir)
        self.profilers = profilers.profilers(self)

        job_data = {'label' : label, 'user' : user,
                    'hostname' : ','.join(machines),
                    'status_version' : str(self._STATUS_VERSION),
                    'job_started' : str(int(time.time()))}
        if group_name:
            job_data['host_group_name'] = group_name

        # only write these keyvals out on the first job in a resultdir
        if 'job_started' not in utils.read_keyval(self.resultdir):
            job_data.update(get_site_job_data(self))
            utils.write_keyval(self.resultdir, job_data)

        self._parse_job = parse_job
        # continuous parsing only works for single-machine jobs
        self._using_parser = (self._parse_job and len(machines) <= 1)
        self.pkgmgr = packages.PackageManager(
            self.autodir, run_function_dargs={'timeout':600})
        self.num_tests_run = 0
        self.num_tests_failed = 0

        self._register_subcommand_hooks()

        # these components aren't usable on the server
        self.bootloader = None
        self.harness = None

        # set up the status logger
        self._indenter = status_indenter()
        self._logger = base_job.status_logger(
            self, self._indenter, 'status.log', 'status.log',
            record_hook=server_job_record_hook(self))
235
mbligh0d0f67d2009-11-06 03:15:03 +0000236
237 @classmethod
238 def _find_base_directories(cls):
239 """
240 Determine locations of autodir, clientdir and serverdir. Assumes
241 that this file is located within serverdir and uses __file__ along
242 with relative paths to resolve the location.
243 """
244 serverdir = os.path.abspath(os.path.dirname(__file__))
245 autodir = os.path.normpath(os.path.join(serverdir, '..'))
246 clientdir = os.path.join(autodir, 'client')
247 return autodir, clientdir, serverdir
248
249
250 def _find_resultdir(self, resultdir):
251 """
252 Determine the location of resultdir. For server jobs we expect one to
253 always be explicitly passed in to __init__, so just return that.
254 """
255 if resultdir:
256 return os.path.normpath(resultdir)
257 else:
258 return None
259
jadmanski550fdc22008-11-20 16:32:08 +0000260
    def _get_status_logger(self):
        """Return a reference to the status logger.

        @return: The base_job.status_logger that writes status.log.
        """
        return self._logger
264
265
jadmanskie432dd22009-01-30 15:04:51 +0000266 @staticmethod
267 def _load_control_file(path):
268 f = open(path)
269 try:
270 control_file = f.read()
271 finally:
272 f.close()
273 return re.sub('\r', '', control_file)
274
275
jadmanski550fdc22008-11-20 16:32:08 +0000276 def _register_subcommand_hooks(self):
mbligh2b92b862008-11-22 13:25:32 +0000277 """
278 Register some hooks into the subcommand modules that allow us
279 to properly clean up self.hosts created in forked subprocesses.
280 """
jadmanski550fdc22008-11-20 16:32:08 +0000281 def on_fork(cmd):
282 self._existing_hosts_on_fork = set(self.hosts)
283 def on_join(cmd):
284 new_hosts = self.hosts - self._existing_hosts_on_fork
285 for host in new_hosts:
286 host.close()
287 subcommand.subcommand.register_fork_hook(on_fork)
288 subcommand.subcommand.register_join_hook(on_join)
289
jadmanski10646442008-08-13 14:05:21 +0000290
    def init_parser(self):
        """
        Start the continuous parsing of self.resultdir. This sets up
        the database connection and inserts the basic job object into
        the database if necessary. No-op when continuous parsing is
        disabled (self._using_parser is False).
        """
        if not self._using_parser:
            return
        # redirect parser debugging to .parse.log; buffering=0 opens the
        # file unbuffered so debug output survives a crash
        parse_log = os.path.join(self.resultdir, '.parse.log')
        parse_log = open(parse_log, 'w', 0)
        tko_utils.redirect_parser_debugging(parse_log)
        # create a job model object and set up the db
        self.results_db = tko_db.db(autocommit=True)
        self.parser = status_lib.parser(self._STATUS_VERSION)
        self.job_model = self.parser.make_job(self.resultdir)
        self.parser.start(self.job_model)
        # check if a job already exists in the db and insert it if
        # it does not
        job_idx = self.results_db.find_job(self._parse_job)
        if job_idx is None:
            self.results_db.insert_job(self._parse_job, self.job_model)
        else:
            # the job is already in the db; attach to its existing rows
            machine_idx = self.results_db.lookup_machine(self.job_model.machine)
            self.job_model.index = job_idx
            self.job_model.machine_idx = machine_idx
317
318
319 def cleanup_parser(self):
mbligh2b92b862008-11-22 13:25:32 +0000320 """
321 This should be called after the server job is finished
jadmanski10646442008-08-13 14:05:21 +0000322 to carry out any remaining cleanup (e.g. flushing any
mbligh2b92b862008-11-22 13:25:32 +0000323 remaining test results to the results db)
324 """
mbligh0d0f67d2009-11-06 03:15:03 +0000325 if not self._using_parser:
jadmanski10646442008-08-13 14:05:21 +0000326 return
327 final_tests = self.parser.end()
328 for test in final_tests:
329 self.__insert_test(test)
mbligh0d0f67d2009-11-06 03:15:03 +0000330 self._using_parser = False
jadmanski10646442008-08-13 14:05:21 +0000331
332
333 def verify(self):
334 if not self.machines:
mbligh084bc172008-10-18 14:02:45 +0000335 raise error.AutoservError('No machines specified to verify')
mbligh0fce4112008-11-27 00:37:17 +0000336 if self.resultdir:
337 os.chdir(self.resultdir)
jadmanski10646442008-08-13 14:05:21 +0000338 try:
jadmanskicdd0c402008-09-19 21:21:31 +0000339 namespace = {'machines' : self.machines, 'job' : self,
mbligh0d0f67d2009-11-06 03:15:03 +0000340 'ssh_user' : self._ssh_user,
341 'ssh_port' : self._ssh_port,
342 'ssh_pass' : self._ssh_pass}
mbligh084bc172008-10-18 14:02:45 +0000343 self._execute_code(VERIFY_CONTROL_FILE, namespace, protect=False)
jadmanski10646442008-08-13 14:05:21 +0000344 except Exception, e:
mbligh2b92b862008-11-22 13:25:32 +0000345 msg = ('Verify failed\n' + str(e) + '\n' + traceback.format_exc())
jadmanski10646442008-08-13 14:05:21 +0000346 self.record('ABORT', None, None, msg)
347 raise
348
349
350 def repair(self, host_protection):
351 if not self.machines:
352 raise error.AutoservError('No machines specified to repair')
mbligh0fce4112008-11-27 00:37:17 +0000353 if self.resultdir:
354 os.chdir(self.resultdir)
jadmanski10646442008-08-13 14:05:21 +0000355 namespace = {'machines': self.machines, 'job': self,
mbligh0d0f67d2009-11-06 03:15:03 +0000356 'ssh_user': self._ssh_user, 'ssh_port': self._ssh_port,
357 'ssh_pass': self._ssh_pass,
jadmanski10646442008-08-13 14:05:21 +0000358 'protection_level': host_protection}
mbligh25c0b8c2009-01-24 01:44:17 +0000359
mbligh0931b0a2009-04-08 17:44:48 +0000360 self._execute_code(REPAIR_CONTROL_FILE, namespace, protect=False)
jadmanski10646442008-08-13 14:05:21 +0000361
362
    def precheck(self):
        """
        Perform any additional checks in derived classes. This base
        implementation is intentionally a no-op.
        """
        pass
368
369
    def enable_external_logging(self):
        """
        Start or restart external logging mechanism. Hook for derived
        classes; this base implementation does nothing.
        """
        pass
375
376
    def disable_external_logging(self):
        """
        Pause or stop external logging mechanism. Hook for derived
        classes; this base implementation does nothing.
        """
        pass
382
383
    def use_external_logging(self):
        """
        Return True if external logging should be used. Derived classes
        that enable external logging override this; the base always
        answers False.
        """
        return False
389
390
    def _make_parallel_wrapper(self, function, machines, log):
        """Wrap function as appropriate for calling by parallel_simple.

        The returned wrapper runs inside a forked subcommand, so the
        mutations of self below affect only the child's copy of the job.

        @param function: The per-machine callable to wrap.
        @param machines: The full list of machines being run against.
        @param log: Whether per-machine output subdirectories are wanted.
        @return: A callable taking a single machine name.
        """
        # forking happens unless this is a single-machine job running on
        # exactly its own machine list
        is_forking = not (len(machines) == 1 and self.machines == machines)
        if self._parse_job and is_forking and log:
            def wrapper(machine):
                # give the child its own parse tag and result context
                self._parse_job += "/" + machine
                self._using_parser = True
                self.machines = [machine]
                self.push_execution_context(machine)
                os.chdir(self.resultdir)
                utils.write_keyval(self.resultdir, {"hostname": machine})
                self.init_parser()
                result = function(machine)
                self.cleanup_parser()
                return result
        elif len(machines) > 1 and log:
            def wrapper(machine):
                # per-machine result dir, but no continuous parsing
                self.push_execution_context(machine)
                os.chdir(self.resultdir)
                machine_data = {'hostname' : machine,
                                'status_version' : str(self._STATUS_VERSION)}
                utils.write_keyval(self.resultdir, machine_data)
                result = function(machine)
                return result
        else:
            wrapper = function
        return wrapper
418
419
420 def parallel_simple(self, function, machines, log=True, timeout=None,
421 return_results=False):
422 """
423 Run 'function' using parallel_simple, with an extra wrapper to handle
424 the necessary setup for continuous parsing, if possible. If continuous
425 parsing is already properly initialized then this should just work.
426
427 @param function: A callable to run in parallel given each machine.
428 @param machines: A list of machine names to be passed one per subcommand
429 invocation of function.
430 @param log: If True, output will be written to output in a subdirectory
431 named after each machine.
432 @param timeout: Seconds after which the function call should timeout.
433 @param return_results: If True instead of an AutoServError being raised
434 on any error a list of the results|exceptions from the function
435 called on each arg is returned. [default: False]
436
437 @raises error.AutotestError: If any of the functions failed.
438 """
439 wrapper = self._make_parallel_wrapper(function, machines, log)
440 return subcommand.parallel_simple(wrapper, machines,
441 log=log, timeout=timeout,
442 return_results=return_results)
443
444
445 def parallel_on_machines(self, function, machines, timeout=None):
446 """
showardcd5fac42009-07-06 20:19:43 +0000447 @param function: Called in parallel with one machine as its argument.
mbligh415dc212009-06-15 21:53:34 +0000448 @param machines: A list of machines to call function(machine) on.
449 @param timeout: Seconds after which the function call should timeout.
450
451 @returns A list of machines on which function(machine) returned
452 without raising an exception.
453 """
showardcd5fac42009-07-06 20:19:43 +0000454 results = self.parallel_simple(function, machines, timeout=timeout,
mbligh415dc212009-06-15 21:53:34 +0000455 return_results=True)
456 success_machines = []
457 for result, machine in itertools.izip(results, machines):
458 if not isinstance(result, Exception):
459 success_machines.append(machine)
460 return success_machines
jadmanski10646442008-08-13 14:05:21 +0000461
462
    # sentinel: callers pass this as control_file_dir to request a
    # throwaway temp directory for the control file copies
    _USE_TEMP_DIR = object()
    def run(self, cleanup=False, install_before=False, install_after=False,
            collect_crashdumps=True, namespace={}, control=None,
            control_file_dir=None, only_collect_crashinfo=False):
        """Execute the job's control file, with setup/teardown segments.

        @param cleanup: If True, run the cleanup control segment afterward.
        @param install_before: If True, run the install segment on the
                machines before the control file.
        @param install_after: If True, run the install segment afterward.
        @param collect_crashdumps: If True, collect crashdumps afterward.
        @param namespace: Extra names made visible to the control file.
                NOTE(review): mutable default is safe here only because it
                is copied below before any mutation.
        @param control: Control file text; defaults to loading self.control.
        @param control_file_dir: Where to write the control file copies,
                or _USE_TEMP_DIR for a temp dir; defaults to resultdir.
        @param only_collect_crashinfo: If True, skip the control file and
                just collect crash info from a previous run.
        """
        # for a normal job, make sure the uncollected logs file exists
        # for a crashinfo-only run it should already exist, bail out otherwise
        created_uncollected_logs = False
        if self.resultdir and not os.path.exists(self._uncollected_log_file):
            if only_collect_crashinfo:
                # if this is a crashinfo-only run, and there were no existing
                # uncollected logs, just bail out early
                logging.info("No existing uncollected logs, "
                             "skipping crashinfo collection")
                return
            else:
                log_file = open(self._uncollected_log_file, "w")
                pickle.dump([], log_file)
                log_file.close()
                created_uncollected_logs = True

        # use a copy so changes don't affect the original dictionary
        namespace = namespace.copy()
        machines = self.machines
        if control is None:
            if self.control is None:
                control = ''
            else:
                control = self._load_control_file(self.control)
        if control_file_dir is None:
            control_file_dir = self.resultdir

        self.aborted = False
        namespace['machines'] = machines
        namespace['args'] = self.args
        namespace['job'] = self
        namespace['ssh_user'] = self._ssh_user
        namespace['ssh_port'] = self._ssh_port
        namespace['ssh_pass'] = self._ssh_pass
        test_start_time = int(time.time())

        if self.resultdir:
            os.chdir(self.resultdir)
            # touch status.log so that the parser knows a job is running here
            open(self.get_status_log_path(), 'a').close()
            self.enable_external_logging()

        # assume a crash until the control file completes cleanly
        collect_crashinfo = True
        temp_control_file_dir = None
        try:
            try:
                if install_before and machines:
                    self._execute_code(INSTALL_CONTROL_FILE, namespace)

                if only_collect_crashinfo:
                    return

                # determine the dir to write the control files to
                cfd_specified = (control_file_dir
                                 and control_file_dir is not self._USE_TEMP_DIR)
                if cfd_specified:
                    temp_control_file_dir = None
                else:
                    temp_control_file_dir = tempfile.mkdtemp(
                        suffix='temp_control_file_dir')
                    control_file_dir = temp_control_file_dir
                server_control_file = os.path.join(control_file_dir,
                                                   self._control_filename)
                client_control_file = os.path.join(control_file_dir,
                                                   CLIENT_CONTROL_FILENAME)
                if self._client:
                    # client-side control: write it out and run the server
                    # wrapper segment around it
                    namespace['control'] = control
                    utils.open_write_close(client_control_file, control)
                    shutil.copyfile(CLIENT_WRAPPER_CONTROL_FILE,
                                    server_control_file)
                else:
                    utils.open_write_close(server_control_file, control)
                logging.info("Processing control file")
                self._execute_code(server_control_file, namespace)
                logging.info("Finished processing control file")

                # no error occured, so we don't need to collect crashinfo
                collect_crashinfo = False
            except:
                try:
                    logging.exception(
                        'Exception escaped control file, job aborting:')
                except:
                    pass # don't let logging exceptions here interfere
                raise
        finally:
            if temp_control_file_dir:
                # Clean up temp directory used for copies of the control files
                try:
                    shutil.rmtree(temp_control_file_dir)
                except Exception, e:
                    logging.warn('Could not remove temp directory %s: %s',
                                 temp_control_file_dir, e)

            if machines and (collect_crashdumps or collect_crashinfo):
                namespace['test_start_time'] = test_start_time
                if collect_crashinfo:
                    # includes crashdumps
                    self._execute_code(CRASHINFO_CONTROL_FILE, namespace)
                else:
                    self._execute_code(CRASHDUMPS_CONTROL_FILE, namespace)
            # only remove the uncollected-logs file if this run created it
            if self._uncollected_log_file and created_uncollected_logs:
                os.remove(self._uncollected_log_file)
            self.disable_external_logging()
            if cleanup and machines:
                self._execute_code(CLEANUP_CONTROL_FILE, namespace)
            if install_after and machines:
                self._execute_code(INSTALL_CONTROL_FILE, namespace)
jadmanski10646442008-08-13 14:05:21 +0000575
576
    def run_test(self, url, *args, **dargs):
        """
        Summon a test object and run it inside a status-log group.

        @param url: URL or name of the test to run.
        @param args: Positional arguments forwarded to the test.
        @param dargs: Keyword arguments forwarded to the test; a 'tag'
                entry, if present, is appended to the test name.
        @return: True if the test passed, False if it raised a
                TestBaseException; any other exception is re-raised.
        """
        group, testname = self.pkgmgr.get_package_name(url, 'test')
        testname, subdir, tag = self._build_tagged_test_name(testname, dargs)
        outputdir = self._make_test_outputdir(subdir)

        def group_func():
            try:
                test.runtest(self, url, tag, args, dargs)
            except error.TestBaseException, e:
                # test-level failure: record its own exit status
                self.record(e.exit_status, subdir, testname, str(e))
                raise
            except Exception, e:
                # anything else is recorded as a FAIL with a traceback
                info = str(e) + "\n" + traceback.format_exc()
                self.record('FAIL', subdir, testname, info)
                raise
            else:
                self.record('GOOD', subdir, testname, 'completed successfully')

        result, exc_info = self._run_group(testname, subdir, group_func)
        if exc_info and isinstance(exc_info[1], error.TestBaseException):
            return False
        elif exc_info:
            # re-raise non-test exceptions with the original traceback
            raise exc_info[0], exc_info[1], exc_info[2]
        else:
            return True
jadmanski10646442008-08-13 14:05:21 +0000610
611
    def _run_group(self, name, subdir, function, *args, **dargs):
        """\
        Underlying method for running something inside of a group.

        Records START before calling function and a matching END line
        afterward. TestBaseExceptions are captured and returned rather
        than raised; any other exception aborts the group and raises
        error.JobError.

        @param name: Group name used in the status log.
        @param subdir: Status-log subdirectory, or None.
        @param function: Callable executed inside the group.
        @return: A tuple (result, exc_info) where result is function's
                return value (None on failure) and exc_info is the
                captured sys.exc_info() for a TestBaseException, or None.
        """
        result, exc_info = None, None
        try:
            self.record('START', subdir, name)
            result = function(*args, **dargs)
        except error.TestBaseException, e:
            # test failures end the group with the test's own status
            self.record("END %s" % e.exit_status, subdir, name)
            exc_info = sys.exc_info()
        except Exception, e:
            # anything else aborts the group and the job
            err_msg = str(e) + '\n'
            err_msg += traceback.format_exc()
            self.record('END ABORT', subdir, name, err_msg)
            raise error.JobError(name + ' failed\n' + traceback.format_exc())
        else:
            self.record('END GOOD', subdir, name)

        return result, exc_info
jadmanski10646442008-08-13 14:05:21 +0000632
633
634 def run_group(self, function, *args, **dargs):
635 """\
636 function:
637 subroutine to run
638 *args:
639 arguments for the function
640 """
641
642 name = function.__name__
643
644 # Allow the tag for the group to be specified.
645 tag = dargs.pop('tag', None)
646 if tag:
647 name = tag
648
jadmanskide292df2008-08-26 20:51:14 +0000649 return self._run_group(name, None, function, *args, **dargs)[0]
jadmanski10646442008-08-13 14:05:21 +0000650
651
652 def run_reboot(self, reboot_func, get_kernel_func):
653 """\
654 A specialization of run_group meant specifically for handling
655 a reboot. Includes support for capturing the kernel version
656 after the reboot.
657
658 reboot_func: a function that carries out the reboot
659
660 get_kernel_func: a function that returns a string
661 representing the kernel version.
662 """
jadmanski10646442008-08-13 14:05:21 +0000663 try:
664 self.record('START', None, 'reboot')
jadmanski10646442008-08-13 14:05:21 +0000665 reboot_func()
666 except Exception, e:
jadmanski10646442008-08-13 14:05:21 +0000667 err_msg = str(e) + '\n' + traceback.format_exc()
668 self.record('END FAIL', None, 'reboot', err_msg)
jadmanski4b51d542009-04-08 14:17:16 +0000669 raise
jadmanski10646442008-08-13 14:05:21 +0000670 else:
671 kernel = get_kernel_func()
jadmanski10646442008-08-13 14:05:21 +0000672 self.record('END GOOD', None, 'reboot',
673 optional_fields={"kernel": kernel})
674
675
jadmanskie432dd22009-01-30 15:04:51 +0000676 def run_control(self, path):
677 """Execute a control file found at path (relative to the autotest
678 path). Intended for executing a control file within a control file,
679 not for running the top-level job control file."""
680 path = os.path.join(self.autodir, path)
681 control_file = self._load_control_file(path)
mbligh0d0f67d2009-11-06 03:15:03 +0000682 self.run(control=control_file, control_file_dir=self._USE_TEMP_DIR)
jadmanskie432dd22009-01-30 15:04:51 +0000683
684
jadmanskic09fc152008-10-15 17:56:59 +0000685 def add_sysinfo_command(self, command, logfile=None, on_every_test=False):
mbligh4395bbd2009-03-25 19:34:17 +0000686 self._add_sysinfo_loggable(sysinfo.command(command, logf=logfile),
jadmanskic09fc152008-10-15 17:56:59 +0000687 on_every_test)
688
689
690 def add_sysinfo_logfile(self, file, on_every_test=False):
691 self._add_sysinfo_loggable(sysinfo.logfile(file), on_every_test)
692
693
694 def _add_sysinfo_loggable(self, loggable, on_every_test):
695 if on_every_test:
696 self.sysinfo.test_loggables.add(loggable)
697 else:
698 self.sysinfo.boot_loggables.add(loggable)
699
700
jadmanski10646442008-08-13 14:05:21 +0000701 def _read_warnings(self):
jadmanskif37df842009-02-11 00:03:26 +0000702 """Poll all the warning loggers and extract any new warnings that have
703 been logged. If the warnings belong to a category that is currently
704 disabled, this method will discard them and they will no longer be
705 retrievable.
706
707 Returns a list of (timestamp, message) tuples, where timestamp is an
708 integer epoch timestamp."""
jadmanski10646442008-08-13 14:05:21 +0000709 warnings = []
710 while True:
711 # pull in a line of output from every logger that has
712 # output ready to be read
mbligh2b92b862008-11-22 13:25:32 +0000713 loggers, _, _ = select.select(self.warning_loggers, [], [], 0)
jadmanski10646442008-08-13 14:05:21 +0000714 closed_loggers = set()
715 for logger in loggers:
716 line = logger.readline()
717 # record any broken pipes (aka line == empty)
718 if len(line) == 0:
719 closed_loggers.add(logger)
720 continue
jadmanskif37df842009-02-11 00:03:26 +0000721 # parse out the warning
722 timestamp, msgtype, msg = line.split('\t', 2)
723 timestamp = int(timestamp)
724 # if the warning is valid, add it to the results
725 if self.warning_manager.is_valid(timestamp, msgtype):
726 warnings.append((timestamp, msg.strip()))
jadmanski10646442008-08-13 14:05:21 +0000727
728 # stop listening to loggers that are closed
729 self.warning_loggers -= closed_loggers
730
731 # stop if none of the loggers have any output left
732 if not loggers:
733 break
734
735 # sort into timestamp order
736 warnings.sort()
737 return warnings
738
739
showardcc929362010-01-25 21:20:41 +0000740 def _unique_subdirectory(self, base_subdirectory_name):
741 """Compute a unique results subdirectory based on the given name.
742
743 Appends base_subdirectory_name with a number as necessary to find a
744 directory name that doesn't already exist.
745 """
746 subdirectory = base_subdirectory_name
747 counter = 1
748 while os.path.exists(os.path.join(self.resultdir, subdirectory)):
749 subdirectory = base_subdirectory_name + '.' + str(counter)
750 counter += 1
751 return subdirectory
752
753
jadmanski52053632010-06-11 21:08:10 +0000754 def get_record_context(self):
755 """Returns an object representing the current job.record context.
756
757 The object returned is an opaque object with a 0-arg restore method
758 which can be called to restore the job.record context (i.e. indentation)
759 to the current level. The intention is that it should be used when
760 something external which generate job.record calls (e.g. an autotest
761 client) can fail catastrophically and the server job record state
762 needs to be reset to its original "known good" state.
763
764 @return: A context object with a 0-arg restore() method."""
765 return self._indenter.get_context()
766
767
showardcc929362010-01-25 21:20:41 +0000768 def record_summary(self, status_code, test_name, reason='', attributes=None,
769 distinguishing_attributes=(), child_test_ids=None):
770 """Record a summary test result.
771
772 @param status_code: status code string, see
773 common_lib.log.is_valid_status()
774 @param test_name: name of the test
775 @param reason: (optional) string providing detailed reason for test
776 outcome
777 @param attributes: (optional) dict of string keyvals to associate with
778 this result
779 @param distinguishing_attributes: (optional) list of attribute names
780 that should be used to distinguish identically-named test
781 results. These attributes should be present in the attributes
782 parameter. This is used to generate user-friendly subdirectory
783 names.
784 @param child_test_ids: (optional) list of test indices for test results
785 used in generating this result.
786 """
787 subdirectory_name_parts = [test_name]
788 for attribute in distinguishing_attributes:
789 assert attributes
790 assert attribute in attributes, '%s not in %s' % (attribute,
791 attributes)
792 subdirectory_name_parts.append(attributes[attribute])
793 base_subdirectory_name = '.'.join(subdirectory_name_parts)
794
795 subdirectory = self._unique_subdirectory(base_subdirectory_name)
796 subdirectory_path = os.path.join(self.resultdir, subdirectory)
797 os.mkdir(subdirectory_path)
798
799 self.record(status_code, subdirectory, test_name,
800 status=reason, optional_fields={'is_summary': True})
801
802 if attributes:
803 utils.write_keyval(subdirectory_path, attributes)
804
805 if child_test_ids:
806 ids_string = ','.join(str(test_id) for test_id in child_test_ids)
807 summary_data = {'child_test_ids': ids_string}
808 utils.write_keyval(os.path.join(subdirectory_path, 'summary_data'),
809 summary_data)
810
811
jadmanski16a7ff72009-04-01 18:19:53 +0000812 def disable_warnings(self, warning_type):
jadmanskif37df842009-02-11 00:03:26 +0000813 self.warning_manager.disable_warnings(warning_type)
jadmanski16a7ff72009-04-01 18:19:53 +0000814 self.record("INFO", None, None,
815 "disabling %s warnings" % warning_type,
816 {"warnings.disable": warning_type})
jadmanskif37df842009-02-11 00:03:26 +0000817
818
jadmanski16a7ff72009-04-01 18:19:53 +0000819 def enable_warnings(self, warning_type):
jadmanskif37df842009-02-11 00:03:26 +0000820 self.warning_manager.enable_warnings(warning_type)
jadmanski16a7ff72009-04-01 18:19:53 +0000821 self.record("INFO", None, None,
822 "enabling %s warnings" % warning_type,
823 {"warnings.enable": warning_type})
jadmanskif37df842009-02-11 00:03:26 +0000824
825
jadmanski779bd292009-03-19 17:33:33 +0000826 def get_status_log_path(self, subdir=None):
827 """Return the path to the job status log.
828
829 @param subdir - Optional paramter indicating that you want the path
830 to a subdirectory status log.
831
832 @returns The path where the status log should be.
833 """
mbligh210bae62009-04-01 18:33:13 +0000834 if self.resultdir:
835 if subdir:
836 return os.path.join(self.resultdir, subdir, "status.log")
837 else:
838 return os.path.join(self.resultdir, "status.log")
jadmanski779bd292009-03-19 17:33:33 +0000839 else:
mbligh210bae62009-04-01 18:33:13 +0000840 return None
jadmanski779bd292009-03-19 17:33:33 +0000841
842
jadmanski6bb32d72009-03-19 20:25:24 +0000843 def _update_uncollected_logs_list(self, update_func):
844 """Updates the uncollected logs list in a multi-process safe manner.
845
846 @param update_func - a function that updates the list of uncollected
847 logs. Should take one parameter, the list to be updated.
848 """
mbligh0d0f67d2009-11-06 03:15:03 +0000849 if self._uncollected_log_file:
850 log_file = open(self._uncollected_log_file, "r+")
mbligha788dc42009-03-26 21:10:16 +0000851 fcntl.flock(log_file, fcntl.LOCK_EX)
jadmanski6bb32d72009-03-19 20:25:24 +0000852 try:
853 uncollected_logs = pickle.load(log_file)
854 update_func(uncollected_logs)
855 log_file.seek(0)
856 log_file.truncate()
857 pickle.dump(uncollected_logs, log_file)
jadmanski3bff9092009-04-22 18:09:47 +0000858 log_file.flush()
jadmanski6bb32d72009-03-19 20:25:24 +0000859 finally:
860 fcntl.flock(log_file, fcntl.LOCK_UN)
861 log_file.close()
862
863
864 def add_client_log(self, hostname, remote_path, local_path):
865 """Adds a new set of client logs to the list of uncollected logs,
866 to allow for future log recovery.
867
868 @param host - the hostname of the machine holding the logs
869 @param remote_path - the directory on the remote machine holding logs
870 @param local_path - the local directory to copy the logs into
871 """
872 def update_func(logs_list):
873 logs_list.append((hostname, remote_path, local_path))
874 self._update_uncollected_logs_list(update_func)
875
876
877 def remove_client_log(self, hostname, remote_path, local_path):
878 """Removes a set of client logs from the list of uncollected logs,
879 to allow for future log recovery.
880
881 @param host - the hostname of the machine holding the logs
882 @param remote_path - the directory on the remote machine holding logs
883 @param local_path - the local directory to copy the logs into
884 """
885 def update_func(logs_list):
886 logs_list.remove((hostname, remote_path, local_path))
887 self._update_uncollected_logs_list(update_func)
888
889
mbligh0d0f67d2009-11-06 03:15:03 +0000890 def get_client_logs(self):
891 """Retrieves the list of uncollected logs, if it exists.
892
893 @returns A list of (host, remote_path, local_path) tuples. Returns
894 an empty list if no uncollected logs file exists.
895 """
896 log_exists = (self._uncollected_log_file and
897 os.path.exists(self._uncollected_log_file))
898 if log_exists:
899 return pickle.load(open(self._uncollected_log_file))
900 else:
901 return []
902
903
mbligh084bc172008-10-18 14:02:45 +0000904 def _fill_server_control_namespace(self, namespace, protect=True):
mbligh2b92b862008-11-22 13:25:32 +0000905 """
906 Prepare a namespace to be used when executing server control files.
mbligh084bc172008-10-18 14:02:45 +0000907
908 This sets up the control file API by importing modules and making them
909 available under the appropriate names within namespace.
910
911 For use by _execute_code().
912
913 Args:
914 namespace: The namespace dictionary to fill in.
915 protect: Boolean. If True (the default) any operation that would
916 clobber an existing entry in namespace will cause an error.
917 Raises:
918 error.AutoservError: When a name would be clobbered by import.
919 """
920 def _import_names(module_name, names=()):
mbligh2b92b862008-11-22 13:25:32 +0000921 """
922 Import a module and assign named attributes into namespace.
mbligh084bc172008-10-18 14:02:45 +0000923
924 Args:
925 module_name: The string module name.
926 names: A limiting list of names to import from module_name. If
927 empty (the default), all names are imported from the module
928 similar to a "from foo.bar import *" statement.
929 Raises:
930 error.AutoservError: When a name being imported would clobber
931 a name already in namespace.
932 """
933 module = __import__(module_name, {}, {}, names)
934
935 # No names supplied? Import * from the lowest level module.
936 # (Ugh, why do I have to implement this part myself?)
937 if not names:
938 for submodule_name in module_name.split('.')[1:]:
939 module = getattr(module, submodule_name)
940 if hasattr(module, '__all__'):
941 names = getattr(module, '__all__')
942 else:
943 names = dir(module)
944
945 # Install each name into namespace, checking to make sure it
946 # doesn't override anything that already exists.
947 for name in names:
948 # Check for conflicts to help prevent future problems.
949 if name in namespace and protect:
950 if namespace[name] is not getattr(module, name):
951 raise error.AutoservError('importing name '
952 '%s from %s %r would override %r' %
953 (name, module_name, getattr(module, name),
954 namespace[name]))
955 else:
956 # Encourage cleanliness and the use of __all__ for a
957 # more concrete API with less surprises on '*' imports.
958 warnings.warn('%s (%r) being imported from %s for use '
959 'in server control files is not the '
960 'first occurrance of that import.' %
961 (name, namespace[name], module_name))
962
963 namespace[name] = getattr(module, name)
964
965
966 # This is the equivalent of prepending a bunch of import statements to
967 # the front of the control script.
mbligha2b07dd2009-06-22 18:26:13 +0000968 namespace.update(os=os, sys=sys, logging=logging)
mbligh084bc172008-10-18 14:02:45 +0000969 _import_names('autotest_lib.server',
970 ('hosts', 'autotest', 'kvm', 'git', 'standalone_profiler',
971 'source_kernel', 'rpm_kernel', 'deb_kernel', 'git_kernel'))
972 _import_names('autotest_lib.server.subcommand',
973 ('parallel', 'parallel_simple', 'subcommand'))
974 _import_names('autotest_lib.server.utils',
975 ('run', 'get_tmp_dir', 'sh_escape', 'parse_machine'))
976 _import_names('autotest_lib.client.common_lib.error')
977 _import_names('autotest_lib.client.common_lib.barrier', ('barrier',))
978
979 # Inject ourself as the job object into other classes within the API.
980 # (Yuck, this injection is a gross thing be part of a public API. -gps)
981 #
982 # XXX Base & SiteAutotest do not appear to use .job. Who does?
983 namespace['autotest'].Autotest.job = self
984 # server.hosts.base_classes.Host uses .job.
985 namespace['hosts'].Host.job = self
986
987
    def _execute_code(self, code_file, namespace, protect=True):
        """
        Execute code using a copy of namespace as a server control script.

        Unless protect_namespace is explicitly set to False, the dict will not
        be modified.

        Args:
          code_file: The filename of the control file to execute.
          namespace: A dict containing names to make available during execution.
          protect: Boolean.  If True (the default) a copy of the namespace dict
              is used during execution to prevent the code from modifying its
              contents outside of this function.  If False the raw dict is
              passed in and modifications will be allowed.
        """
        if protect:
            namespace = namespace.copy()
        self._fill_server_control_namespace(namespace, protect=protect)
        # TODO: Simplify and get rid of the special cases for only 1 machine.
        if len(self.machines) > 1:
            machines_text = '\n'.join(self.machines) + '\n'
            # Only rewrite the file if it does not match our machine list.
            try:
                machines_f = open(MACHINES_FILENAME, 'r')
                existing_machines_text = machines_f.read()
                machines_f.close()
            except EnvironmentError:
                # Missing/unreadable machines file; treat as "no match" so it
                # gets (re)written below.
                existing_machines_text = None
            if machines_text != existing_machines_text:
                utils.open_write_close(MACHINES_FILENAME, machines_text)
        # Run the control file with namespace as both globals and locals.
        execfile(code_file, namespace, namespace)
jadmanski10646442008-08-13 14:05:21 +00001019
1020
jadmanskie29d0e42010-06-17 16:06:52 +00001021 def _parse_status(self, new_line):
mbligh0d0f67d2009-11-06 03:15:03 +00001022 if not self._using_parser:
jadmanski10646442008-08-13 14:05:21 +00001023 return
jadmanskie29d0e42010-06-17 16:06:52 +00001024 new_tests = self.parser.process_lines([new_line])
jadmanski10646442008-08-13 14:05:21 +00001025 for test in new_tests:
1026 self.__insert_test(test)
1027
1028
1029 def __insert_test(self, test):
mbligh2b92b862008-11-22 13:25:32 +00001030 """
1031 An internal method to insert a new test result into the
jadmanski10646442008-08-13 14:05:21 +00001032 database. This method will not raise an exception, even if an
1033 error occurs during the insert, to avoid failing a test
1034 simply because of unexpected database issues."""
showard21baa452008-10-21 00:08:39 +00001035 self.num_tests_run += 1
1036 if status_lib.is_worse_than_or_equal_to(test.status, 'FAIL'):
1037 self.num_tests_failed += 1
jadmanski10646442008-08-13 14:05:21 +00001038 try:
1039 self.results_db.insert_test(self.job_model, test)
1040 except Exception:
1041 msg = ("WARNING: An unexpected error occured while "
1042 "inserting test results into the database. "
1043 "Ignoring error.\n" + traceback.format_exc())
1044 print >> sys.stderr, msg
1045
mblighcaa62c22008-04-07 21:51:17 +00001046
mblighfc3da5b2010-01-06 18:37:22 +00001047 def preprocess_client_state(self):
1048 """
1049 Produce a state file for initializing the state of a client job.
1050
1051 Creates a new client state file with all the current server state, as
1052 well as some pre-set client state.
1053
1054 @returns The path of the file the state was written into.
1055 """
1056 # initialize the sysinfo state
1057 self._state.set('client', 'sysinfo', self.sysinfo.serialize())
1058
1059 # dump the state out to a tempfile
1060 fd, file_path = tempfile.mkstemp(dir=self.tmpdir)
1061 os.close(fd)
mbligha2c99492010-01-27 22:59:50 +00001062
1063 # write_to_file doesn't need locking, we exclusively own file_path
mblighfc3da5b2010-01-06 18:37:22 +00001064 self._state.write_to_file(file_path)
1065 return file_path
1066
1067
1068 def postprocess_client_state(self, state_path):
1069 """
1070 Update the state of this job with the state from a client job.
1071
1072 Updates the state of the server side of a job with the final state
1073 of a client job that was run. Updates the non-client-specific state,
1074 pulls in some specific bits from the client-specific state, and then
1075 discards the rest. Removes the state file afterwards
1076
1077 @param state_file A path to the state file from the client.
1078 """
1079 # update the on-disk state
mblighfc3da5b2010-01-06 18:37:22 +00001080 try:
jadmanskib6e7bdb2010-04-13 16:00:39 +00001081 self._state.read_from_file(state_path)
mblighfc3da5b2010-01-06 18:37:22 +00001082 os.remove(state_path)
mbligha2c99492010-01-27 22:59:50 +00001083 except OSError, e:
mblighfc3da5b2010-01-06 18:37:22 +00001084 # ignore file-not-found errors
1085 if e.errno != errno.ENOENT:
1086 raise
jadmanskib6e7bdb2010-04-13 16:00:39 +00001087 else:
1088 logging.debug('Client state file %s not found', state_path)
mblighfc3da5b2010-01-06 18:37:22 +00001089
1090 # update the sysinfo state
1091 if self._state.has('client', 'sysinfo'):
1092 self.sysinfo.deserialize(self._state.get('client', 'sysinfo'))
1093
1094 # drop all the client-specific state
1095 self._state.discard_namespace('client')
1096
1097
mbligh0a883702010-04-21 01:58:34 +00001098 def clear_all_known_hosts(self):
1099 """Clears known hosts files for all AbstractSSHHosts."""
1100 for host in self.hosts:
1101 if isinstance(host, abstract_ssh.AbstractSSHHost):
1102 host.clear_known_hosts()
1103
1104
mbligha7007722009-01-13 00:37:11 +00001105site_server_job = utils.import_site_class(
1106 __file__, "autotest_lib.server.site_server_job", "site_server_job",
1107 base_server_job)
jadmanski0afbb632008-06-06 21:10:57 +00001108
mbligh0a8c3322009-04-28 18:32:19 +00001109class server_job(site_server_job):
jadmanski0afbb632008-06-06 21:10:57 +00001110 pass
jadmanskif37df842009-02-11 00:03:26 +00001111
1112
class warning_manager(object):
    """Class for controlling warning logs. Manages the enabling and disabling
    of warnings."""
    def __init__(self):
        # Maps a warning type to a list of (start, end) epoch intervals
        # during which that type is disabled; end is None while the
        # interval is still open (i.e. the type is currently disabled).
        self.disabled_warnings = {}


    def is_valid(self, timestamp, warning_type):
        """Indicates if a warning (based on the time it occured and its type)
        is a valid warning. A warning is considered "invalid" if this type of
        warning was marked as "disabled" at the time the warning occured."""
        for start, end in self.disabled_warnings.get(warning_type, []):
            inside = timestamp >= start and (end is None or timestamp < end)
            if inside:
                return False
        return True


    def disable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, disables all further warnings of this type."""
        intervals = self.disabled_warnings.setdefault(warning_type, [])
        # Only open a new interval if this type isn't already disabled.
        already_disabled = intervals and intervals[-1][1] is None
        if not already_disabled:
            intervals.append((int(current_time_func()), None))


    def enable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, enables all further warnings of this type."""
        intervals = self.disabled_warnings.get(warning_type, [])
        # Close the currently-open interval, if there is one.
        if intervals and intervals[-1][1] is None:
            intervals[-1] = (intervals[-1][0], int(current_time_func()))