blob: 567fb5a666b5cd0d8363c8aa169f4fe4962ecfa2 [file] [log] [blame]
mbligh57e78662008-06-17 19:53:49 +00001"""
2The main job wrapper for the server side.
3
4This is the core infrastructure. Derived from the client side job.py
5
6Copyright Martin J. Bligh, Andy Whitcroft 2007
7"""
8
jadmanski6bb32d72009-03-19 20:25:24 +00009import getpass, os, sys, re, stat, tempfile, time, select, subprocess
mblighfc3da5b2010-01-06 18:37:22 +000010import traceback, shutil, warnings, fcntl, pickle, logging, itertools, errno
showard75cdfee2009-06-10 17:40:41 +000011from autotest_lib.client.bin import sysinfo
mbligh0d0f67d2009-11-06 03:15:03 +000012from autotest_lib.client.common_lib import base_job
mbligh09108442008-10-15 16:27:38 +000013from autotest_lib.client.common_lib import error, log, utils, packages
showard75cdfee2009-06-10 17:40:41 +000014from autotest_lib.client.common_lib import logging_manager
jadmanski043e1132008-11-19 17:10:32 +000015from autotest_lib.server import test, subcommand, profilers
mbligh0a883702010-04-21 01:58:34 +000016from autotest_lib.server.hosts import abstract_ssh
jadmanski10646442008-08-13 14:05:21 +000017from autotest_lib.tko import db as tko_db, status_lib, utils as tko_utils
jadmanski10646442008-08-13 14:05:21 +000018
19
mbligh084bc172008-10-18 14:02:45 +000020def _control_segment_path(name):
21 """Get the pathname of the named control segment file."""
jadmanski10646442008-08-13 14:05:21 +000022 server_dir = os.path.dirname(os.path.abspath(__file__))
mbligh084bc172008-10-18 14:02:45 +000023 return os.path.join(server_dir, "control_segments", name)
jadmanski10646442008-08-13 14:05:21 +000024
25
# Filenames used inside a job's results/control directory.
CLIENT_CONTROL_FILENAME = 'control'
SERVER_CONTROL_FILENAME = 'control.srv'
MACHINES_FILENAME = '.machines'

# Control segments executed around the main control file by run().
CLIENT_WRAPPER_CONTROL_FILE = _control_segment_path('client_wrapper')
CRASHDUMPS_CONTROL_FILE = _control_segment_path('crashdumps')
CRASHINFO_CONTROL_FILE = _control_segment_path('crashinfo')
INSTALL_CONTROL_FILE = _control_segment_path('install')
CLEANUP_CONTROL_FILE = _control_segment_path('cleanup')

# Control segments used by verify() and repair().
VERIFY_CONTROL_FILE = _control_segment_path('verify')
REPAIR_CONTROL_FILE = _control_segment_path('repair')
jadmanski10646442008-08-13 14:05:21 +000038
39
mbligh062ed152009-01-13 00:57:14 +000040# by default provide a stub that generates no site data
41def _get_site_job_data_dummy(job):
42 return {}
43
44
# Load site-specific code for generating site-specific job data; falls back
# to the dummy (empty-dict) implementation when no site module is installed.
get_site_job_data = utils.import_site_function(__file__,
    "autotest_lib.server.site_server_job", "get_site_job_data",
    _get_site_job_data_dummy)
jadmanski10646442008-08-13 14:05:21 +000049
50
class status_indenter(base_job.status_indenter):
    """Integer-counter implementation of the base_job status indenter."""
    def __init__(self):
        # current indentation depth; moved up and down one level at a time
        self._indent = 0


    @property
    def indent(self):
        """The current indentation level."""
        return self._indent


    def increment(self):
        """Step one indentation level deeper."""
        self._indent = self._indent + 1


    def decrement(self):
        """Step one indentation level back out."""
        self._indent = self._indent - 1
68
69
class server_job_record_hook(object):
    """The job.record hook for the server job.

    Whenever a new status log entry is written, this hook drains the job's
    warning loggers and injects any pending WARN messages, then echoes the
    rendered entries to INFO-level logging.  It is implemented as a class
    (rather than a plain function) so it can keep a flag that suppresses
    recursive invocation, since the hook itself calls job.record.

    Depends on job._read_warnings and job._logger.
    """
    def __init__(self, job):
        self._job = job
        self._being_called = False


    def __call__(self, entry):
        """Invoke the real hook (_hook) unless we are already inside it.

        Makes no attempt to be threadsafe; the flag exists purely to break
        the job.record->_hook->job.record->_hook->... recursion chain."""
        if not self._being_called:
            self._being_called = True
            try:
                self._hook(self._job, entry)
            finally:
                self._being_called = False


    @staticmethod
    def _hook(job, entry):
        """The core hook, which can safely call job.record."""
        to_echo = []
        # drain every pending warning and record it as a WARN entry
        for timestamp, msg in job._read_warnings():
            warning = base_job.status_log_entry(
                'WARN', None, None, msg, {}, timestamp=timestamp)
            to_echo.append(warning)
            job.record_entry(warning)
        # echo all entries (warnings plus the triggering one) to INFO
        to_echo.append(entry)
        for log_entry in to_echo:
            logging.info(job._logger.render_entry(log_entry))
113
114
class base_server_job(base_job.base_job):
    """The server-side concrete implementation of base_job.

    Optional properties provided by this implementation:
        serverdir
        conmuxdir

        num_tests_run
        num_tests_failed

        warning_manager
        warning_loggers
    """

    # Version of the status log format this job writes; handed to the TKO
    # status parser (see init_parser) so it can interpret the log.
    _STATUS_VERSION = 1
jadmanski10646442008-08-13 14:05:21 +0000130
131 def __init__(self, control, args, resultdir, label, user, machines,
132 client=False, parse_job='',
mbligh374f3412009-05-13 21:29:45 +0000133 ssh_user='root', ssh_port=22, ssh_pass='',
mblighe0cbc912010-03-11 18:03:07 +0000134 group_name='', tag='',
135 control_filename=SERVER_CONTROL_FILENAME):
jadmanski10646442008-08-13 14:05:21 +0000136 """
mbligh374f3412009-05-13 21:29:45 +0000137 Create a server side job object.
mblighb5dac432008-11-27 00:38:44 +0000138
mblighe7d9c602009-07-02 19:02:33 +0000139 @param control: The pathname of the control file.
140 @param args: Passed to the control file.
141 @param resultdir: Where to throw the results.
142 @param label: Description of the job.
143 @param user: Username for the job (email address).
144 @param client: True if this is a client-side control file.
145 @param parse_job: string, if supplied it is the job execution tag that
146 the results will be passed through to the TKO parser with.
147 @param ssh_user: The SSH username. [root]
148 @param ssh_port: The SSH port number. [22]
149 @param ssh_pass: The SSH passphrase, if needed.
150 @param group_name: If supplied, this will be written out as
mbligh374f3412009-05-13 21:29:45 +0000151 host_group_name in the keyvals file for the parser.
mblighe7d9c602009-07-02 19:02:33 +0000152 @param tag: The job execution tag from the scheduler. [optional]
mblighe0cbc912010-03-11 18:03:07 +0000153 @param control_filename: The filename where the server control file
154 should be written in the results directory.
jadmanski10646442008-08-13 14:05:21 +0000155 """
mbligh0d0f67d2009-11-06 03:15:03 +0000156 super(base_server_job, self).__init__(resultdir=resultdir)
mbligha788dc42009-03-26 21:10:16 +0000157
mbligh0d0f67d2009-11-06 03:15:03 +0000158 path = os.path.dirname(__file__)
159 self.control = control
160 self._uncollected_log_file = os.path.join(self.resultdir,
161 'uncollected_logs')
162 debugdir = os.path.join(self.resultdir, 'debug')
163 if not os.path.exists(debugdir):
164 os.mkdir(debugdir)
165
166 if user:
167 self.user = user
168 else:
169 self.user = getpass.getuser()
170
jadmanski808f4b12010-04-09 22:30:31 +0000171 self.args = args
jadmanski10646442008-08-13 14:05:21 +0000172 self.machines = machines
mbligh0d0f67d2009-11-06 03:15:03 +0000173 self._client = client
174 self._record_prefix = ''
jadmanski10646442008-08-13 14:05:21 +0000175 self.warning_loggers = set()
jadmanskif37df842009-02-11 00:03:26 +0000176 self.warning_manager = warning_manager()
mbligh0d0f67d2009-11-06 03:15:03 +0000177 self._ssh_user = ssh_user
178 self._ssh_port = ssh_port
179 self._ssh_pass = ssh_pass
mblighe7d9c602009-07-02 19:02:33 +0000180 self.tag = tag
mbligh09108442008-10-15 16:27:38 +0000181 self.last_boot_tag = None
jadmanski53aaf382008-11-17 16:22:31 +0000182 self.hosts = set()
mbligh0d0f67d2009-11-06 03:15:03 +0000183 self.drop_caches = False
mblighb5dac432008-11-27 00:38:44 +0000184 self.drop_caches_between_iterations = False
mblighe0cbc912010-03-11 18:03:07 +0000185 self._control_filename = control_filename
jadmanski10646442008-08-13 14:05:21 +0000186
showard75cdfee2009-06-10 17:40:41 +0000187 self.logging = logging_manager.get_logging_manager(
188 manage_stdout_and_stderr=True, redirect_fds=True)
189 subcommand.logging_manager_object = self.logging
jadmanski10646442008-08-13 14:05:21 +0000190
mbligh0d0f67d2009-11-06 03:15:03 +0000191 self.sysinfo = sysinfo.sysinfo(self.resultdir)
jadmanski043e1132008-11-19 17:10:32 +0000192 self.profilers = profilers.profilers(self)
jadmanskic09fc152008-10-15 17:56:59 +0000193
jadmanski10646442008-08-13 14:05:21 +0000194 job_data = {'label' : label, 'user' : user,
195 'hostname' : ','.join(machines),
mbligh0d0f67d2009-11-06 03:15:03 +0000196 'status_version' : str(self._STATUS_VERSION),
showard170873e2009-01-07 00:22:26 +0000197 'job_started' : str(int(time.time()))}
mbligh374f3412009-05-13 21:29:45 +0000198 if group_name:
199 job_data['host_group_name'] = group_name
jadmanski10646442008-08-13 14:05:21 +0000200
mbligh0d0f67d2009-11-06 03:15:03 +0000201 # only write these keyvals out on the first job in a resultdir
202 if 'job_started' not in utils.read_keyval(self.resultdir):
203 job_data.update(get_site_job_data(self))
204 utils.write_keyval(self.resultdir, job_data)
205
206 self._parse_job = parse_job
showardcc929362010-01-25 21:20:41 +0000207 self._using_parser = (self._parse_job and len(machines) <= 1)
mbligh0d0f67d2009-11-06 03:15:03 +0000208 self.pkgmgr = packages.PackageManager(
209 self.autodir, run_function_dargs={'timeout':600})
showard21baa452008-10-21 00:08:39 +0000210 self.num_tests_run = 0
211 self.num_tests_failed = 0
212
jadmanski550fdc22008-11-20 16:32:08 +0000213 self._register_subcommand_hooks()
214
mbligh0d0f67d2009-11-06 03:15:03 +0000215 # these components aren't usable on the server
216 self.bootloader = None
217 self.harness = None
218
jadmanski2a89dac2010-06-11 14:32:58 +0000219 # set up the status logger
220 self._logger = base_job.status_logger(
221 self, status_indenter(), 'status.log', 'status.log',
222 record_hook=server_job_record_hook(self))
223
mbligh0d0f67d2009-11-06 03:15:03 +0000224
225 @classmethod
226 def _find_base_directories(cls):
227 """
228 Determine locations of autodir, clientdir and serverdir. Assumes
229 that this file is located within serverdir and uses __file__ along
230 with relative paths to resolve the location.
231 """
232 serverdir = os.path.abspath(os.path.dirname(__file__))
233 autodir = os.path.normpath(os.path.join(serverdir, '..'))
234 clientdir = os.path.join(autodir, 'client')
235 return autodir, clientdir, serverdir
236
237
238 def _find_resultdir(self, resultdir):
239 """
240 Determine the location of resultdir. For server jobs we expect one to
241 always be explicitly passed in to __init__, so just return that.
242 """
243 if resultdir:
244 return os.path.normpath(resultdir)
245 else:
246 return None
247
jadmanski550fdc22008-11-20 16:32:08 +0000248
jadmanski2a89dac2010-06-11 14:32:58 +0000249 def _get_status_logger(self):
250 """Return a reference to the status logger."""
251 return self._logger
252
253
jadmanskie432dd22009-01-30 15:04:51 +0000254 @staticmethod
255 def _load_control_file(path):
256 f = open(path)
257 try:
258 control_file = f.read()
259 finally:
260 f.close()
261 return re.sub('\r', '', control_file)
262
263
jadmanski550fdc22008-11-20 16:32:08 +0000264 def _register_subcommand_hooks(self):
mbligh2b92b862008-11-22 13:25:32 +0000265 """
266 Register some hooks into the subcommand modules that allow us
267 to properly clean up self.hosts created in forked subprocesses.
268 """
jadmanski550fdc22008-11-20 16:32:08 +0000269 def on_fork(cmd):
270 self._existing_hosts_on_fork = set(self.hosts)
271 def on_join(cmd):
272 new_hosts = self.hosts - self._existing_hosts_on_fork
273 for host in new_hosts:
274 host.close()
275 subcommand.subcommand.register_fork_hook(on_fork)
276 subcommand.subcommand.register_join_hook(on_join)
277
jadmanski10646442008-08-13 14:05:21 +0000278
    def init_parser(self):
        """
        Start the continuous parsing of self.resultdir. This sets up
        the database connection and inserts the basic job object into
        the database if necessary.

        No-op unless continuous parsing was enabled at construction time
        (a parse_job tag supplied for a single-machine job).
        """
        if not self._using_parser:
            return
        # redirect parser debugging to .parse.log
        parse_log = os.path.join(self.resultdir, '.parse.log')
        # opened unbuffered (third arg 0) so debug output hits disk at once
        parse_log = open(parse_log, 'w', 0)
        tko_utils.redirect_parser_debugging(parse_log)
        # create a job model object and set up the db
        self.results_db = tko_db.db(autocommit=True)
        self.parser = status_lib.parser(self._STATUS_VERSION)
        self.job_model = self.parser.make_job(self.resultdir)
        self.parser.start(self.job_model)
        # check if a job already exists in the db and insert it if
        # it does not
        job_idx = self.results_db.find_job(self._parse_job)
        if job_idx is None:
            self.results_db.insert_job(self._parse_job, self.job_model)
        else:
            # job already in the db: attach its existing indices so new
            # results get associated with the same job/machine rows
            machine_idx = self.results_db.lookup_machine(self.job_model.machine)
            self.job_model.index = job_idx
            self.job_model.machine_idx = machine_idx
305
306
307 def cleanup_parser(self):
mbligh2b92b862008-11-22 13:25:32 +0000308 """
309 This should be called after the server job is finished
jadmanski10646442008-08-13 14:05:21 +0000310 to carry out any remaining cleanup (e.g. flushing any
mbligh2b92b862008-11-22 13:25:32 +0000311 remaining test results to the results db)
312 """
mbligh0d0f67d2009-11-06 03:15:03 +0000313 if not self._using_parser:
jadmanski10646442008-08-13 14:05:21 +0000314 return
315 final_tests = self.parser.end()
316 for test in final_tests:
317 self.__insert_test(test)
mbligh0d0f67d2009-11-06 03:15:03 +0000318 self._using_parser = False
jadmanski10646442008-08-13 14:05:21 +0000319
320
321 def verify(self):
322 if not self.machines:
mbligh084bc172008-10-18 14:02:45 +0000323 raise error.AutoservError('No machines specified to verify')
mbligh0fce4112008-11-27 00:37:17 +0000324 if self.resultdir:
325 os.chdir(self.resultdir)
jadmanski10646442008-08-13 14:05:21 +0000326 try:
jadmanskicdd0c402008-09-19 21:21:31 +0000327 namespace = {'machines' : self.machines, 'job' : self,
mbligh0d0f67d2009-11-06 03:15:03 +0000328 'ssh_user' : self._ssh_user,
329 'ssh_port' : self._ssh_port,
330 'ssh_pass' : self._ssh_pass}
mbligh084bc172008-10-18 14:02:45 +0000331 self._execute_code(VERIFY_CONTROL_FILE, namespace, protect=False)
jadmanski10646442008-08-13 14:05:21 +0000332 except Exception, e:
mbligh2b92b862008-11-22 13:25:32 +0000333 msg = ('Verify failed\n' + str(e) + '\n' + traceback.format_exc())
jadmanski10646442008-08-13 14:05:21 +0000334 self.record('ABORT', None, None, msg)
335 raise
336
337
338 def repair(self, host_protection):
339 if not self.machines:
340 raise error.AutoservError('No machines specified to repair')
mbligh0fce4112008-11-27 00:37:17 +0000341 if self.resultdir:
342 os.chdir(self.resultdir)
jadmanski10646442008-08-13 14:05:21 +0000343 namespace = {'machines': self.machines, 'job': self,
mbligh0d0f67d2009-11-06 03:15:03 +0000344 'ssh_user': self._ssh_user, 'ssh_port': self._ssh_port,
345 'ssh_pass': self._ssh_pass,
jadmanski10646442008-08-13 14:05:21 +0000346 'protection_level': host_protection}
mbligh25c0b8c2009-01-24 01:44:17 +0000347
mbligh0931b0a2009-04-08 17:44:48 +0000348 self._execute_code(REPAIR_CONTROL_FILE, namespace, protect=False)
jadmanski10646442008-08-13 14:05:21 +0000349
350
351 def precheck(self):
352 """
353 perform any additional checks in derived classes.
354 """
355 pass
356
357
358 def enable_external_logging(self):
mbligh2b92b862008-11-22 13:25:32 +0000359 """
360 Start or restart external logging mechanism.
jadmanski10646442008-08-13 14:05:21 +0000361 """
362 pass
363
364
365 def disable_external_logging(self):
mbligh2b92b862008-11-22 13:25:32 +0000366 """
367 Pause or stop external logging mechanism.
jadmanski10646442008-08-13 14:05:21 +0000368 """
369 pass
370
371
372 def use_external_logging(self):
mbligh2b92b862008-11-22 13:25:32 +0000373 """
374 Return True if external logging should be used.
jadmanski10646442008-08-13 14:05:21 +0000375 """
376 return False
377
378
    def _make_parallel_wrapper(self, function, machines, log):
        """Wrap function as appropriate for calling by parallel_simple.

        Three cases:
          * forking with a parse job: the wrapper re-points this job object
            at the single machine, sets up a per-machine execution context
            and parser, and tears the parser down afterwards;
          * forking without a parse job: the wrapper just sets up the
            per-machine execution context and keyvals;
          * single machine, no fork: function is returned unwrapped.
        """
        is_forking = not (len(machines) == 1 and self.machines == machines)
        if self._parse_job and is_forking and log:
            def wrapper(machine):
                # NOTE(review): presumably runs in a subcommand-forked child,
                # so these mutations of self stay local to it -- confirm
                self._parse_job += "/" + machine
                self._using_parser = True
                self.machines = [machine]
                self.push_execution_context(machine)
                os.chdir(self.resultdir)
                utils.write_keyval(self.resultdir, {"hostname": machine})
                self.init_parser()
                result = function(machine)
                self.cleanup_parser()
                return result
        elif len(machines) > 1 and log:
            def wrapper(machine):
                self.push_execution_context(machine)
                os.chdir(self.resultdir)
                machine_data = {'hostname' : machine,
                                'status_version' : str(self._STATUS_VERSION)}
                utils.write_keyval(self.resultdir, machine_data)
                result = function(machine)
                return result
        else:
            wrapper = function
        return wrapper
406
407
408 def parallel_simple(self, function, machines, log=True, timeout=None,
409 return_results=False):
410 """
411 Run 'function' using parallel_simple, with an extra wrapper to handle
412 the necessary setup for continuous parsing, if possible. If continuous
413 parsing is already properly initialized then this should just work.
414
415 @param function: A callable to run in parallel given each machine.
416 @param machines: A list of machine names to be passed one per subcommand
417 invocation of function.
418 @param log: If True, output will be written to output in a subdirectory
419 named after each machine.
420 @param timeout: Seconds after which the function call should timeout.
421 @param return_results: If True instead of an AutoServError being raised
422 on any error a list of the results|exceptions from the function
423 called on each arg is returned. [default: False]
424
425 @raises error.AutotestError: If any of the functions failed.
426 """
427 wrapper = self._make_parallel_wrapper(function, machines, log)
428 return subcommand.parallel_simple(wrapper, machines,
429 log=log, timeout=timeout,
430 return_results=return_results)
431
432
433 def parallel_on_machines(self, function, machines, timeout=None):
434 """
showardcd5fac42009-07-06 20:19:43 +0000435 @param function: Called in parallel with one machine as its argument.
mbligh415dc212009-06-15 21:53:34 +0000436 @param machines: A list of machines to call function(machine) on.
437 @param timeout: Seconds after which the function call should timeout.
438
439 @returns A list of machines on which function(machine) returned
440 without raising an exception.
441 """
showardcd5fac42009-07-06 20:19:43 +0000442 results = self.parallel_simple(function, machines, timeout=timeout,
mbligh415dc212009-06-15 21:53:34 +0000443 return_results=True)
444 success_machines = []
445 for result, machine in itertools.izip(results, machines):
446 if not isinstance(result, Exception):
447 success_machines.append(machine)
448 return success_machines
jadmanski10646442008-08-13 14:05:21 +0000449
450
    # sentinel value telling run() to write control files to a temp dir
    _USE_TEMP_DIR = object()
    def run(self, cleanup=False, install_before=False, install_after=False,
            collect_crashdumps=True, namespace={}, control=None,
            control_file_dir=None, only_collect_crashinfo=False):
        """
        Run the job's control file, bracketed by the standard control
        segments (install, crashdump/crashinfo collection, cleanup).

        @param cleanup: if True, run the cleanup control segment afterwards.
        @param install_before: if True, run the install segment on the
                machines before the job.
        @param install_after: if True, run the install segment afterwards.
        @param collect_crashdumps: if True, collect crashdumps afterwards.
        @param namespace: extra globals for the control file.  Copied on
                entry, so the caller's dict is never mutated (which is also
                why the mutable {} default is safe here).
        @param control: control file contents; defaults to loading
                self.control from disk.
        @param control_file_dir: directory to write control file copies to,
                or _USE_TEMP_DIR for a throwaway temporary directory.
        @param only_collect_crashinfo: if True, skip running the job and
                only collect crash info left over from a previous run.
        """
        # for a normal job, make sure the uncollected logs file exists
        # for a crashinfo-only run it should already exist, bail out otherwise
        created_uncollected_logs = False
        if self.resultdir and not os.path.exists(self._uncollected_log_file):
            if only_collect_crashinfo:
                # if this is a crashinfo-only run, and there were no existing
                # uncollected logs, just bail out early
                logging.info("No existing uncollected logs, "
                             "skipping crashinfo collection")
                return
            else:
                log_file = open(self._uncollected_log_file, "w")
                pickle.dump([], log_file)
                log_file.close()
                created_uncollected_logs = True

        # use a copy so changes don't affect the original dictionary
        namespace = namespace.copy()
        machines = self.machines
        if control is None:
            if self.control is None:
                control = ''
            else:
                control = self._load_control_file(self.control)
        if control_file_dir is None:
            control_file_dir = self.resultdir

        self.aborted = False
        namespace['machines'] = machines
        namespace['args'] = self.args
        namespace['job'] = self
        namespace['ssh_user'] = self._ssh_user
        namespace['ssh_port'] = self._ssh_port
        namespace['ssh_pass'] = self._ssh_pass
        test_start_time = int(time.time())

        if self.resultdir:
            os.chdir(self.resultdir)
            # touch status.log so that the parser knows a job is running here
            open(self.get_status_log_path(), 'a').close()
            self.enable_external_logging()

        # assume crashinfo collection is needed until the control file
        # finishes without an exception
        collect_crashinfo = True
        temp_control_file_dir = None
        try:
            try:
                if install_before and machines:
                    self._execute_code(INSTALL_CONTROL_FILE, namespace)

                if only_collect_crashinfo:
                    return

                # determine the dir to write the control files to
                cfd_specified = (control_file_dir
                                 and control_file_dir is not self._USE_TEMP_DIR)
                if cfd_specified:
                    temp_control_file_dir = None
                else:
                    temp_control_file_dir = tempfile.mkdtemp(
                        suffix='temp_control_file_dir')
                    control_file_dir = temp_control_file_dir
                server_control_file = os.path.join(control_file_dir,
                                                   self._control_filename)
                client_control_file = os.path.join(control_file_dir,
                                                   CLIENT_CONTROL_FILENAME)
                if self._client:
                    # client-side job: the real control file goes to the
                    # client, and the server runs the generic wrapper
                    namespace['control'] = control
                    utils.open_write_close(client_control_file, control)
                    shutil.copyfile(CLIENT_WRAPPER_CONTROL_FILE,
                                    server_control_file)
                else:
                    utils.open_write_close(server_control_file, control)
                logging.info("Processing control file")
                self._execute_code(server_control_file, namespace)
                logging.info("Finished processing control file")

                # no error occured, so we don't need to collect crashinfo
                collect_crashinfo = False
            except:
                try:
                    logging.exception(
                        'Exception escaped control file, job aborting:')
                except:
                    pass # don't let logging exceptions here interfere
                raise
        finally:
            if temp_control_file_dir:
                # Clean up temp directory used for copies of the control files
                try:
                    shutil.rmtree(temp_control_file_dir)
                except Exception, e:
                    logging.warn('Could not remove temp directory %s: %s',
                                 temp_control_file_dir, e)

            if machines and (collect_crashdumps or collect_crashinfo):
                namespace['test_start_time'] = test_start_time
                if collect_crashinfo:
                    # includes crashdumps
                    self._execute_code(CRASHINFO_CONTROL_FILE, namespace)
                else:
                    self._execute_code(CRASHDUMPS_CONTROL_FILE, namespace)
            # only remove the uncollected-logs file if this run created it
            if self._uncollected_log_file and created_uncollected_logs:
                os.remove(self._uncollected_log_file)
            self.disable_external_logging()
            if cleanup and machines:
                self._execute_code(CLEANUP_CONTROL_FILE, namespace)
            if install_after and machines:
                self._execute_code(INSTALL_CONTROL_FILE, namespace)
jadmanski10646442008-08-13 14:05:21 +0000563
564
    def run_test(self, url, *args, **dargs):
        """
        Summon a test object and run it.

        tag
                tag to add to testname
        url
                url of the test to run

        @returns True if the test completed with a GOOD status, False if
                it raised a TestBaseException; any other exception is
                re-raised to the caller.
        """
        group, testname = self.pkgmgr.get_package_name(url, 'test')
        testname, subdir, tag = self._build_tagged_test_name(testname, dargs)
        # value unused here; presumably called for the side effect of
        # setting up the test's output directory -- confirm
        outputdir = self._make_test_outputdir(subdir)

        def group_func():
            try:
                test.runtest(self, url, tag, args, dargs)
            except error.TestBaseException, e:
                # known test failure type: record its own exit status
                self.record(e.exit_status, subdir, testname, str(e))
                raise
            except Exception, e:
                # anything else is an outright FAIL with a traceback
                info = str(e) + "\n" + traceback.format_exc()
                self.record('FAIL', subdir, testname, info)
                raise
            else:
                self.record('GOOD', subdir, testname, 'completed successfully')

        result, exc_info = self._run_group(testname, subdir, group_func)
        if exc_info and isinstance(exc_info[1], error.TestBaseException):
            return False
        elif exc_info:
            # re-raise non-test exceptions with their original traceback
            raise exc_info[0], exc_info[1], exc_info[2]
        else:
            return True
jadmanski10646442008-08-13 14:05:21 +0000598
599
    def _run_group(self, name, subdir, function, *args, **dargs):
        """\
        Underlying method for running something inside of a group.

        Records a START entry, runs function with the record prefix
        indented one level, then records a matching END entry whose status
        reflects the outcome.

        @returns a (result, exc_info) tuple: function's return value, and
                sys.exc_info() if it raised a TestBaseException (which is
                captured rather than propagated).  Any other exception is
                converted into a JobError and raised.
        """
        result, exc_info = None, None
        old_record_prefix = self._record_prefix
        try:
            self.record('START', subdir, name)
            self._record_prefix += '\t'
            try:
                result = function(*args, **dargs)
            finally:
                # always restore the prefix before writing the END entry
                self._record_prefix = old_record_prefix
        except error.TestBaseException, e:
            self.record("END %s" % e.exit_status, subdir, name)
            # capture rather than propagate known test exceptions
            exc_info = sys.exc_info()
        except Exception, e:
            err_msg = str(e) + '\n'
            err_msg += traceback.format_exc()
            self.record('END ABORT', subdir, name, err_msg)
            raise error.JobError(name + ' failed\n' + traceback.format_exc())
        else:
            self.record('END GOOD', subdir, name)

        return result, exc_info
jadmanski10646442008-08-13 14:05:21 +0000625
626
627 def run_group(self, function, *args, **dargs):
628 """\
629 function:
630 subroutine to run
631 *args:
632 arguments for the function
633 """
634
635 name = function.__name__
636
637 # Allow the tag for the group to be specified.
638 tag = dargs.pop('tag', None)
639 if tag:
640 name = tag
641
jadmanskide292df2008-08-26 20:51:14 +0000642 return self._run_group(name, None, function, *args, **dargs)[0]
jadmanski10646442008-08-13 14:05:21 +0000643
644
    def run_reboot(self, reboot_func, get_kernel_func):
        """\
        A specialization of run_group meant specifically for handling
        a reboot. Includes support for capturing the kernel version
        after the reboot.

        reboot_func: a function that carries out the reboot

        get_kernel_func: a function that returns a string
        representing the kernel version.
        """

        old_record_prefix = self._record_prefix
        try:
            self.record('START', None, 'reboot')
            self._record_prefix += '\t'
            reboot_func()
        except Exception, e:
            # restore the prefix before logging the failed END entry
            self._record_prefix = old_record_prefix
            err_msg = str(e) + '\n' + traceback.format_exc()
            self.record('END FAIL', None, 'reboot', err_msg)
            raise
        else:
            # capture the post-reboot kernel version for the status log
            kernel = get_kernel_func()
            self._record_prefix = old_record_prefix
            self.record('END GOOD', None, 'reboot',
                        optional_fields={"kernel": kernel})
672
673
jadmanskie432dd22009-01-30 15:04:51 +0000674 def run_control(self, path):
675 """Execute a control file found at path (relative to the autotest
676 path). Intended for executing a control file within a control file,
677 not for running the top-level job control file."""
678 path = os.path.join(self.autodir, path)
679 control_file = self._load_control_file(path)
mbligh0d0f67d2009-11-06 03:15:03 +0000680 self.run(control=control_file, control_file_dir=self._USE_TEMP_DIR)
jadmanskie432dd22009-01-30 15:04:51 +0000681
682
jadmanskic09fc152008-10-15 17:56:59 +0000683 def add_sysinfo_command(self, command, logfile=None, on_every_test=False):
mbligh4395bbd2009-03-25 19:34:17 +0000684 self._add_sysinfo_loggable(sysinfo.command(command, logf=logfile),
jadmanskic09fc152008-10-15 17:56:59 +0000685 on_every_test)
686
687
688 def add_sysinfo_logfile(self, file, on_every_test=False):
689 self._add_sysinfo_loggable(sysinfo.logfile(file), on_every_test)
690
691
692 def _add_sysinfo_loggable(self, loggable, on_every_test):
693 if on_every_test:
694 self.sysinfo.test_loggables.add(loggable)
695 else:
696 self.sysinfo.boot_loggables.add(loggable)
697
698
    def _read_warnings(self):
        """Poll all the warning loggers and extract any new warnings that have
        been logged. If the warnings belong to a category that is currently
        disabled, this method will discard them and they will no longer be
        retrievable.

        Returns a list of (timestamp, message) tuples, where timestamp is an
        integer epoch timestamp."""
        warnings = []
        while True:
            # pull in a line of output from every logger that has
            # output ready to be read (timeout of 0 makes this a
            # non-blocking poll)
            loggers, _, _ = select.select(self.warning_loggers, [], [], 0)
            closed_loggers = set()
            for logger in loggers:
                line = logger.readline()
                # record any broken pipes (aka line == empty)
                if len(line) == 0:
                    closed_loggers.add(logger)
                    continue
                # parse out the warning; each line is expected to be
                # tab-separated: timestamp, message type, message text
                timestamp, msgtype, msg = line.split('\t', 2)
                timestamp = int(timestamp)
                # if the warning is valid (i.e. its type was not disabled
                # at the time it occurred), add it to the results
                if self.warning_manager.is_valid(timestamp, msgtype):
                    warnings.append((timestamp, msg.strip()))

            # stop listening to loggers that are closed
            self.warning_loggers -= closed_loggers

            # stop if none of the loggers have any output left
            if not loggers:
                break

        # sort into timestamp order
        warnings.sort()
        return warnings
736
737
showardcc929362010-01-25 21:20:41 +0000738 def _unique_subdirectory(self, base_subdirectory_name):
739 """Compute a unique results subdirectory based on the given name.
740
741 Appends base_subdirectory_name with a number as necessary to find a
742 directory name that doesn't already exist.
743 """
744 subdirectory = base_subdirectory_name
745 counter = 1
746 while os.path.exists(os.path.join(self.resultdir, subdirectory)):
747 subdirectory = base_subdirectory_name + '.' + str(counter)
748 counter += 1
749 return subdirectory
750
751
752 def record_summary(self, status_code, test_name, reason='', attributes=None,
753 distinguishing_attributes=(), child_test_ids=None):
754 """Record a summary test result.
755
756 @param status_code: status code string, see
757 common_lib.log.is_valid_status()
758 @param test_name: name of the test
759 @param reason: (optional) string providing detailed reason for test
760 outcome
761 @param attributes: (optional) dict of string keyvals to associate with
762 this result
763 @param distinguishing_attributes: (optional) list of attribute names
764 that should be used to distinguish identically-named test
765 results. These attributes should be present in the attributes
766 parameter. This is used to generate user-friendly subdirectory
767 names.
768 @param child_test_ids: (optional) list of test indices for test results
769 used in generating this result.
770 """
771 subdirectory_name_parts = [test_name]
772 for attribute in distinguishing_attributes:
773 assert attributes
774 assert attribute in attributes, '%s not in %s' % (attribute,
775 attributes)
776 subdirectory_name_parts.append(attributes[attribute])
777 base_subdirectory_name = '.'.join(subdirectory_name_parts)
778
779 subdirectory = self._unique_subdirectory(base_subdirectory_name)
780 subdirectory_path = os.path.join(self.resultdir, subdirectory)
781 os.mkdir(subdirectory_path)
782
783 self.record(status_code, subdirectory, test_name,
784 status=reason, optional_fields={'is_summary': True})
785
786 if attributes:
787 utils.write_keyval(subdirectory_path, attributes)
788
789 if child_test_ids:
790 ids_string = ','.join(str(test_id) for test_id in child_test_ids)
791 summary_data = {'child_test_ids': ids_string}
792 utils.write_keyval(os.path.join(subdirectory_path, 'summary_data'),
793 summary_data)
794
795
jadmanski16a7ff72009-04-01 18:19:53 +0000796 def disable_warnings(self, warning_type):
jadmanskif37df842009-02-11 00:03:26 +0000797 self.warning_manager.disable_warnings(warning_type)
jadmanski16a7ff72009-04-01 18:19:53 +0000798 self.record("INFO", None, None,
799 "disabling %s warnings" % warning_type,
800 {"warnings.disable": warning_type})
jadmanskif37df842009-02-11 00:03:26 +0000801
802
jadmanski16a7ff72009-04-01 18:19:53 +0000803 def enable_warnings(self, warning_type):
jadmanskif37df842009-02-11 00:03:26 +0000804 self.warning_manager.enable_warnings(warning_type)
jadmanski16a7ff72009-04-01 18:19:53 +0000805 self.record("INFO", None, None,
806 "enabling %s warnings" % warning_type,
807 {"warnings.enable": warning_type})
jadmanskif37df842009-02-11 00:03:26 +0000808
809
jadmanski779bd292009-03-19 17:33:33 +0000810 def get_status_log_path(self, subdir=None):
811 """Return the path to the job status log.
812
813 @param subdir - Optional paramter indicating that you want the path
814 to a subdirectory status log.
815
816 @returns The path where the status log should be.
817 """
mbligh210bae62009-04-01 18:33:13 +0000818 if self.resultdir:
819 if subdir:
820 return os.path.join(self.resultdir, subdir, "status.log")
821 else:
822 return os.path.join(self.resultdir, "status.log")
jadmanski779bd292009-03-19 17:33:33 +0000823 else:
mbligh210bae62009-04-01 18:33:13 +0000824 return None
jadmanski779bd292009-03-19 17:33:33 +0000825
826
    def _update_uncollected_logs_list(self, update_func):
        """Updates the uncollected logs list in a multi-process safe manner.

        @param update_func - a function that updates the list of uncollected
        logs. Should take one parameter, the list to be updated.
        """
        # No-op when the job was started without an uncollected-logs file.
        if self._uncollected_log_file:
            log_file = open(self._uncollected_log_file, "r+")
            # Take an exclusive lock so concurrent processes cannot
            # interleave their read-modify-write cycles on the pickle file.
            fcntl.flock(log_file, fcntl.LOCK_EX)
            try:
                uncollected_logs = pickle.load(log_file)
                update_func(uncollected_logs)
                # Rewrite the file in place: rewind, drop the old contents,
                # then dump the updated list.
                log_file.seek(0)
                log_file.truncate()
                pickle.dump(uncollected_logs, log_file)
                # Flush before releasing the lock so other processes see
                # the update as soon as they can acquire the lock.
                log_file.flush()
            finally:
                fcntl.flock(log_file, fcntl.LOCK_UN)
                log_file.close()
846
847
848 def add_client_log(self, hostname, remote_path, local_path):
849 """Adds a new set of client logs to the list of uncollected logs,
850 to allow for future log recovery.
851
852 @param host - the hostname of the machine holding the logs
853 @param remote_path - the directory on the remote machine holding logs
854 @param local_path - the local directory to copy the logs into
855 """
856 def update_func(logs_list):
857 logs_list.append((hostname, remote_path, local_path))
858 self._update_uncollected_logs_list(update_func)
859
860
861 def remove_client_log(self, hostname, remote_path, local_path):
862 """Removes a set of client logs from the list of uncollected logs,
863 to allow for future log recovery.
864
865 @param host - the hostname of the machine holding the logs
866 @param remote_path - the directory on the remote machine holding logs
867 @param local_path - the local directory to copy the logs into
868 """
869 def update_func(logs_list):
870 logs_list.remove((hostname, remote_path, local_path))
871 self._update_uncollected_logs_list(update_func)
872
873
mbligh0d0f67d2009-11-06 03:15:03 +0000874 def get_client_logs(self):
875 """Retrieves the list of uncollected logs, if it exists.
876
877 @returns A list of (host, remote_path, local_path) tuples. Returns
878 an empty list if no uncollected logs file exists.
879 """
880 log_exists = (self._uncollected_log_file and
881 os.path.exists(self._uncollected_log_file))
882 if log_exists:
883 return pickle.load(open(self._uncollected_log_file))
884 else:
885 return []
886
887
    def _fill_server_control_namespace(self, namespace, protect=True):
        """
        Prepare a namespace to be used when executing server control files.

        This sets up the control file API by importing modules and making them
        available under the appropriate names within namespace.

        For use by _execute_code().

        Args:
          namespace: The namespace dictionary to fill in.
          protect: Boolean.  If True (the default) any operation that would
              clobber an existing entry in namespace will cause an error.
        Raises:
          error.AutoservError: When a name would be clobbered by import.
        """
        def _import_names(module_name, names=()):
            """
            Import a module and assign named attributes into namespace.

            Args:
              module_name: The string module name.
              names: A limiting list of names to import from module_name.  If
                  empty (the default), all names are imported from the module
                  similar to a "from foo.bar import *" statement.
            Raises:
              error.AutoservError: When a name being imported would clobber
                  a name already in namespace.
            """
            # The last argument to __import__ ensures the leaf submodule is
            # initialized, but __import__ still returns the top-level package.
            module = __import__(module_name, {}, {}, names)

            # No names supplied? Import * from the lowest level module.
            # (Ugh, why do I have to implement this part myself?)
            if not names:
                # Walk down from the top-level package to the leaf module.
                for submodule_name in module_name.split('.')[1:]:
                    module = getattr(module, submodule_name)
                if hasattr(module, '__all__'):
                    names = getattr(module, '__all__')
                else:
                    names = dir(module)

            # Install each name into namespace, checking to make sure it
            # doesn't override anything that already exists.
            for name in names:
                # Check for conflicts to help prevent future problems.
                if name in namespace and protect:
                    if namespace[name] is not getattr(module, name):
                        raise error.AutoservError('importing name '
                                '%s from %s %r would override %r' %
                                (name, module_name, getattr(module, name),
                                 namespace[name]))
                    else:
                        # Encourage cleanliness and the use of __all__ for a
                        # more concrete API with less surprises on '*' imports.
                        warnings.warn('%s (%r) being imported from %s for use '
                                      'in server control files is not the '
                                      'first occurrance of that import.' %
                                      (name, namespace[name], module_name))

                namespace[name] = getattr(module, name)


        # This is the equivalent of prepending a bunch of import statements to
        # the front of the control script.
        namespace.update(os=os, sys=sys, logging=logging)
        _import_names('autotest_lib.server',
                      ('hosts', 'autotest', 'kvm', 'git', 'standalone_profiler',
                       'source_kernel', 'rpm_kernel', 'deb_kernel', 'git_kernel'))
        _import_names('autotest_lib.server.subcommand',
                      ('parallel', 'parallel_simple', 'subcommand'))
        _import_names('autotest_lib.server.utils',
                      ('run', 'get_tmp_dir', 'sh_escape', 'parse_machine'))
        _import_names('autotest_lib.client.common_lib.error')
        _import_names('autotest_lib.client.common_lib.barrier', ('barrier',))

        # Inject ourself as the job object into other classes within the API.
        # (Yuck, this injection is a gross thing be part of a public API. -gps)
        #
        # XXX Base & SiteAutotest do not appear to use .job.  Who does?
        namespace['autotest'].Autotest.job = self
        # server.hosts.base_classes.Host uses .job.
        namespace['hosts'].Host.job = self
970
971
    def _execute_code(self, code_file, namespace, protect=True):
        """
        Execute code using a copy of namespace as a server control script.

        Unless protect_namespace is explicitly set to False, the dict will not
        be modified.

        Args:
          code_file: The filename of the control file to execute.
          namespace: A dict containing names to make available during execution.
          protect: Boolean.  If True (the default) a copy of the namespace dict
              is used during execution to prevent the code from modifying its
              contents outside of this function.  If False the raw dict is
              passed in and modifications will be allowed.
        """
        if protect:
            namespace = namespace.copy()
        self._fill_server_control_namespace(namespace, protect=protect)
        # TODO: Simplify and get rid of the special cases for only 1 machine.
        if len(self.machines) > 1:
            machines_text = '\n'.join(self.machines) + '\n'
            # Only rewrite the file if it does not match our machine list.
            try:
                machines_f = open(MACHINES_FILENAME, 'r')
                existing_machines_text = machines_f.read()
                machines_f.close()
            except EnvironmentError:
                # No readable .machines file yet; treat it as out of date so
                # it gets written below.
                existing_machines_text = None
            if machines_text != existing_machines_text:
                utils.open_write_close(MACHINES_FILENAME, machines_text)
        # Run the control file with namespace as both globals and locals,
        # just as if it were executed as a top-level script.
        execfile(code_file, namespace, namespace)
jadmanski10646442008-08-13 14:05:21 +00001003
1004
jadmanski10646442008-08-13 14:05:21 +00001005 def __parse_status(self, new_lines):
mbligh0d0f67d2009-11-06 03:15:03 +00001006 if not self._using_parser:
jadmanski10646442008-08-13 14:05:21 +00001007 return
1008 new_tests = self.parser.process_lines(new_lines)
1009 for test in new_tests:
1010 self.__insert_test(test)
1011
1012
1013 def __insert_test(self, test):
mbligh2b92b862008-11-22 13:25:32 +00001014 """
1015 An internal method to insert a new test result into the
jadmanski10646442008-08-13 14:05:21 +00001016 database. This method will not raise an exception, even if an
1017 error occurs during the insert, to avoid failing a test
1018 simply because of unexpected database issues."""
showard21baa452008-10-21 00:08:39 +00001019 self.num_tests_run += 1
1020 if status_lib.is_worse_than_or_equal_to(test.status, 'FAIL'):
1021 self.num_tests_failed += 1
jadmanski10646442008-08-13 14:05:21 +00001022 try:
1023 self.results_db.insert_test(self.job_model, test)
1024 except Exception:
1025 msg = ("WARNING: An unexpected error occured while "
1026 "inserting test results into the database. "
1027 "Ignoring error.\n" + traceback.format_exc())
1028 print >> sys.stderr, msg
1029
mblighcaa62c22008-04-07 21:51:17 +00001030
mblighfc3da5b2010-01-06 18:37:22 +00001031 def preprocess_client_state(self):
1032 """
1033 Produce a state file for initializing the state of a client job.
1034
1035 Creates a new client state file with all the current server state, as
1036 well as some pre-set client state.
1037
1038 @returns The path of the file the state was written into.
1039 """
1040 # initialize the sysinfo state
1041 self._state.set('client', 'sysinfo', self.sysinfo.serialize())
1042
1043 # dump the state out to a tempfile
1044 fd, file_path = tempfile.mkstemp(dir=self.tmpdir)
1045 os.close(fd)
mbligha2c99492010-01-27 22:59:50 +00001046
1047 # write_to_file doesn't need locking, we exclusively own file_path
mblighfc3da5b2010-01-06 18:37:22 +00001048 self._state.write_to_file(file_path)
1049 return file_path
1050
1051
    def postprocess_client_state(self, state_path):
        """
        Update the state of this job with the state from a client job.

        Updates the state of the server side of a job with the final state
        of a client job that was run. Updates the non-client-specific state,
        pulls in some specific bits from the client-specific state, and then
        discards the rest. Removes the state file afterwards

        @param state_path A path to the state file from the client.
        """
        # update the on-disk state: merge in the client's state file, then
        # delete it since its contents are now incorporated
        try:
            self._state.read_from_file(state_path)
            os.remove(state_path)
        except OSError, e:
            # ignore file-not-found errors (the client may never have
            # written a state file); anything else is a real failure
            if e.errno != errno.ENOENT:
                raise
            else:
                logging.debug('Client state file %s not found', state_path)

        # update the sysinfo state
        if self._state.has('client', 'sysinfo'):
            self.sysinfo.deserialize(self._state.get('client', 'sysinfo'))

        # drop all the client-specific state
        self._state.discard_namespace('client')
1080
1081
mbligh0a883702010-04-21 01:58:34 +00001082 def clear_all_known_hosts(self):
1083 """Clears known hosts files for all AbstractSSHHosts."""
1084 for host in self.hosts:
1085 if isinstance(host, abstract_ssh.AbstractSSHHost):
1086 host.clear_known_hosts()
1087
1088
# Allow a site-specific subclass of base_server_job to be substituted via the
# standard autotest site-customization mechanism; falls back to
# base_server_job when no site class is provided.
site_server_job = utils.import_site_class(
    __file__, "autotest_lib.server.site_server_job", "site_server_job",
    base_server_job)
jadmanski0afbb632008-06-06 21:10:57 +00001092
class server_job(site_server_job):
    """The concrete server job class used by autoserv; all behavior comes
    from base_server_job plus any site-specific customization."""
    pass
jadmanskif37df842009-02-11 00:03:26 +00001095
1096
class warning_manager(object):
    """Class for controlling warning logs. Manages the enabling and disabling
    of warnings."""
    def __init__(self):
        # maps warning type -> list of (start, end) disabled intervals;
        # end is None for an interval that is still open
        self.disabled_warnings = {}


    def is_valid(self, timestamp, warning_type):
        """Indicates if a warning (based on the time it occured and its type)
        is a valid warning. A warning is considered "invalid" if this type of
        warning was marked as "disabled" at the time the warning occured."""
        for start, end in self.disabled_warnings.get(warning_type, []):
            if start <= timestamp and (end is None or timestamp < end):
                return False
        return True


    def disable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, disables all further warnings of this type."""
        intervals = self.disabled_warnings.setdefault(warning_type, [])
        # Only open a new interval if there isn't already an open one.
        no_open_interval = not intervals or intervals[-1][1] is not None
        if no_open_interval:
            intervals.append((int(current_time_func()), None))


    def enable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, enables all further warnings of this type."""
        intervals = self.disabled_warnings.get(warning_type, [])
        # Close the open interval, if any, at the current time.
        if intervals and intervals[-1][1] is None:
            start = intervals[-1][0]
            intervals[-1] = (start, int(current_time_func()))