blob: 29fac23b6351e07590f470b2559d446427481dd2 [file] [log] [blame]
Dan Shi07e09af2013-04-12 09:31:29 -07001# pylint: disable-msg=C0111
2
Paul Pendlebury7c1fdcf2011-05-04 12:39:15 -07003# Copyright (c) 2011 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
mbligh57e78662008-06-17 19:53:49 +00006"""
7The main job wrapper for the server side.
8
9This is the core infrastructure. Derived from the client side job.py
10
11Copyright Martin J. Bligh, Andy Whitcroft 2007
12"""
13
Scott Zawalski91493c82013-01-25 16:15:20 -050014import getpass, os, sys, re, tempfile, time, select, platform
mblighfc3da5b2010-01-06 18:37:22 +000015import traceback, shutil, warnings, fcntl, pickle, logging, itertools, errno
showard75cdfee2009-06-10 17:40:41 +000016from autotest_lib.client.bin import sysinfo
mbligh0d0f67d2009-11-06 03:15:03 +000017from autotest_lib.client.common_lib import base_job
Scott Zawalski91493c82013-01-25 16:15:20 -050018from autotest_lib.client.common_lib import error, utils, packages
showard75cdfee2009-06-10 17:40:41 +000019from autotest_lib.client.common_lib import logging_manager
Paul Pendlebury57593562011-06-15 10:45:49 -070020from autotest_lib.server import test, subcommand, profilers
mbligh0a883702010-04-21 01:58:34 +000021from autotest_lib.server.hosts import abstract_ssh
jadmanski10646442008-08-13 14:05:21 +000022from autotest_lib.tko import db as tko_db, status_lib, utils as tko_utils
jadmanski10646442008-08-13 14:05:21 +000023
24
mbligh084bc172008-10-18 14:02:45 +000025def _control_segment_path(name):
26 """Get the pathname of the named control segment file."""
jadmanski10646442008-08-13 14:05:21 +000027 server_dir = os.path.dirname(os.path.abspath(__file__))
mbligh084bc172008-10-18 14:02:45 +000028 return os.path.join(server_dir, "control_segments", name)
jadmanski10646442008-08-13 14:05:21 +000029
30
# Filenames used inside a job's results directory.
CLIENT_CONTROL_FILENAME = 'control'
SERVER_CONTROL_FILENAME = 'control.srv'
MACHINES_FILENAME = '.machines'

# Pre-resolved paths to the control segment scripts implementing the
# standard server-side job phases (install, verify, repair, ...).
CLIENT_WRAPPER_CONTROL_FILE = _control_segment_path('client_wrapper')
CRASHDUMPS_CONTROL_FILE = _control_segment_path('crashdumps')
CRASHINFO_CONTROL_FILE = _control_segment_path('crashinfo')
INSTALL_CONTROL_FILE = _control_segment_path('install')
CLEANUP_CONTROL_FILE = _control_segment_path('cleanup')
VERIFY_CONTROL_FILE = _control_segment_path('verify')
REPAIR_CONTROL_FILE = _control_segment_path('repair')
PROVISION_CONTROL_FILE = _control_segment_path('provision')
VERIFY_JOB_REPO_URL_CONTROL_FILE = _control_segment_path('verify_job_repo_url')
RESET_CONTROL_FILE = _control_segment_path('reset')
jadmanski10646442008-08-13 14:05:21 +000045
46
mbligh062ed152009-01-13 00:57:14 +000047# by default provide a stub that generates no site data
48def _get_site_job_data_dummy(job):
49 return {}
50
51
class status_indenter(base_job.status_indenter):
    """Provide a simple integer-backed status indenter."""

    def __init__(self):
        self._indent = 0


    @property
    def indent(self):
        """The current indentation level."""
        return self._indent


    def increment(self):
        """Raise the indentation level by one."""
        self._indent += 1


    def decrement(self):
        """Lower the indentation level by one."""
        self._indent -= 1


    def get_context(self):
        """Returns a context object for use by job.get_record_context."""
        indenter = self
        saved_level = self._indent
        class context(object):
            def restore(self):
                # Roll the indenter back to the level captured at creation.
                indenter._indent = saved_level
        return context()
80
81
class server_job_record_hook(object):
    """The job.record hook for server job. Used to inject WARN messages from
    the console or vlm whenever new logs are written, and to echo any logs
    to INFO level logging. Implemented as a class so that it can use state to
    block recursive calls, so that the hook can call job.record itself to
    log WARN messages.

    Depends on job._read_warnings and job._logger.
    """
    def __init__(self, job):
        self._job = job
        self._being_called = False


    def __call__(self, entry):
        """A wrapper around the 'real' record hook, the _hook method, which
        prevents recursion. This isn't making any effort to be threadsafe,
        the intent is to outright block infinite recursion via a
        job.record->_hook->job.record->_hook->job.record... chain."""
        if self._being_called:
            return
        self._being_called = True
        try:
            self._hook(self._job, entry)
        finally:
            self._being_called = False


    @staticmethod
    def _hook(job, entry):
        """The core hook, which can safely call job.record."""
        pending = []
        # drain every warning logger and record the warnings immediately
        for timestamp, msg in job._read_warnings():
            warning = base_job.status_log_entry(
                'WARN', None, None, msg, {}, timestamp=timestamp)
            pending.append(warning)
            job.record_entry(warning)
        # echo rendered versions of the warnings plus the new entry to INFO
        pending.append(entry)
        for item in pending:
            rendered = job._logger.render_entry(item)
            logging.info(rendered)
            job._parse_status(rendered)
jadmanski2a89dac2010-06-11 14:32:58 +0000126
127
mbligh0d0f67d2009-11-06 03:15:03 +0000128class base_server_job(base_job.base_job):
129 """The server-side concrete implementation of base_job.
jadmanski10646442008-08-13 14:05:21 +0000130
mbligh0d0f67d2009-11-06 03:15:03 +0000131 Optional properties provided by this implementation:
132 serverdir
133 conmuxdir
134
135 num_tests_run
136 num_tests_failed
137
138 warning_manager
139 warning_loggers
jadmanski10646442008-08-13 14:05:21 +0000140 """
141
mbligh0d0f67d2009-11-06 03:15:03 +0000142 _STATUS_VERSION = 1
jadmanski10646442008-08-13 14:05:21 +0000143
    def __init__(self, control, args, resultdir, label, user, machines,
                 client=False, parse_job='',
                 ssh_user='root', ssh_port=22, ssh_pass='', test_retry=0,
                 group_name='', tag='', disable_sysinfo=False,
                 control_filename=SERVER_CONTROL_FILENAME):
        """
        Create a server side job object.

        @param control: The pathname of the control file.
        @param args: Passed to the control file.
        @param resultdir: Where to throw the results.
        @param label: Description of the job.
        @param user: Username for the job (email address).
        @param machines: A list of hostnames of the machines to run on.
        @param client: True if this is a client-side control file.
        @param parse_job: string, if supplied it is the job execution tag that
                the results will be passed through to the TKO parser with.
        @param ssh_user: The SSH username.  [root]
        @param ssh_port: The SSH port number.  [22]
        @param ssh_pass: The SSH passphrase, if needed.
        @param test_retry: The number of times to retry a test if the test did
                not complete successfully.
        @param group_name: If supplied, this will be written out as
                host_group_name in the keyvals file for the parser.
        @param tag: The job execution tag from the scheduler.  [optional]
        @param disable_sysinfo: Whether we should disable the sysinfo step of
                tests for a modest shortening of test time.  [optional]
        @param control_filename: The filename where the server control file
                should be written in the results directory.
        """
        super(base_server_job, self).__init__(resultdir=resultdir,
                                              test_retry=test_retry)
        # NOTE(review): 'path' is assigned but never used below; looks
        # vestigial -- confirm before removing.
        path = os.path.dirname(__file__)
        self.test_retry = test_retry
        self.control = control
        # Tracks logs that still await collection from the remote machines.
        self._uncollected_log_file = os.path.join(self.resultdir,
                                                  'uncollected_logs')
        debugdir = os.path.join(self.resultdir, 'debug')
        if not os.path.exists(debugdir):
            os.mkdir(debugdir)

        if user:
            self.user = user
        else:
            self.user = getpass.getuser()

        self.args = args
        self.label = label
        self.machines = machines
        self._client = client
        self.warning_loggers = set()
        self.warning_manager = warning_manager()
        self._ssh_user = ssh_user
        self._ssh_port = ssh_port
        self._ssh_pass = ssh_pass
        self.tag = tag
        self.last_boot_tag = None
        self.hosts = set()
        self.drop_caches = False
        self.drop_caches_between_iterations = False
        self._control_filename = control_filename
        self._disable_sysinfo = disable_sysinfo

        # Route stdout/stderr (including raw fds) through the logging
        # manager so all job output is captured.
        self.logging = logging_manager.get_logging_manager(
                manage_stdout_and_stderr=True, redirect_fds=True)
        subcommand.logging_manager_object = self.logging

        self.sysinfo = sysinfo.sysinfo(self.resultdir)
        self.profilers = profilers.profilers(self)

        # Basic metadata about this job, destined for the resultdir keyvals.
        job_data = {'label' : label, 'user' : user,
                    'hostname' : ','.join(machines),
                    'drone' : platform.node(),
                    'status_version' : str(self._STATUS_VERSION),
                    'job_started' : str(int(time.time()))}
        if group_name:
            job_data['host_group_name'] = group_name

        # only write these keyvals out on the first job in a resultdir
        if 'job_started' not in utils.read_keyval(self.resultdir):
            job_data.update(get_site_job_data(self))
            utils.write_keyval(self.resultdir, job_data)

        self._parse_job = parse_job
        # Continuous TKO parsing is only enabled for single-machine jobs.
        self._using_parser = (self._parse_job and len(machines) <= 1)
        self.pkgmgr = packages.PackageManager(
            self.autodir, run_function_dargs={'timeout':600})
        self.num_tests_run = 0
        self.num_tests_failed = 0

        self._register_subcommand_hooks()

        # these components aren't usable on the server
        self.bootloader = None
        self.harness = None

        # set up the status logger
        self._indenter = status_indenter()
        self._logger = base_job.status_logger(
            self, self._indenter, 'status.log', 'status.log',
            record_hook=server_job_record_hook(self))
244
mbligh0d0f67d2009-11-06 03:15:03 +0000245
246 @classmethod
247 def _find_base_directories(cls):
248 """
249 Determine locations of autodir, clientdir and serverdir. Assumes
250 that this file is located within serverdir and uses __file__ along
251 with relative paths to resolve the location.
252 """
253 serverdir = os.path.abspath(os.path.dirname(__file__))
254 autodir = os.path.normpath(os.path.join(serverdir, '..'))
255 clientdir = os.path.join(autodir, 'client')
256 return autodir, clientdir, serverdir
257
258
Scott Zawalski91493c82013-01-25 16:15:20 -0500259 def _find_resultdir(self, resultdir, *args, **dargs):
mbligh0d0f67d2009-11-06 03:15:03 +0000260 """
261 Determine the location of resultdir. For server jobs we expect one to
262 always be explicitly passed in to __init__, so just return that.
263 """
264 if resultdir:
265 return os.path.normpath(resultdir)
266 else:
267 return None
268
jadmanski550fdc22008-11-20 16:32:08 +0000269
jadmanski2a89dac2010-06-11 14:32:58 +0000270 def _get_status_logger(self):
271 """Return a reference to the status logger."""
272 return self._logger
273
274
jadmanskie432dd22009-01-30 15:04:51 +0000275 @staticmethod
276 def _load_control_file(path):
277 f = open(path)
278 try:
279 control_file = f.read()
280 finally:
281 f.close()
282 return re.sub('\r', '', control_file)
283
284
jadmanski550fdc22008-11-20 16:32:08 +0000285 def _register_subcommand_hooks(self):
mbligh2b92b862008-11-22 13:25:32 +0000286 """
287 Register some hooks into the subcommand modules that allow us
288 to properly clean up self.hosts created in forked subprocesses.
289 """
jadmanski550fdc22008-11-20 16:32:08 +0000290 def on_fork(cmd):
291 self._existing_hosts_on_fork = set(self.hosts)
292 def on_join(cmd):
293 new_hosts = self.hosts - self._existing_hosts_on_fork
294 for host in new_hosts:
295 host.close()
296 subcommand.subcommand.register_fork_hook(on_fork)
297 subcommand.subcommand.register_join_hook(on_join)
298
jadmanski10646442008-08-13 14:05:21 +0000299
    def init_parser(self):
        """
        Start the continuous parsing of self.resultdir. This sets up
        the database connection and inserts the basic job object into
        the database if necessary.

        No-op unless continuous parsing was enabled in __init__
        (self._using_parser).
        """
        if not self._using_parser:
            return
        # redirect parser debugging to .parse.log
        parse_log = os.path.join(self.resultdir, '.parse.log')
        # buffering=0 (Python 2) => unbuffered, so debug output hits disk
        # immediately even if the job dies.
        parse_log = open(parse_log, 'w', 0)
        tko_utils.redirect_parser_debugging(parse_log)
        # create a job model object and set up the db
        self.results_db = tko_db.db(autocommit=True)
        self.parser = status_lib.parser(self._STATUS_VERSION)
        self.job_model = self.parser.make_job(self.resultdir)
        self.parser.start(self.job_model)
        # check if a job already exists in the db and insert it if
        # it does not
        job_idx = self.results_db.find_job(self._parse_job)
        if job_idx is None:
            self.results_db.insert_job(self._parse_job, self.job_model)
        else:
            # job already present: attach its db indices to our model
            machine_idx = self.results_db.lookup_machine(self.job_model.machine)
            self.job_model.index = job_idx
            self.job_model.machine_idx = machine_idx
326
327
328 def cleanup_parser(self):
mbligh2b92b862008-11-22 13:25:32 +0000329 """
330 This should be called after the server job is finished
jadmanski10646442008-08-13 14:05:21 +0000331 to carry out any remaining cleanup (e.g. flushing any
mbligh2b92b862008-11-22 13:25:32 +0000332 remaining test results to the results db)
333 """
mbligh0d0f67d2009-11-06 03:15:03 +0000334 if not self._using_parser:
jadmanski10646442008-08-13 14:05:21 +0000335 return
336 final_tests = self.parser.end()
337 for test in final_tests:
338 self.__insert_test(test)
mbligh0d0f67d2009-11-06 03:15:03 +0000339 self._using_parser = False
jadmanski10646442008-08-13 14:05:21 +0000340
341
342 def verify(self):
Dan Shi07e09af2013-04-12 09:31:29 -0700343 """Verify machines are all ssh-able."""
jadmanski10646442008-08-13 14:05:21 +0000344 if not self.machines:
mbligh084bc172008-10-18 14:02:45 +0000345 raise error.AutoservError('No machines specified to verify')
mbligh0fce4112008-11-27 00:37:17 +0000346 if self.resultdir:
347 os.chdir(self.resultdir)
jadmanski10646442008-08-13 14:05:21 +0000348 try:
jadmanskicdd0c402008-09-19 21:21:31 +0000349 namespace = {'machines' : self.machines, 'job' : self,
mbligh0d0f67d2009-11-06 03:15:03 +0000350 'ssh_user' : self._ssh_user,
351 'ssh_port' : self._ssh_port,
352 'ssh_pass' : self._ssh_pass}
mbligh084bc172008-10-18 14:02:45 +0000353 self._execute_code(VERIFY_CONTROL_FILE, namespace, protect=False)
jadmanski10646442008-08-13 14:05:21 +0000354 except Exception, e:
mbligh2b92b862008-11-22 13:25:32 +0000355 msg = ('Verify failed\n' + str(e) + '\n' + traceback.format_exc())
jadmanski10646442008-08-13 14:05:21 +0000356 self.record('ABORT', None, None, msg)
357 raise
358
359
Dan Shi07e09af2013-04-12 09:31:29 -0700360 def reset(self):
361 """Reset machines by first cleanup then verify each machine."""
362 if not self.machines:
363 raise error.AutoservError('No machines specified to reset.')
364 if self.resultdir:
365 os.chdir(self.resultdir)
366
367 try:
368 namespace = {'machines' : self.machines, 'job' : self,
369 'ssh_user' : self._ssh_user,
370 'ssh_port' : self._ssh_port,
371 'ssh_pass' : self._ssh_pass}
372 self._execute_code(RESET_CONTROL_FILE, namespace, protect=False)
373 except Exception as e:
374 msg = ('Reset failed\n' + str(e) + '\n' +
375 traceback.format_exc())
376 self.record('ABORT', None, None, msg)
377 raise
378
379
jadmanski10646442008-08-13 14:05:21 +0000380 def repair(self, host_protection):
381 if not self.machines:
382 raise error.AutoservError('No machines specified to repair')
mbligh0fce4112008-11-27 00:37:17 +0000383 if self.resultdir:
384 os.chdir(self.resultdir)
jadmanski10646442008-08-13 14:05:21 +0000385 namespace = {'machines': self.machines, 'job': self,
mbligh0d0f67d2009-11-06 03:15:03 +0000386 'ssh_user': self._ssh_user, 'ssh_port': self._ssh_port,
387 'ssh_pass': self._ssh_pass,
jadmanski10646442008-08-13 14:05:21 +0000388 'protection_level': host_protection}
mbligh25c0b8c2009-01-24 01:44:17 +0000389
mbligh0931b0a2009-04-08 17:44:48 +0000390 self._execute_code(REPAIR_CONTROL_FILE, namespace, protect=False)
jadmanski10646442008-08-13 14:05:21 +0000391
392
Alex Millercb79ba72013-05-29 14:43:00 -0700393 def provision(self, labels):
394 """
395 Provision all hosts to match |labels|.
396
397 @param labels: A comma seperated string of labels to provision the
398 host to.
399
400 """
401 namespace = {'provision_labels': labels}
402 control = self._load_control_file(PROVISION_CONTROL_FILE)
403 self.run(control=control, namespace=namespace)
404
405
jadmanski10646442008-08-13 14:05:21 +0000406 def precheck(self):
407 """
408 perform any additional checks in derived classes.
409 """
410 pass
411
412
413 def enable_external_logging(self):
mbligh2b92b862008-11-22 13:25:32 +0000414 """
415 Start or restart external logging mechanism.
jadmanski10646442008-08-13 14:05:21 +0000416 """
417 pass
418
419
420 def disable_external_logging(self):
mbligh2b92b862008-11-22 13:25:32 +0000421 """
422 Pause or stop external logging mechanism.
jadmanski10646442008-08-13 14:05:21 +0000423 """
424 pass
425
426
427 def use_external_logging(self):
mbligh2b92b862008-11-22 13:25:32 +0000428 """
429 Return True if external logging should be used.
jadmanski10646442008-08-13 14:05:21 +0000430 """
431 return False
432
433
    def _make_parallel_wrapper(self, function, machines, log):
        """Wrap function as appropriate for calling by parallel_simple.

        @param function: The per-machine callable to wrap.
        @param machines: The list of machines to be processed.
        @param log: Whether per-machine keyvals/parsing should be set up.

        @return: A callable taking a single machine argument.
        """
        # We fork unless the call is for this job's one-and-only machine.
        is_forking = not (len(machines) == 1 and self.machines == machines)
        if self._parse_job and is_forking and log:
            def wrapper(machine):
                # Intended to run in a forked subprocess (see the fork hooks
                # registered in _register_subcommand_hooks): narrow this job
                # object to a single machine and start a per-machine parser.
                self._parse_job += "/" + machine
                self._using_parser = True
                self.machines = [machine]
                self.push_execution_context(machine)
                os.chdir(self.resultdir)
                utils.write_keyval(self.resultdir, {"hostname": machine})
                self.init_parser()
                result = function(machine)
                self.cleanup_parser()
                return result
        elif len(machines) > 1 and log:
            def wrapper(machine):
                # Multi-machine run without TKO parsing: just record the
                # machine identity in the per-machine resultdir.
                self.push_execution_context(machine)
                os.chdir(self.resultdir)
                machine_data = {'hostname' : machine,
                                'status_version' : str(self._STATUS_VERSION)}
                utils.write_keyval(self.resultdir, machine_data)
                result = function(machine)
                return result
        else:
            # single-machine, or logging disabled: call through unchanged
            wrapper = function
        return wrapper
461
462
463 def parallel_simple(self, function, machines, log=True, timeout=None,
464 return_results=False):
465 """
466 Run 'function' using parallel_simple, with an extra wrapper to handle
467 the necessary setup for continuous parsing, if possible. If continuous
468 parsing is already properly initialized then this should just work.
469
470 @param function: A callable to run in parallel given each machine.
471 @param machines: A list of machine names to be passed one per subcommand
472 invocation of function.
473 @param log: If True, output will be written to output in a subdirectory
474 named after each machine.
475 @param timeout: Seconds after which the function call should timeout.
476 @param return_results: If True instead of an AutoServError being raised
477 on any error a list of the results|exceptions from the function
478 called on each arg is returned. [default: False]
479
480 @raises error.AutotestError: If any of the functions failed.
481 """
482 wrapper = self._make_parallel_wrapper(function, machines, log)
483 return subcommand.parallel_simple(wrapper, machines,
484 log=log, timeout=timeout,
485 return_results=return_results)
486
487
488 def parallel_on_machines(self, function, machines, timeout=None):
489 """
showardcd5fac42009-07-06 20:19:43 +0000490 @param function: Called in parallel with one machine as its argument.
mbligh415dc212009-06-15 21:53:34 +0000491 @param machines: A list of machines to call function(machine) on.
492 @param timeout: Seconds after which the function call should timeout.
493
494 @returns A list of machines on which function(machine) returned
495 without raising an exception.
496 """
showardcd5fac42009-07-06 20:19:43 +0000497 results = self.parallel_simple(function, machines, timeout=timeout,
mbligh415dc212009-06-15 21:53:34 +0000498 return_results=True)
499 success_machines = []
500 for result, machine in itertools.izip(results, machines):
501 if not isinstance(result, Exception):
502 success_machines.append(machine)
503 return success_machines
jadmanski10646442008-08-13 14:05:21 +0000504
505
    # Sentinel: pass as control_file_dir to request a temporary directory.
    _USE_TEMP_DIR = object()
    def run(self, cleanup=False, install_before=False, install_after=False,
            collect_crashdumps=True, namespace={}, control=None,
            control_file_dir=None, verify_job_repo_url=False,
            only_collect_crashinfo=False, skip_crash_collection=False):
        """
        Execute this job's control file, with crash collection and cleanup.

        @param cleanup: If True, run the cleanup control segment afterwards.
        @param install_before: If True, run the install control segment on the
                machines before executing the control file.
        @param install_after: If True, run the install control segment on the
                machines after executing the control file.
        @param collect_crashdumps: If True, collect crashdumps afterwards.
        @param namespace: Extra names exposed to the control file. Copied
                before use, so the caller's dict -- and the mutable default,
                which is safe here for that reason -- is never modified.
        @param control: Control file text; defaults to the contents of
                self.control.
        @param control_file_dir: Directory to write control files into;
                defaults to self.resultdir, or a temp dir if _USE_TEMP_DIR.
        @param verify_job_repo_url: If True, run the control segment that
                verifies the job_repo_url contains autotest packages.
        @param only_collect_crashinfo: If True, skip running the control file
                and only collect crashinfo (from a prior interrupted run).
        @param skip_crash_collection: If True, skip crash dump/info
                collection entirely.
        """
        # for a normal job, make sure the uncollected logs file exists
        # for a crashinfo-only run it should already exist, bail out otherwise
        created_uncollected_logs = False
        if self.resultdir and not os.path.exists(self._uncollected_log_file):
            if only_collect_crashinfo:
                # if this is a crashinfo-only run, and there were no existing
                # uncollected logs, just bail out early
                logging.info("No existing uncollected logs, "
                             "skipping crashinfo collection")
                return
            else:
                log_file = open(self._uncollected_log_file, "w")
                pickle.dump([], log_file)
                log_file.close()
                created_uncollected_logs = True

        # use a copy so changes don't affect the original dictionary
        namespace = namespace.copy()
        machines = self.machines
        if control is None:
            if self.control is None:
                control = ''
            else:
                control = self._load_control_file(self.control)
        if control_file_dir is None:
            control_file_dir = self.resultdir

        self.aborted = False
        # expose the standard job context to the control file
        namespace['machines'] = machines
        namespace['args'] = self.args
        namespace['job'] = self
        namespace['ssh_user'] = self._ssh_user
        namespace['ssh_port'] = self._ssh_port
        namespace['ssh_pass'] = self._ssh_pass
        test_start_time = int(time.time())

        if self.resultdir:
            os.chdir(self.resultdir)
            # touch status.log so that the parser knows a job is running here
            open(self.get_status_log_path(), 'a').close()
            self.enable_external_logging()

        # assume we need crashinfo until the control file finishes cleanly
        collect_crashinfo = True
        temp_control_file_dir = None
        try:
            try:
                if install_before and machines:
                    self._execute_code(INSTALL_CONTROL_FILE, namespace)

                if only_collect_crashinfo:
                    return

                # If the verify_job_repo_url option is set but we're unable
                # to actually verify that the job_repo_url contains the autotest
                # package, this job will fail.
                if verify_job_repo_url:
                    self._execute_code(VERIFY_JOB_REPO_URL_CONTROL_FILE,
                                       namespace)
                else:
                    logging.warning('Not checking if job_repo_url contains '
                                    'autotest packages on %s', machines)

                # determine the dir to write the control files to
                cfd_specified = (control_file_dir
                                 and control_file_dir is not self._USE_TEMP_DIR)
                if cfd_specified:
                    temp_control_file_dir = None
                else:
                    temp_control_file_dir = tempfile.mkdtemp(
                        suffix='temp_control_file_dir')
                    control_file_dir = temp_control_file_dir
                server_control_file = os.path.join(control_file_dir,
                                                   self._control_filename)
                client_control_file = os.path.join(control_file_dir,
                                                   CLIENT_CONTROL_FILENAME)
                if self._client:
                    # client-side control file: run it via the client wrapper
                    namespace['control'] = control
                    utils.open_write_close(client_control_file, control)
                    shutil.copyfile(CLIENT_WRAPPER_CONTROL_FILE,
                                    server_control_file)
                else:
                    utils.open_write_close(server_control_file, control)
                logging.info("Processing control file")
                self._execute_code(server_control_file, namespace)
                logging.info("Finished processing control file")

                # no error occured, so we don't need to collect crashinfo
                collect_crashinfo = False
            except Exception, e:
                try:
                    logging.exception(
                            'Exception escaped control file, job aborting:')
                    self.record('INFO', None, None, str(e),
                                {'job_abort_reason': str(e)})
                except:
                    pass # don't let logging exceptions here interfere
                raise
        finally:
            if temp_control_file_dir:
                # Clean up temp directory used for copies of the control files
                try:
                    shutil.rmtree(temp_control_file_dir)
                except Exception, e:
                    logging.warn('Could not remove temp directory %s: %s',
                                 temp_control_file_dir, e)

            if machines and (collect_crashdumps or collect_crashinfo):
                namespace['test_start_time'] = test_start_time
                if skip_crash_collection:
                    logging.info('Skipping crash dump/info collection '
                                 'as requested.')
                elif collect_crashinfo:
                    # includes crashdumps
                    self._execute_code(CRASHINFO_CONTROL_FILE, namespace)
                else:
                    self._execute_code(CRASHDUMPS_CONTROL_FILE, namespace)
            self.disable_external_logging()
            if cleanup and machines:
                self._execute_code(CLEANUP_CONTROL_FILE, namespace)
            # only remove the uncollected-logs file if this run created it
            if self._uncollected_log_file and created_uncollected_logs:
                os.remove(self._uncollected_log_file)
            if install_after and machines:
                self._execute_code(INSTALL_CONTROL_FILE, namespace)
jadmanski10646442008-08-13 14:05:21 +0000634
635
    def run_test(self, url, *args, **dargs):
        """
        Summon a test object and run it.

        @param url: url of the test to run.
        @param args: positional args passed through to the test.
        @param dargs: keyword args passed through to the test; a 'tag' entry
                (tag to add to testname) is consumed when building the tagged
                test name.

        @return: True if the test completed without raising, False if it
                raised a TestBaseException.
        @raises Exception: re-raised if the test raised anything other than
                a TestBaseException.
        """
        if self._disable_sysinfo:
            dargs['disable_sysinfo'] = True

        group, testname = self.pkgmgr.get_package_name(url, 'test')
        testname, subdir, tag = self._build_tagged_test_name(testname, dargs)
        # NOTE(review): outputdir is unused here; _make_test_outputdir is
        # presumably called for its directory-creating side effect -- confirm.
        outputdir = self._make_test_outputdir(subdir)

        def group_func():
            try:
                test.runtest(self, url, tag, args, dargs)
            except error.TestBaseException, e:
                # test-level failures are recorded with their own exit status
                self.record(e.exit_status, subdir, testname, str(e))
                raise
            except Exception, e:
                info = str(e) + "\n" + traceback.format_exc()
                self.record('FAIL', subdir, testname, info)
                raise
            else:
                self.record('GOOD', subdir, testname, 'completed successfully')

        result, exc_info = self._run_group(testname, subdir, group_func)
        if exc_info and isinstance(exc_info[1], error.TestBaseException):
            return False
        elif exc_info:
            # re-raise non-test exceptions with their original traceback
            # (Python 2 three-argument raise)
            raise exc_info[0], exc_info[1], exc_info[2]
        else:
            return True
jadmanski10646442008-08-13 14:05:21 +0000672
673
674 def _run_group(self, name, subdir, function, *args, **dargs):
675 """\
676 Underlying method for running something inside of a group.
677 """
jadmanskide292df2008-08-26 20:51:14 +0000678 result, exc_info = None, None
jadmanski10646442008-08-13 14:05:21 +0000679 try:
680 self.record('START', subdir, name)
jadmanski52053632010-06-11 21:08:10 +0000681 result = function(*args, **dargs)
jadmanski10646442008-08-13 14:05:21 +0000682 except error.TestBaseException, e:
jadmanskib88d6dc2009-01-10 00:33:18 +0000683 self.record("END %s" % e.exit_status, subdir, name)
jadmanskide292df2008-08-26 20:51:14 +0000684 exc_info = sys.exc_info()
jadmanski10646442008-08-13 14:05:21 +0000685 except Exception, e:
686 err_msg = str(e) + '\n'
687 err_msg += traceback.format_exc()
688 self.record('END ABORT', subdir, name, err_msg)
689 raise error.JobError(name + ' failed\n' + traceback.format_exc())
690 else:
691 self.record('END GOOD', subdir, name)
692
jadmanskide292df2008-08-26 20:51:14 +0000693 return result, exc_info
jadmanski10646442008-08-13 14:05:21 +0000694
695
696 def run_group(self, function, *args, **dargs):
697 """\
698 function:
699 subroutine to run
700 *args:
701 arguments for the function
702 """
703
704 name = function.__name__
705
706 # Allow the tag for the group to be specified.
707 tag = dargs.pop('tag', None)
708 if tag:
709 name = tag
710
jadmanskide292df2008-08-26 20:51:14 +0000711 return self._run_group(name, None, function, *args, **dargs)[0]
jadmanski10646442008-08-13 14:05:21 +0000712
713
714 def run_reboot(self, reboot_func, get_kernel_func):
715 """\
716 A specialization of run_group meant specifically for handling
717 a reboot. Includes support for capturing the kernel version
718 after the reboot.
719
720 reboot_func: a function that carries out the reboot
721
722 get_kernel_func: a function that returns a string
723 representing the kernel version.
724 """
jadmanski10646442008-08-13 14:05:21 +0000725 try:
726 self.record('START', None, 'reboot')
jadmanski10646442008-08-13 14:05:21 +0000727 reboot_func()
728 except Exception, e:
jadmanski10646442008-08-13 14:05:21 +0000729 err_msg = str(e) + '\n' + traceback.format_exc()
730 self.record('END FAIL', None, 'reboot', err_msg)
jadmanski4b51d542009-04-08 14:17:16 +0000731 raise
jadmanski10646442008-08-13 14:05:21 +0000732 else:
733 kernel = get_kernel_func()
jadmanski10646442008-08-13 14:05:21 +0000734 self.record('END GOOD', None, 'reboot',
Dale Curtis74a314b2011-06-23 14:55:46 -0700735 optional_fields={"kernel": kernel})
jadmanski10646442008-08-13 14:05:21 +0000736
737
jadmanskie432dd22009-01-30 15:04:51 +0000738 def run_control(self, path):
739 """Execute a control file found at path (relative to the autotest
740 path). Intended for executing a control file within a control file,
741 not for running the top-level job control file."""
742 path = os.path.join(self.autodir, path)
743 control_file = self._load_control_file(path)
mbligh0d0f67d2009-11-06 03:15:03 +0000744 self.run(control=control_file, control_file_dir=self._USE_TEMP_DIR)
jadmanskie432dd22009-01-30 15:04:51 +0000745
746
jadmanskic09fc152008-10-15 17:56:59 +0000747 def add_sysinfo_command(self, command, logfile=None, on_every_test=False):
mbligh4395bbd2009-03-25 19:34:17 +0000748 self._add_sysinfo_loggable(sysinfo.command(command, logf=logfile),
jadmanskic09fc152008-10-15 17:56:59 +0000749 on_every_test)
750
751
752 def add_sysinfo_logfile(self, file, on_every_test=False):
753 self._add_sysinfo_loggable(sysinfo.logfile(file), on_every_test)
754
755
756 def _add_sysinfo_loggable(self, loggable, on_every_test):
757 if on_every_test:
758 self.sysinfo.test_loggables.add(loggable)
759 else:
760 self.sysinfo.boot_loggables.add(loggable)
761
762
jadmanski10646442008-08-13 14:05:21 +0000763 def _read_warnings(self):
jadmanskif37df842009-02-11 00:03:26 +0000764 """Poll all the warning loggers and extract any new warnings that have
765 been logged. If the warnings belong to a category that is currently
766 disabled, this method will discard them and they will no longer be
767 retrievable.
768
769 Returns a list of (timestamp, message) tuples, where timestamp is an
770 integer epoch timestamp."""
jadmanski10646442008-08-13 14:05:21 +0000771 warnings = []
772 while True:
773 # pull in a line of output from every logger that has
774 # output ready to be read
mbligh2b92b862008-11-22 13:25:32 +0000775 loggers, _, _ = select.select(self.warning_loggers, [], [], 0)
jadmanski10646442008-08-13 14:05:21 +0000776 closed_loggers = set()
777 for logger in loggers:
778 line = logger.readline()
779 # record any broken pipes (aka line == empty)
780 if len(line) == 0:
781 closed_loggers.add(logger)
782 continue
jadmanskif37df842009-02-11 00:03:26 +0000783 # parse out the warning
784 timestamp, msgtype, msg = line.split('\t', 2)
785 timestamp = int(timestamp)
786 # if the warning is valid, add it to the results
787 if self.warning_manager.is_valid(timestamp, msgtype):
788 warnings.append((timestamp, msg.strip()))
jadmanski10646442008-08-13 14:05:21 +0000789
790 # stop listening to loggers that are closed
791 self.warning_loggers -= closed_loggers
792
793 # stop if none of the loggers have any output left
794 if not loggers:
795 break
796
797 # sort into timestamp order
798 warnings.sort()
799 return warnings
800
801
showardcc929362010-01-25 21:20:41 +0000802 def _unique_subdirectory(self, base_subdirectory_name):
803 """Compute a unique results subdirectory based on the given name.
804
805 Appends base_subdirectory_name with a number as necessary to find a
806 directory name that doesn't already exist.
807 """
808 subdirectory = base_subdirectory_name
809 counter = 1
810 while os.path.exists(os.path.join(self.resultdir, subdirectory)):
811 subdirectory = base_subdirectory_name + '.' + str(counter)
812 counter += 1
813 return subdirectory
814
815
jadmanski52053632010-06-11 21:08:10 +0000816 def get_record_context(self):
817 """Returns an object representing the current job.record context.
818
819 The object returned is an opaque object with a 0-arg restore method
820 which can be called to restore the job.record context (i.e. indentation)
821 to the current level. The intention is that it should be used when
822 something external which generate job.record calls (e.g. an autotest
823 client) can fail catastrophically and the server job record state
824 needs to be reset to its original "known good" state.
825
826 @return: A context object with a 0-arg restore() method."""
827 return self._indenter.get_context()
828
829
showardcc929362010-01-25 21:20:41 +0000830 def record_summary(self, status_code, test_name, reason='', attributes=None,
831 distinguishing_attributes=(), child_test_ids=None):
832 """Record a summary test result.
833
834 @param status_code: status code string, see
835 common_lib.log.is_valid_status()
836 @param test_name: name of the test
837 @param reason: (optional) string providing detailed reason for test
838 outcome
839 @param attributes: (optional) dict of string keyvals to associate with
840 this result
841 @param distinguishing_attributes: (optional) list of attribute names
842 that should be used to distinguish identically-named test
843 results. These attributes should be present in the attributes
844 parameter. This is used to generate user-friendly subdirectory
845 names.
846 @param child_test_ids: (optional) list of test indices for test results
847 used in generating this result.
848 """
849 subdirectory_name_parts = [test_name]
850 for attribute in distinguishing_attributes:
851 assert attributes
852 assert attribute in attributes, '%s not in %s' % (attribute,
853 attributes)
854 subdirectory_name_parts.append(attributes[attribute])
855 base_subdirectory_name = '.'.join(subdirectory_name_parts)
856
857 subdirectory = self._unique_subdirectory(base_subdirectory_name)
858 subdirectory_path = os.path.join(self.resultdir, subdirectory)
859 os.mkdir(subdirectory_path)
860
861 self.record(status_code, subdirectory, test_name,
862 status=reason, optional_fields={'is_summary': True})
863
864 if attributes:
865 utils.write_keyval(subdirectory_path, attributes)
866
867 if child_test_ids:
868 ids_string = ','.join(str(test_id) for test_id in child_test_ids)
869 summary_data = {'child_test_ids': ids_string}
870 utils.write_keyval(os.path.join(subdirectory_path, 'summary_data'),
871 summary_data)
872
873
jadmanski16a7ff72009-04-01 18:19:53 +0000874 def disable_warnings(self, warning_type):
jadmanskif37df842009-02-11 00:03:26 +0000875 self.warning_manager.disable_warnings(warning_type)
jadmanski16a7ff72009-04-01 18:19:53 +0000876 self.record("INFO", None, None,
877 "disabling %s warnings" % warning_type,
878 {"warnings.disable": warning_type})
jadmanskif37df842009-02-11 00:03:26 +0000879
880
jadmanski16a7ff72009-04-01 18:19:53 +0000881 def enable_warnings(self, warning_type):
jadmanskif37df842009-02-11 00:03:26 +0000882 self.warning_manager.enable_warnings(warning_type)
jadmanski16a7ff72009-04-01 18:19:53 +0000883 self.record("INFO", None, None,
884 "enabling %s warnings" % warning_type,
885 {"warnings.enable": warning_type})
jadmanskif37df842009-02-11 00:03:26 +0000886
887
jadmanski779bd292009-03-19 17:33:33 +0000888 def get_status_log_path(self, subdir=None):
889 """Return the path to the job status log.
890
891 @param subdir - Optional paramter indicating that you want the path
892 to a subdirectory status log.
893
894 @returns The path where the status log should be.
895 """
mbligh210bae62009-04-01 18:33:13 +0000896 if self.resultdir:
897 if subdir:
898 return os.path.join(self.resultdir, subdir, "status.log")
899 else:
900 return os.path.join(self.resultdir, "status.log")
jadmanski779bd292009-03-19 17:33:33 +0000901 else:
mbligh210bae62009-04-01 18:33:13 +0000902 return None
jadmanski779bd292009-03-19 17:33:33 +0000903
904
jadmanski6bb32d72009-03-19 20:25:24 +0000905 def _update_uncollected_logs_list(self, update_func):
906 """Updates the uncollected logs list in a multi-process safe manner.
907
908 @param update_func - a function that updates the list of uncollected
909 logs. Should take one parameter, the list to be updated.
910 """
Dan Shi07e09af2013-04-12 09:31:29 -0700911 # Skip log collection if file _uncollected_log_file does not exist.
912 if not (self._uncollected_log_file and
913 os.path.exists(self._uncollected_log_file)):
914 return
mbligh0d0f67d2009-11-06 03:15:03 +0000915 if self._uncollected_log_file:
916 log_file = open(self._uncollected_log_file, "r+")
mbligha788dc42009-03-26 21:10:16 +0000917 fcntl.flock(log_file, fcntl.LOCK_EX)
jadmanski6bb32d72009-03-19 20:25:24 +0000918 try:
919 uncollected_logs = pickle.load(log_file)
920 update_func(uncollected_logs)
921 log_file.seek(0)
922 log_file.truncate()
923 pickle.dump(uncollected_logs, log_file)
jadmanski3bff9092009-04-22 18:09:47 +0000924 log_file.flush()
jadmanski6bb32d72009-03-19 20:25:24 +0000925 finally:
926 fcntl.flock(log_file, fcntl.LOCK_UN)
927 log_file.close()
928
929
930 def add_client_log(self, hostname, remote_path, local_path):
931 """Adds a new set of client logs to the list of uncollected logs,
932 to allow for future log recovery.
933
934 @param host - the hostname of the machine holding the logs
935 @param remote_path - the directory on the remote machine holding logs
936 @param local_path - the local directory to copy the logs into
937 """
938 def update_func(logs_list):
939 logs_list.append((hostname, remote_path, local_path))
940 self._update_uncollected_logs_list(update_func)
941
942
943 def remove_client_log(self, hostname, remote_path, local_path):
944 """Removes a set of client logs from the list of uncollected logs,
945 to allow for future log recovery.
946
947 @param host - the hostname of the machine holding the logs
948 @param remote_path - the directory on the remote machine holding logs
949 @param local_path - the local directory to copy the logs into
950 """
951 def update_func(logs_list):
952 logs_list.remove((hostname, remote_path, local_path))
953 self._update_uncollected_logs_list(update_func)
954
955
mbligh0d0f67d2009-11-06 03:15:03 +0000956 def get_client_logs(self):
957 """Retrieves the list of uncollected logs, if it exists.
958
959 @returns A list of (host, remote_path, local_path) tuples. Returns
960 an empty list if no uncollected logs file exists.
961 """
962 log_exists = (self._uncollected_log_file and
963 os.path.exists(self._uncollected_log_file))
964 if log_exists:
965 return pickle.load(open(self._uncollected_log_file))
966 else:
967 return []
968
969
    def _fill_server_control_namespace(self, namespace, protect=True):
        """
        Prepare a namespace to be used when executing server control files.

        This sets up the control file API by importing modules and making them
        available under the appropriate names within namespace.

        For use by _execute_code().

        Args:
          namespace: The namespace dictionary to fill in.
          protect: Boolean.  If True (the default) any operation that would
              clobber an existing entry in namespace will cause an error.
        Raises:
          error.AutoservError: When a name would be clobbered by import.
        """
        def _import_names(module_name, names=()):
            """
            Import a module and assign named attributes into namespace.

            Args:
                module_name: The string module name.
                names: A limiting list of names to import from module_name.  If
                    empty (the default), all names are imported from the module
                    similar to a "from foo.bar import *" statement.
            Raises:
                error.AutoservError: When a name being imported would clobber
                    a name already in namespace.
            """
            # __import__ with a non-empty fromlist returns the leaf module;
            # with an empty one it returns the top-level package
            module = __import__(module_name, {}, {}, names)

            # No names supplied?  Import * from the lowest level module.
            # (Ugh, why do I have to implement this part myself?)
            if not names:
                # walk down to the leaf submodule by hand
                for submodule_name in module_name.split('.')[1:]:
                    module = getattr(module, submodule_name)
                if hasattr(module, '__all__'):
                    names = getattr(module, '__all__')
                else:
                    names = dir(module)

            # Install each name into namespace, checking to make sure it
            # doesn't override anything that already exists.
            for name in names:
                # Check for conflicts to help prevent future problems.
                if name in namespace and protect:
                    if namespace[name] is not getattr(module, name):
                        raise error.AutoservError('importing name '
                                '%s from %s %r would override %r' %
                                (name, module_name, getattr(module, name),
                                 namespace[name]))
                    else:
                        # Encourage cleanliness and the use of __all__ for a
                        # more concrete API with less surprises on '*' imports.
                        warnings.warn('%s (%r) being imported from %s for use '
                                      'in server control files is not the '
                                      'first occurrance of that import.' %
                                      (name, namespace[name], module_name))

                namespace[name] = getattr(module, name)


        # This is the equivalent of prepending a bunch of import statements to
        # the front of the control script.
        namespace.update(os=os, sys=sys, logging=logging)
        _import_names('autotest_lib.server',
                ('hosts', 'autotest', 'kvm', 'git', 'standalone_profiler',
                 'source_kernel', 'rpm_kernel', 'deb_kernel', 'git_kernel'))
        _import_names('autotest_lib.server.subcommand',
                      ('parallel', 'parallel_simple', 'subcommand'))
        _import_names('autotest_lib.server.utils',
                      ('run', 'get_tmp_dir', 'sh_escape', 'parse_machine'))
        _import_names('autotest_lib.client.common_lib.error')
        _import_names('autotest_lib.client.common_lib.barrier', ('barrier',))

        # Inject ourself as the job object into other classes within the API.
        # (Yuck, this injection is a gross thing be part of a public API. -gps)
        #
        # XXX Base & SiteAutotest do not appear to use .job.  Who does?
        namespace['autotest'].Autotest.job = self
        # server.hosts.base_classes.Host uses .job.
        namespace['hosts'].Host.job = self
        # propagate the job's ssh settings to any hosts the control file makes
        namespace['hosts'].factory.ssh_user = self._ssh_user
        namespace['hosts'].factory.ssh_port = self._ssh_port
        namespace['hosts'].factory.ssh_pass = self._ssh_pass
mbligh084bc172008-10-18 14:02:45 +00001055
1056
    def _execute_code(self, code_file, namespace, protect=True):
        """
        Execute code using a copy of namespace as a server control script.

        Unless protect_namespace is explicitly set to False, the dict will not
        be modified.

        Args:
          code_file: The filename of the control file to execute.
          namespace: A dict containing names to make available during execution.
          protect: Boolean.  If True (the default) a copy of the namespace dict
              is used during execution to prevent the code from modifying its
              contents outside of this function.  If False the raw dict is
              passed in and modifications will be allowed.
        """
        if protect:
            namespace = namespace.copy()
        self._fill_server_control_namespace(namespace, protect=protect)
        # TODO: Simplify and get rid of the special cases for only 1 machine.
        if len(self.machines) > 1:
            machines_text = '\n'.join(self.machines) + '\n'
            # Only rewrite the file if it does not match our machine list.
            try:
                machines_f = open(MACHINES_FILENAME, 'r')
                existing_machines_text = machines_f.read()
                machines_f.close()
            except EnvironmentError:
                # a missing/unreadable machines file never matches, so it
                # gets (re)written below
                existing_machines_text = None
            if machines_text != existing_machines_text:
                utils.open_write_close(MACHINES_FILENAME, machines_text)
        # run the control file with namespace as both globals and locals so
        # its assignments are visible back in namespace
        execfile(code_file, namespace, namespace)
jadmanski10646442008-08-13 14:05:21 +00001088
1089
jadmanskie29d0e42010-06-17 16:06:52 +00001090 def _parse_status(self, new_line):
mbligh0d0f67d2009-11-06 03:15:03 +00001091 if not self._using_parser:
jadmanski10646442008-08-13 14:05:21 +00001092 return
jadmanskie29d0e42010-06-17 16:06:52 +00001093 new_tests = self.parser.process_lines([new_line])
jadmanski10646442008-08-13 14:05:21 +00001094 for test in new_tests:
1095 self.__insert_test(test)
1096
1097
    def __insert_test(self, test):
        """
        An internal method to insert a new test result into the
        database. This method will not raise an exception, even if an
        error occurs during the insert, to avoid failing a test
        simply because of unexpected database issues."""
        # keep the job's pass/fail counters in sync with the parsed results
        self.num_tests_run += 1
        if status_lib.is_worse_than_or_equal_to(test.status, 'FAIL'):
            self.num_tests_failed += 1
        try:
            self.results_db.insert_test(self.job_model, test)
        except Exception:
            # deliberately best-effort: a database hiccup must not turn a
            # finished test run into a failure, so just report to stderr
            msg = ("WARNING: An unexpected error occured while "
                   "inserting test results into the database. "
                   "Ignoring error.\n" + traceback.format_exc())
            print >> sys.stderr, msg
1114
mblighcaa62c22008-04-07 21:51:17 +00001115
mblighfc3da5b2010-01-06 18:37:22 +00001116 def preprocess_client_state(self):
1117 """
1118 Produce a state file for initializing the state of a client job.
1119
1120 Creates a new client state file with all the current server state, as
1121 well as some pre-set client state.
1122
1123 @returns The path of the file the state was written into.
1124 """
1125 # initialize the sysinfo state
1126 self._state.set('client', 'sysinfo', self.sysinfo.serialize())
1127
1128 # dump the state out to a tempfile
1129 fd, file_path = tempfile.mkstemp(dir=self.tmpdir)
1130 os.close(fd)
mbligha2c99492010-01-27 22:59:50 +00001131
1132 # write_to_file doesn't need locking, we exclusively own file_path
mblighfc3da5b2010-01-06 18:37:22 +00001133 self._state.write_to_file(file_path)
1134 return file_path
1135
1136
1137 def postprocess_client_state(self, state_path):
1138 """
1139 Update the state of this job with the state from a client job.
1140
1141 Updates the state of the server side of a job with the final state
1142 of a client job that was run. Updates the non-client-specific state,
1143 pulls in some specific bits from the client-specific state, and then
1144 discards the rest. Removes the state file afterwards
1145
1146 @param state_file A path to the state file from the client.
1147 """
1148 # update the on-disk state
mblighfc3da5b2010-01-06 18:37:22 +00001149 try:
jadmanskib6e7bdb2010-04-13 16:00:39 +00001150 self._state.read_from_file(state_path)
mblighfc3da5b2010-01-06 18:37:22 +00001151 os.remove(state_path)
mbligha2c99492010-01-27 22:59:50 +00001152 except OSError, e:
mblighfc3da5b2010-01-06 18:37:22 +00001153 # ignore file-not-found errors
1154 if e.errno != errno.ENOENT:
1155 raise
jadmanskib6e7bdb2010-04-13 16:00:39 +00001156 else:
1157 logging.debug('Client state file %s not found', state_path)
mblighfc3da5b2010-01-06 18:37:22 +00001158
1159 # update the sysinfo state
1160 if self._state.has('client', 'sysinfo'):
1161 self.sysinfo.deserialize(self._state.get('client', 'sysinfo'))
1162
1163 # drop all the client-specific state
1164 self._state.discard_namespace('client')
1165
1166
mbligh0a883702010-04-21 01:58:34 +00001167 def clear_all_known_hosts(self):
1168 """Clears known hosts files for all AbstractSSHHosts."""
1169 for host in self.hosts:
1170 if isinstance(host, abstract_ssh.AbstractSSHHost):
1171 host.clear_known_hosts()
1172
1173
class warning_manager(object):
    """Class for controlling warning logs. Manages the enabling and disabling
    of warnings."""
    def __init__(self):
        # warning type -> list of (start, end) epoch intervals during which
        # the type was disabled; end is None while it is still disabled
        self.disabled_warnings = {}


    def is_valid(self, timestamp, warning_type):
        """Indicates if a warning (based on the time it occured and its type)
        is a valid warning. A warning is considered "invalid" if this type of
        warning was marked as "disabled" at the time the warning occured."""
        for interval in self.disabled_warnings.get(warning_type, []):
            start, end = interval
            if start <= timestamp and (end is None or timestamp < end):
                return False
        return True


    def disable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, disables all further warnings of this type."""
        intervals = self.disabled_warnings.setdefault(warning_type, [])
        currently_disabled = intervals and intervals[-1][1] is None
        if not currently_disabled:
            # open a new disabled interval starting now
            intervals.append((int(current_time_func()), None))


    def enable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, enables all further warnings of this type."""
        intervals = self.disabled_warnings.get(warning_type, [])
        if intervals and intervals[-1][1] is None:
            # close the currently-open disabled interval at the present time
            start, _ = intervals[-1]
            intervals[-1] = (start, int(current_time_func()))
Paul Pendlebury57593562011-06-15 10:45:49 -07001205
1206
# load up site-specific code for generating site-specific job data
get_site_job_data = utils.import_site_function(__file__,
        "autotest_lib.server.site_server_job", "get_site_job_data",
        _get_site_job_data_dummy)


# use the site-specific server_job subclass if one is provided, falling
# back to base_server_job otherwise
site_server_job = utils.import_site_class(
    __file__, "autotest_lib.server.site_server_job", "site_server_job",
    base_server_job)
1216
1217
class server_job(site_server_job):
    """The concrete server job class used by autoserv; site_server_job
    already layers any site-specific behavior over base_server_job."""
    pass