blob: d6a21775fee803bd3cf7d9e963e731576ed1a8a1 [file] [log] [blame]
Dan Shi07e09af2013-04-12 09:31:29 -07001# pylint: disable-msg=C0111
2
Paul Pendlebury7c1fdcf2011-05-04 12:39:15 -07003# Copyright (c) 2011 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
mbligh57e78662008-06-17 19:53:49 +00006"""
7The main job wrapper for the server side.
8
9This is the core infrastructure. Derived from the client side job.py
10
11Copyright Martin J. Bligh, Andy Whitcroft 2007
12"""
13
Scott Zawalski91493c82013-01-25 16:15:20 -050014import getpass, os, sys, re, tempfile, time, select, platform
mblighfc3da5b2010-01-06 18:37:22 +000015import traceback, shutil, warnings, fcntl, pickle, logging, itertools, errno
showard75cdfee2009-06-10 17:40:41 +000016from autotest_lib.client.bin import sysinfo
mbligh0d0f67d2009-11-06 03:15:03 +000017from autotest_lib.client.common_lib import base_job
Scott Zawalski91493c82013-01-25 16:15:20 -050018from autotest_lib.client.common_lib import error, utils, packages
showard75cdfee2009-06-10 17:40:41 +000019from autotest_lib.client.common_lib import logging_manager
Paul Pendlebury57593562011-06-15 10:45:49 -070020from autotest_lib.server import test, subcommand, profilers
mbligh0a883702010-04-21 01:58:34 +000021from autotest_lib.server.hosts import abstract_ssh
jadmanski10646442008-08-13 14:05:21 +000022from autotest_lib.tko import db as tko_db, status_lib, utils as tko_utils
jadmanski10646442008-08-13 14:05:21 +000023
24
mbligh084bc172008-10-18 14:02:45 +000025def _control_segment_path(name):
26 """Get the pathname of the named control segment file."""
jadmanski10646442008-08-13 14:05:21 +000027 server_dir = os.path.dirname(os.path.abspath(__file__))
mbligh084bc172008-10-18 14:02:45 +000028 return os.path.join(server_dir, "control_segments", name)
jadmanski10646442008-08-13 14:05:21 +000029
30
# Well-known filenames written into a job's results directory.
CLIENT_CONTROL_FILENAME = 'control'
SERVER_CONTROL_FILENAME = 'control.srv'
MACHINES_FILENAME = '.machines'

# Pre-resolved absolute paths to the standard control segment files.
CLIENT_WRAPPER_CONTROL_FILE = _control_segment_path('client_wrapper')
CRASHDUMPS_CONTROL_FILE = _control_segment_path('crashdumps')
CRASHINFO_CONTROL_FILE = _control_segment_path('crashinfo')
INSTALL_CONTROL_FILE = _control_segment_path('install')
CLEANUP_CONTROL_FILE = _control_segment_path('cleanup')
VERIFY_CONTROL_FILE = _control_segment_path('verify')
REPAIR_CONTROL_FILE = _control_segment_path('repair')
PROVISION_CONTROL_FILE = _control_segment_path('provision')
VERIFY_JOB_REPO_URL_CONTROL_FILE = _control_segment_path('verify_job_repo_url')
RESET_CONTROL_FILE = _control_segment_path('reset')
jadmanski10646442008-08-13 14:05:21 +000045
46
mbligh062ed152009-01-13 00:57:14 +000047# by default provide a stub that generates no site data
48def _get_site_job_data_dummy(job):
49 return {}
50
51
class status_indenter(base_job.status_indenter):
    """Simple status indenter backed by a plain integer counter."""

    def __init__(self):
        self._indent = 0


    @property
    def indent(self):
        """The current indentation level."""
        return self._indent


    def increment(self):
        """Increase the indentation level by one."""
        self._indent += 1


    def decrement(self):
        """Decrease the indentation level by one."""
        self._indent -= 1


    def get_context(self):
        """Returns a context object for use by job.get_record_context.

        The returned object snapshots the current level; calling its
        restore() puts this indenter back at that level.
        """
        class _snapshot(object):
            def __init__(self, owner, level):
                self._owner = owner
                self._level = level
            def restore(self):
                self._owner._indent = self._level
        return _snapshot(self, self._indent)
80
81
class server_job_record_hook(object):
    """The job.record hook for server job. Used to inject WARN messages from
    the console or vlm whenever new logs are written, and to echo any logs
    to INFO level logging. Implemented as a class so that it can use state to
    block recursive calls, so that the hook can call job.record itself to
    log WARN messages.

    Depends on job._read_warnings and job._logger.
    """
    def __init__(self, job):
        self._job = job
        self._being_called = False


    def __call__(self, entry):
        """Reentrancy guard around the real hook, the _hook method.

        This makes no attempt to be threadsafe; the sole goal is to break
        a potential job.record->_hook->job.record->_hook->... chain of
        infinite recursion outright.
        """
        if self._being_called:
            return
        self._being_called = True
        try:
            self._hook(self._job, entry)
        finally:
            self._being_called = False


    @staticmethod
    def _hook(job, entry):
        """The core hook, which can safely call job.record."""
        # Poll all our warning loggers for new warnings, recording each
        # warning entry as it is collected.
        pending = []
        for timestamp, msg in job._read_warnings():
            warning = base_job.status_log_entry(
                    'WARN', None, None, msg, {}, timestamp=timestamp)
            pending.append(warning)
            job.record_entry(warning)
        pending.append(entry)
        # Echo rendered versions of all the status logs to INFO logging
        # and feed them to the inline status parser.
        for item in pending:
            rendered = job._logger.render_entry(item)
            logging.info(rendered)
            job._parse_status(rendered)
jadmanski2a89dac2010-06-11 14:32:58 +0000126
127
class base_server_job(base_job.base_job):
    """The server-side concrete implementation of base_job.

    Optional properties provided by this implementation:
        serverdir
        conmuxdir

        num_tests_run
        num_tests_failed

        warning_manager
        warning_loggers
    """

    # Version of the status log format that this job writes; fed to the
    # TKO status_lib parser and recorded in the job keyvals.
    _STATUS_VERSION = 1
jadmanski10646442008-08-13 14:05:21 +0000143
144 def __init__(self, control, args, resultdir, label, user, machines,
145 client=False, parse_job='',
Scott Zawalski91493c82013-01-25 16:15:20 -0500146 ssh_user='root', ssh_port=22, ssh_pass='', test_retry=0,
Christopher Wiley8a91f232013-07-09 11:02:27 -0700147 group_name='', tag='', disable_sysinfo=False,
mblighe0cbc912010-03-11 18:03:07 +0000148 control_filename=SERVER_CONTROL_FILENAME):
jadmanski10646442008-08-13 14:05:21 +0000149 """
mbligh374f3412009-05-13 21:29:45 +0000150 Create a server side job object.
mblighb5dac432008-11-27 00:38:44 +0000151
mblighe7d9c602009-07-02 19:02:33 +0000152 @param control: The pathname of the control file.
153 @param args: Passed to the control file.
154 @param resultdir: Where to throw the results.
155 @param label: Description of the job.
156 @param user: Username for the job (email address).
157 @param client: True if this is a client-side control file.
158 @param parse_job: string, if supplied it is the job execution tag that
159 the results will be passed through to the TKO parser with.
160 @param ssh_user: The SSH username. [root]
161 @param ssh_port: The SSH port number. [22]
162 @param ssh_pass: The SSH passphrase, if needed.
Scott Zawalski91493c82013-01-25 16:15:20 -0500163 @param test_retry: The number of times to retry a test if the test did
164 not complete successfully.
mblighe7d9c602009-07-02 19:02:33 +0000165 @param group_name: If supplied, this will be written out as
mbligh374f3412009-05-13 21:29:45 +0000166 host_group_name in the keyvals file for the parser.
mblighe7d9c602009-07-02 19:02:33 +0000167 @param tag: The job execution tag from the scheduler. [optional]
Christopher Wiley8a91f232013-07-09 11:02:27 -0700168 @param disable_sysinfo: Whether we should disable the sysinfo step of
169 tests for a modest shortening of test time. [optional]
mblighe0cbc912010-03-11 18:03:07 +0000170 @param control_filename: The filename where the server control file
171 should be written in the results directory.
jadmanski10646442008-08-13 14:05:21 +0000172 """
Scott Zawalski91493c82013-01-25 16:15:20 -0500173 super(base_server_job, self).__init__(resultdir=resultdir,
174 test_retry=test_retry)
mbligh0d0f67d2009-11-06 03:15:03 +0000175 path = os.path.dirname(__file__)
Scott Zawalski91493c82013-01-25 16:15:20 -0500176 self.test_retry = test_retry
mbligh0d0f67d2009-11-06 03:15:03 +0000177 self.control = control
178 self._uncollected_log_file = os.path.join(self.resultdir,
179 'uncollected_logs')
180 debugdir = os.path.join(self.resultdir, 'debug')
181 if not os.path.exists(debugdir):
182 os.mkdir(debugdir)
183
184 if user:
185 self.user = user
186 else:
187 self.user = getpass.getuser()
188
jadmanski808f4b12010-04-09 22:30:31 +0000189 self.args = args
Peter Mayo7a875762012-06-13 14:38:15 -0400190 self.label = label
jadmanski10646442008-08-13 14:05:21 +0000191 self.machines = machines
mbligh0d0f67d2009-11-06 03:15:03 +0000192 self._client = client
jadmanski10646442008-08-13 14:05:21 +0000193 self.warning_loggers = set()
jadmanskif37df842009-02-11 00:03:26 +0000194 self.warning_manager = warning_manager()
mbligh0d0f67d2009-11-06 03:15:03 +0000195 self._ssh_user = ssh_user
196 self._ssh_port = ssh_port
197 self._ssh_pass = ssh_pass
mblighe7d9c602009-07-02 19:02:33 +0000198 self.tag = tag
mbligh09108442008-10-15 16:27:38 +0000199 self.last_boot_tag = None
jadmanski53aaf382008-11-17 16:22:31 +0000200 self.hosts = set()
mbligh0d0f67d2009-11-06 03:15:03 +0000201 self.drop_caches = False
mblighb5dac432008-11-27 00:38:44 +0000202 self.drop_caches_between_iterations = False
mblighe0cbc912010-03-11 18:03:07 +0000203 self._control_filename = control_filename
Christopher Wiley8a91f232013-07-09 11:02:27 -0700204 self._disable_sysinfo = disable_sysinfo
jadmanski10646442008-08-13 14:05:21 +0000205
showard75cdfee2009-06-10 17:40:41 +0000206 self.logging = logging_manager.get_logging_manager(
207 manage_stdout_and_stderr=True, redirect_fds=True)
208 subcommand.logging_manager_object = self.logging
jadmanski10646442008-08-13 14:05:21 +0000209
mbligh0d0f67d2009-11-06 03:15:03 +0000210 self.sysinfo = sysinfo.sysinfo(self.resultdir)
jadmanski043e1132008-11-19 17:10:32 +0000211 self.profilers = profilers.profilers(self)
jadmanskic09fc152008-10-15 17:56:59 +0000212
jadmanski10646442008-08-13 14:05:21 +0000213 job_data = {'label' : label, 'user' : user,
214 'hostname' : ','.join(machines),
Eric Li861b2d52011-02-04 14:50:35 -0800215 'drone' : platform.node(),
mbligh0d0f67d2009-11-06 03:15:03 +0000216 'status_version' : str(self._STATUS_VERSION),
showard170873e2009-01-07 00:22:26 +0000217 'job_started' : str(int(time.time()))}
mbligh374f3412009-05-13 21:29:45 +0000218 if group_name:
219 job_data['host_group_name'] = group_name
jadmanski10646442008-08-13 14:05:21 +0000220
mbligh0d0f67d2009-11-06 03:15:03 +0000221 # only write these keyvals out on the first job in a resultdir
222 if 'job_started' not in utils.read_keyval(self.resultdir):
223 job_data.update(get_site_job_data(self))
224 utils.write_keyval(self.resultdir, job_data)
225
226 self._parse_job = parse_job
showardcc929362010-01-25 21:20:41 +0000227 self._using_parser = (self._parse_job and len(machines) <= 1)
mbligh0d0f67d2009-11-06 03:15:03 +0000228 self.pkgmgr = packages.PackageManager(
229 self.autodir, run_function_dargs={'timeout':600})
showard21baa452008-10-21 00:08:39 +0000230 self.num_tests_run = 0
231 self.num_tests_failed = 0
232
jadmanski550fdc22008-11-20 16:32:08 +0000233 self._register_subcommand_hooks()
234
mbligh0d0f67d2009-11-06 03:15:03 +0000235 # these components aren't usable on the server
236 self.bootloader = None
237 self.harness = None
238
jadmanski2a89dac2010-06-11 14:32:58 +0000239 # set up the status logger
jadmanski52053632010-06-11 21:08:10 +0000240 self._indenter = status_indenter()
jadmanski2a89dac2010-06-11 14:32:58 +0000241 self._logger = base_job.status_logger(
jadmanski52053632010-06-11 21:08:10 +0000242 self, self._indenter, 'status.log', 'status.log',
jadmanski2a89dac2010-06-11 14:32:58 +0000243 record_hook=server_job_record_hook(self))
244
Dan Shib03ea9d2013-08-15 17:13:27 -0700245 # Initialize a flag to indicate DUT failure during the test, e.g.,
246 # unexpected reboot.
247 self.failed_with_device_error = False
248
mbligh0d0f67d2009-11-06 03:15:03 +0000249
250 @classmethod
251 def _find_base_directories(cls):
252 """
253 Determine locations of autodir, clientdir and serverdir. Assumes
254 that this file is located within serverdir and uses __file__ along
255 with relative paths to resolve the location.
256 """
257 serverdir = os.path.abspath(os.path.dirname(__file__))
258 autodir = os.path.normpath(os.path.join(serverdir, '..'))
259 clientdir = os.path.join(autodir, 'client')
260 return autodir, clientdir, serverdir
261
262
Scott Zawalski91493c82013-01-25 16:15:20 -0500263 def _find_resultdir(self, resultdir, *args, **dargs):
mbligh0d0f67d2009-11-06 03:15:03 +0000264 """
265 Determine the location of resultdir. For server jobs we expect one to
266 always be explicitly passed in to __init__, so just return that.
267 """
268 if resultdir:
269 return os.path.normpath(resultdir)
270 else:
271 return None
272
jadmanski550fdc22008-11-20 16:32:08 +0000273
jadmanski2a89dac2010-06-11 14:32:58 +0000274 def _get_status_logger(self):
275 """Return a reference to the status logger."""
276 return self._logger
277
278
jadmanskie432dd22009-01-30 15:04:51 +0000279 @staticmethod
280 def _load_control_file(path):
281 f = open(path)
282 try:
283 control_file = f.read()
284 finally:
285 f.close()
286 return re.sub('\r', '', control_file)
287
288
jadmanski550fdc22008-11-20 16:32:08 +0000289 def _register_subcommand_hooks(self):
mbligh2b92b862008-11-22 13:25:32 +0000290 """
291 Register some hooks into the subcommand modules that allow us
292 to properly clean up self.hosts created in forked subprocesses.
293 """
jadmanski550fdc22008-11-20 16:32:08 +0000294 def on_fork(cmd):
295 self._existing_hosts_on_fork = set(self.hosts)
296 def on_join(cmd):
297 new_hosts = self.hosts - self._existing_hosts_on_fork
298 for host in new_hosts:
299 host.close()
300 subcommand.subcommand.register_fork_hook(on_fork)
301 subcommand.subcommand.register_join_hook(on_join)
302
jadmanski10646442008-08-13 14:05:21 +0000303
    def init_parser(self):
        """
        Start the continuous parsing of self.resultdir. This sets up
        the database connection and inserts the basic job object into
        the database if necessary.

        No-op unless continuous parsing was enabled in __init__
        (self._using_parser).
        """
        if not self._using_parser:
            return
        # redirect parser debugging to .parse.log
        parse_log = os.path.join(self.resultdir, '.parse.log')
        # NOTE: buffering=0 (unbuffered) is Python-2-only for text files.
        parse_log = open(parse_log, 'w', 0)
        tko_utils.redirect_parser_debugging(parse_log)
        # create a job model object and set up the db
        self.results_db = tko_db.db(autocommit=True)
        self.parser = status_lib.parser(self._STATUS_VERSION)
        self.job_model = self.parser.make_job(self.resultdir)
        self.parser.start(self.job_model)
        # check if a job already exists in the db and insert it if
        # it does not
        job_idx = self.results_db.find_job(self._parse_job)
        if job_idx is None:
            self.results_db.insert_job(self._parse_job, self.job_model)
        else:
            # job already present: point the model at the existing rows
            machine_idx = self.results_db.lookup_machine(self.job_model.machine)
            self.job_model.index = job_idx
            self.job_model.machine_idx = machine_idx
330
331
332 def cleanup_parser(self):
mbligh2b92b862008-11-22 13:25:32 +0000333 """
334 This should be called after the server job is finished
jadmanski10646442008-08-13 14:05:21 +0000335 to carry out any remaining cleanup (e.g. flushing any
mbligh2b92b862008-11-22 13:25:32 +0000336 remaining test results to the results db)
337 """
mbligh0d0f67d2009-11-06 03:15:03 +0000338 if not self._using_parser:
jadmanski10646442008-08-13 14:05:21 +0000339 return
340 final_tests = self.parser.end()
341 for test in final_tests:
342 self.__insert_test(test)
mbligh0d0f67d2009-11-06 03:15:03 +0000343 self._using_parser = False
jadmanski10646442008-08-13 14:05:21 +0000344
345
346 def verify(self):
Dan Shi07e09af2013-04-12 09:31:29 -0700347 """Verify machines are all ssh-able."""
jadmanski10646442008-08-13 14:05:21 +0000348 if not self.machines:
mbligh084bc172008-10-18 14:02:45 +0000349 raise error.AutoservError('No machines specified to verify')
mbligh0fce4112008-11-27 00:37:17 +0000350 if self.resultdir:
351 os.chdir(self.resultdir)
jadmanski10646442008-08-13 14:05:21 +0000352 try:
jadmanskicdd0c402008-09-19 21:21:31 +0000353 namespace = {'machines' : self.machines, 'job' : self,
mbligh0d0f67d2009-11-06 03:15:03 +0000354 'ssh_user' : self._ssh_user,
355 'ssh_port' : self._ssh_port,
356 'ssh_pass' : self._ssh_pass}
mbligh084bc172008-10-18 14:02:45 +0000357 self._execute_code(VERIFY_CONTROL_FILE, namespace, protect=False)
jadmanski10646442008-08-13 14:05:21 +0000358 except Exception, e:
mbligh2b92b862008-11-22 13:25:32 +0000359 msg = ('Verify failed\n' + str(e) + '\n' + traceback.format_exc())
jadmanski10646442008-08-13 14:05:21 +0000360 self.record('ABORT', None, None, msg)
361 raise
362
363
Dan Shi07e09af2013-04-12 09:31:29 -0700364 def reset(self):
365 """Reset machines by first cleanup then verify each machine."""
366 if not self.machines:
367 raise error.AutoservError('No machines specified to reset.')
368 if self.resultdir:
369 os.chdir(self.resultdir)
370
371 try:
372 namespace = {'machines' : self.machines, 'job' : self,
373 'ssh_user' : self._ssh_user,
374 'ssh_port' : self._ssh_port,
375 'ssh_pass' : self._ssh_pass}
376 self._execute_code(RESET_CONTROL_FILE, namespace, protect=False)
377 except Exception as e:
378 msg = ('Reset failed\n' + str(e) + '\n' +
379 traceback.format_exc())
380 self.record('ABORT', None, None, msg)
381 raise
382
383
jadmanski10646442008-08-13 14:05:21 +0000384 def repair(self, host_protection):
385 if not self.machines:
386 raise error.AutoservError('No machines specified to repair')
mbligh0fce4112008-11-27 00:37:17 +0000387 if self.resultdir:
388 os.chdir(self.resultdir)
jadmanski10646442008-08-13 14:05:21 +0000389 namespace = {'machines': self.machines, 'job': self,
mbligh0d0f67d2009-11-06 03:15:03 +0000390 'ssh_user': self._ssh_user, 'ssh_port': self._ssh_port,
391 'ssh_pass': self._ssh_pass,
jadmanski10646442008-08-13 14:05:21 +0000392 'protection_level': host_protection}
mbligh25c0b8c2009-01-24 01:44:17 +0000393
mbligh0931b0a2009-04-08 17:44:48 +0000394 self._execute_code(REPAIR_CONTROL_FILE, namespace, protect=False)
jadmanski10646442008-08-13 14:05:21 +0000395
396
Alex Millercb79ba72013-05-29 14:43:00 -0700397 def provision(self, labels):
398 """
399 Provision all hosts to match |labels|.
400
401 @param labels: A comma seperated string of labels to provision the
402 host to.
403
404 """
405 namespace = {'provision_labels': labels}
406 control = self._load_control_file(PROVISION_CONTROL_FILE)
407 self.run(control=control, namespace=namespace)
408
409
jadmanski10646442008-08-13 14:05:21 +0000410 def precheck(self):
411 """
412 perform any additional checks in derived classes.
413 """
414 pass
415
416
417 def enable_external_logging(self):
mbligh2b92b862008-11-22 13:25:32 +0000418 """
419 Start or restart external logging mechanism.
jadmanski10646442008-08-13 14:05:21 +0000420 """
421 pass
422
423
424 def disable_external_logging(self):
mbligh2b92b862008-11-22 13:25:32 +0000425 """
426 Pause or stop external logging mechanism.
jadmanski10646442008-08-13 14:05:21 +0000427 """
428 pass
429
430
431 def use_external_logging(self):
mbligh2b92b862008-11-22 13:25:32 +0000432 """
433 Return True if external logging should be used.
jadmanski10646442008-08-13 14:05:21 +0000434 """
435 return False
436
437
    def _make_parallel_wrapper(self, function, machines, log):
        """Wrap function as appropriate for calling by parallel_simple.

        Three cases, chosen by the parse/log configuration:
        continuous-parsing wrapper, keyval-only wrapper, or the raw
        function unchanged.
        """
        # forking happens unless this is a single-machine job run on its
        # own machine list
        is_forking = not (len(machines) == 1 and self.machines == machines)
        if self._parse_job and is_forking and log:
            def wrapper(machine):
                # NOTE(review): these self mutations appear to rely on the
                # wrapper running in a forked subprocess (via
                # subcommand.parallel_simple), so they don't leak into the
                # parent job -- confirm before reusing elsewhere.
                self._parse_job += "/" + machine
                self._using_parser = True
                self.machines = [machine]
                self.push_execution_context(machine)
                os.chdir(self.resultdir)
                utils.write_keyval(self.resultdir, {"hostname": machine})
                self.init_parser()
                result = function(machine)
                self.cleanup_parser()
                return result
        elif len(machines) > 1 and log:
            def wrapper(machine):
                self.push_execution_context(machine)
                os.chdir(self.resultdir)
                # record per-machine keyvals without starting a parser
                machine_data = {'hostname' : machine,
                                'status_version' : str(self._STATUS_VERSION)}
                utils.write_keyval(self.resultdir, machine_data)
                result = function(machine)
                return result
        else:
            # single machine, or logging disabled: no wrapping needed
            wrapper = function
        return wrapper
465
466
467 def parallel_simple(self, function, machines, log=True, timeout=None,
468 return_results=False):
469 """
470 Run 'function' using parallel_simple, with an extra wrapper to handle
471 the necessary setup for continuous parsing, if possible. If continuous
472 parsing is already properly initialized then this should just work.
473
474 @param function: A callable to run in parallel given each machine.
475 @param machines: A list of machine names to be passed one per subcommand
476 invocation of function.
477 @param log: If True, output will be written to output in a subdirectory
478 named after each machine.
479 @param timeout: Seconds after which the function call should timeout.
480 @param return_results: If True instead of an AutoServError being raised
481 on any error a list of the results|exceptions from the function
482 called on each arg is returned. [default: False]
483
484 @raises error.AutotestError: If any of the functions failed.
485 """
486 wrapper = self._make_parallel_wrapper(function, machines, log)
487 return subcommand.parallel_simple(wrapper, machines,
488 log=log, timeout=timeout,
489 return_results=return_results)
490
491
492 def parallel_on_machines(self, function, machines, timeout=None):
493 """
showardcd5fac42009-07-06 20:19:43 +0000494 @param function: Called in parallel with one machine as its argument.
mbligh415dc212009-06-15 21:53:34 +0000495 @param machines: A list of machines to call function(machine) on.
496 @param timeout: Seconds after which the function call should timeout.
497
498 @returns A list of machines on which function(machine) returned
499 without raising an exception.
500 """
showardcd5fac42009-07-06 20:19:43 +0000501 results = self.parallel_simple(function, machines, timeout=timeout,
mbligh415dc212009-06-15 21:53:34 +0000502 return_results=True)
503 success_machines = []
504 for result, machine in itertools.izip(results, machines):
505 if not isinstance(result, Exception):
506 success_machines.append(machine)
507 return success_machines
jadmanski10646442008-08-13 14:05:21 +0000508
509
    # Sentinel: pass as control_file_dir to request a temp directory.
    _USE_TEMP_DIR = object()
    def run(self, cleanup=False, install_before=False, install_after=False,
            collect_crashdumps=True, namespace={}, control=None,
            control_file_dir=None, verify_job_repo_url=False,
            only_collect_crashinfo=False, skip_crash_collection=False):
        """Execute this job's control file, then collect crash data.

        @param cleanup: If True, run the cleanup control segment afterwards.
        @param install_before: If True, run the install control segment on
                the machines before the job.
        @param install_after: If True, run the install control segment on
                the machines after the job.
        @param collect_crashdumps: If True, collect crashdumps afterwards.
        @param namespace: Extra names for the control file's namespace.
                (The mutable default is safe here: it is copied before use.)
        @param control: Control file text; defaults to loading self.control.
        @param control_file_dir: Where to write the control files, or
                _USE_TEMP_DIR for a temporary directory.
        @param verify_job_repo_url: If True, run the verify_job_repo_url
                control segment first.
        @param only_collect_crashinfo: If True, skip running the control
                file and only collect crash info.
        @param skip_crash_collection: If True, skip crash dump/info
                collection entirely.
        """
        # for a normal job, make sure the uncollected logs file exists
        # for a crashinfo-only run it should already exist, bail out otherwise
        created_uncollected_logs = False
        logging.info("I am PID %s", os.getpid())
        if self.resultdir and not os.path.exists(self._uncollected_log_file):
            if only_collect_crashinfo:
                # if this is a crashinfo-only run, and there were no existing
                # uncollected logs, just bail out early
                logging.info("No existing uncollected logs, "
                             "skipping crashinfo collection")
                return
            else:
                log_file = open(self._uncollected_log_file, "w")
                pickle.dump([], log_file)
                log_file.close()
                created_uncollected_logs = True

        # use a copy so changes don't affect the original dictionary
        namespace = namespace.copy()
        machines = self.machines
        if control is None:
            if self.control is None:
                control = ''
            else:
                control = self._load_control_file(self.control)
        if control_file_dir is None:
            control_file_dir = self.resultdir

        self.aborted = False
        namespace['machines'] = machines
        namespace['args'] = self.args
        namespace['job'] = self
        namespace['ssh_user'] = self._ssh_user
        namespace['ssh_port'] = self._ssh_port
        namespace['ssh_pass'] = self._ssh_pass
        test_start_time = int(time.time())

        if self.resultdir:
            os.chdir(self.resultdir)
            # touch status.log so that the parser knows a job is running here
            open(self.get_status_log_path(), 'a').close()
            self.enable_external_logging()

        collect_crashinfo = True
        temp_control_file_dir = None
        try:
            try:
                if install_before and machines:
                    self._execute_code(INSTALL_CONTROL_FILE, namespace)

                if only_collect_crashinfo:
                    # skip straight to the finally block's crash collection
                    return

                # If the verify_job_repo_url option is set but we're unable
                # to actually verify that the job_repo_url contains the autotest
                # package, this job will fail.
                if verify_job_repo_url:
                    self._execute_code(VERIFY_JOB_REPO_URL_CONTROL_FILE,
                                       namespace)
                else:
                    logging.warning('Not checking if job_repo_url contains '
                                    'autotest packages on %s', machines)

                # determine the dir to write the control files to
                cfd_specified = (control_file_dir
                                 and control_file_dir is not self._USE_TEMP_DIR)
                if cfd_specified:
                    temp_control_file_dir = None
                else:
                    temp_control_file_dir = tempfile.mkdtemp(
                            suffix='temp_control_file_dir')
                    control_file_dir = temp_control_file_dir
                server_control_file = os.path.join(control_file_dir,
                                                   self._control_filename)
                client_control_file = os.path.join(control_file_dir,
                                                   CLIENT_CONTROL_FILENAME)
                if self._client:
                    # client-side job: the server control file is just the
                    # client wrapper, and the real control goes client-side
                    namespace['control'] = control
                    utils.open_write_close(client_control_file, control)
                    shutil.copyfile(CLIENT_WRAPPER_CONTROL_FILE,
                                    server_control_file)
                else:
                    utils.open_write_close(server_control_file, control)
                logging.info("Processing control file")
                self._execute_code(server_control_file, namespace)
                logging.info("Finished processing control file")

                # If no device error occured, no need to collect crashinfo.
                collect_crashinfo = self.failed_with_device_error
            except Exception, e:
                try:
                    logging.exception(
                            'Exception escaped control file, job aborting:')
                    self.record('INFO', None, None, str(e),
                                {'job_abort_reason': str(e)})
                except:
                    pass # don't let logging exceptions here interfere
                raise
        finally:
            if temp_control_file_dir:
                # Clean up temp directory used for copies of the control files
                try:
                    shutil.rmtree(temp_control_file_dir)
                except Exception, e:
                    logging.warn('Could not remove temp directory %s: %s',
                                 temp_control_file_dir, e)

            if machines and (collect_crashdumps or collect_crashinfo):
                namespace['test_start_time'] = test_start_time
                if skip_crash_collection:
                    logging.info('Skipping crash dump/info collection '
                                 'as requested.')
                elif collect_crashinfo:
                    # includes crashdumps
                    self._execute_code(CRASHINFO_CONTROL_FILE, namespace)
                else:
                    self._execute_code(CRASHDUMPS_CONTROL_FILE, namespace)
            self.disable_external_logging()
            if cleanup and machines:
                self._execute_code(CLEANUP_CONTROL_FILE, namespace)
            if self._uncollected_log_file and created_uncollected_logs:
                os.remove(self._uncollected_log_file)
            if install_after and machines:
                self._execute_code(INSTALL_CONTROL_FILE, namespace)
jadmanski10646442008-08-13 14:05:21 +0000639
640
    def run_test(self, url, *args, **dargs):
        """
        Summon a test object and run it.

        @param url: url of the test to run.
        @param args: positional arguments passed through to the test.
        @param dargs: keyword arguments passed through to the test; the
                'tag' key (consumed by _build_tagged_test_name) is added
                to the testname to keep results subdirectories unique.

        @returns True if the test completed, False if it raised a
                TestBaseException (already recorded in the status log).
        @raises Any non-test exception from the test, re-raised with its
                original traceback.
        """
        if self._disable_sysinfo:
            # Job-wide sysinfo suppression overrides the per-test default.
            dargs['disable_sysinfo'] = True

        group, testname = self.pkgmgr.get_package_name(url, 'test')
        testname, subdir, tag = self._build_tagged_test_name(testname, dargs)
        outputdir = self._make_test_outputdir(subdir)

        def group_func():
            # Run the test and record its outcome; re-raise so that
            # _run_group can capture exc_info for the logic below.
            try:
                test.runtest(self, url, tag, args, dargs)
            except error.TestBaseException, e:
                self.record(e.exit_status, subdir, testname, str(e))
                raise
            except Exception, e:
                info = str(e) + "\n" + traceback.format_exc()
                self.record('FAIL', subdir, testname, info)
                raise
            else:
                self.record('GOOD', subdir, testname, 'completed successfully')

        result, exc_info = self._run_group(testname, subdir, group_func)
        if exc_info and isinstance(exc_info[1], error.TestBaseException):
            # Test-level failures were already recorded; signal via the
            # return value instead of raising.
            return False
        elif exc_info:
            # Non-test exceptions propagate with the original traceback.
            raise exc_info[0], exc_info[1], exc_info[2]
        else:
            return True
jadmanski10646442008-08-13 14:05:21 +0000677
678
    def _run_group(self, name, subdir, function, *args, **dargs):
        """\
        Underlying method for running something inside of a group.

        Wraps the call in START/END status-log records so the log nests
        correctly.

        @param name: group name used in the status log.
        @param subdir: results subdirectory for the records (may be None).
        @param function: callable to invoke inside the group.

        @returns A (result, exc_info) tuple: result is the function's return
                value (None if it raised); exc_info is the sys.exc_info()
                triple when a TestBaseException was raised, otherwise None.
        @raises error.JobError when the function raises any non-test
                exception; TestBaseExceptions are captured, not re-raised.
        """
        result, exc_info = None, None
        try:
            self.record('START', subdir, name)
            result = function(*args, **dargs)
        except error.TestBaseException, e:
            # Test failures end the group with the test's own status and
            # are reported to the caller via exc_info rather than raised.
            self.record("END %s" % e.exit_status, subdir, name)
            exc_info = sys.exc_info()
        except Exception, e:
            # Anything else aborts the group and becomes a JobError.
            err_msg = str(e) + '\n'
            err_msg += traceback.format_exc()
            self.record('END ABORT', subdir, name, err_msg)
            raise error.JobError(name + ' failed\n' + traceback.format_exc())
        else:
            self.record('END GOOD', subdir, name)

        return result, exc_info
jadmanski10646442008-08-13 14:05:21 +0000699
700
701 def run_group(self, function, *args, **dargs):
702 """\
703 function:
704 subroutine to run
705 *args:
706 arguments for the function
707 """
708
709 name = function.__name__
710
711 # Allow the tag for the group to be specified.
712 tag = dargs.pop('tag', None)
713 if tag:
714 name = tag
715
jadmanskide292df2008-08-26 20:51:14 +0000716 return self._run_group(name, None, function, *args, **dargs)[0]
jadmanski10646442008-08-13 14:05:21 +0000717
718
719 def run_reboot(self, reboot_func, get_kernel_func):
720 """\
721 A specialization of run_group meant specifically for handling
722 a reboot. Includes support for capturing the kernel version
723 after the reboot.
724
725 reboot_func: a function that carries out the reboot
726
727 get_kernel_func: a function that returns a string
728 representing the kernel version.
729 """
jadmanski10646442008-08-13 14:05:21 +0000730 try:
731 self.record('START', None, 'reboot')
jadmanski10646442008-08-13 14:05:21 +0000732 reboot_func()
733 except Exception, e:
jadmanski10646442008-08-13 14:05:21 +0000734 err_msg = str(e) + '\n' + traceback.format_exc()
735 self.record('END FAIL', None, 'reboot', err_msg)
jadmanski4b51d542009-04-08 14:17:16 +0000736 raise
jadmanski10646442008-08-13 14:05:21 +0000737 else:
738 kernel = get_kernel_func()
jadmanski10646442008-08-13 14:05:21 +0000739 self.record('END GOOD', None, 'reboot',
Dale Curtis74a314b2011-06-23 14:55:46 -0700740 optional_fields={"kernel": kernel})
jadmanski10646442008-08-13 14:05:21 +0000741
742
jadmanskie432dd22009-01-30 15:04:51 +0000743 def run_control(self, path):
744 """Execute a control file found at path (relative to the autotest
745 path). Intended for executing a control file within a control file,
746 not for running the top-level job control file."""
747 path = os.path.join(self.autodir, path)
748 control_file = self._load_control_file(path)
mbligh0d0f67d2009-11-06 03:15:03 +0000749 self.run(control=control_file, control_file_dir=self._USE_TEMP_DIR)
jadmanskie432dd22009-01-30 15:04:51 +0000750
751
jadmanskic09fc152008-10-15 17:56:59 +0000752 def add_sysinfo_command(self, command, logfile=None, on_every_test=False):
mbligh4395bbd2009-03-25 19:34:17 +0000753 self._add_sysinfo_loggable(sysinfo.command(command, logf=logfile),
jadmanskic09fc152008-10-15 17:56:59 +0000754 on_every_test)
755
756
757 def add_sysinfo_logfile(self, file, on_every_test=False):
758 self._add_sysinfo_loggable(sysinfo.logfile(file), on_every_test)
759
760
761 def _add_sysinfo_loggable(self, loggable, on_every_test):
762 if on_every_test:
763 self.sysinfo.test_loggables.add(loggable)
764 else:
765 self.sysinfo.boot_loggables.add(loggable)
766
767
jadmanski10646442008-08-13 14:05:21 +0000768 def _read_warnings(self):
jadmanskif37df842009-02-11 00:03:26 +0000769 """Poll all the warning loggers and extract any new warnings that have
770 been logged. If the warnings belong to a category that is currently
771 disabled, this method will discard them and they will no longer be
772 retrievable.
773
774 Returns a list of (timestamp, message) tuples, where timestamp is an
775 integer epoch timestamp."""
jadmanski10646442008-08-13 14:05:21 +0000776 warnings = []
777 while True:
778 # pull in a line of output from every logger that has
779 # output ready to be read
mbligh2b92b862008-11-22 13:25:32 +0000780 loggers, _, _ = select.select(self.warning_loggers, [], [], 0)
jadmanski10646442008-08-13 14:05:21 +0000781 closed_loggers = set()
782 for logger in loggers:
783 line = logger.readline()
784 # record any broken pipes (aka line == empty)
785 if len(line) == 0:
786 closed_loggers.add(logger)
787 continue
jadmanskif37df842009-02-11 00:03:26 +0000788 # parse out the warning
789 timestamp, msgtype, msg = line.split('\t', 2)
790 timestamp = int(timestamp)
791 # if the warning is valid, add it to the results
792 if self.warning_manager.is_valid(timestamp, msgtype):
793 warnings.append((timestamp, msg.strip()))
jadmanski10646442008-08-13 14:05:21 +0000794
795 # stop listening to loggers that are closed
796 self.warning_loggers -= closed_loggers
797
798 # stop if none of the loggers have any output left
799 if not loggers:
800 break
801
802 # sort into timestamp order
803 warnings.sort()
804 return warnings
805
806
showardcc929362010-01-25 21:20:41 +0000807 def _unique_subdirectory(self, base_subdirectory_name):
808 """Compute a unique results subdirectory based on the given name.
809
810 Appends base_subdirectory_name with a number as necessary to find a
811 directory name that doesn't already exist.
812 """
813 subdirectory = base_subdirectory_name
814 counter = 1
815 while os.path.exists(os.path.join(self.resultdir, subdirectory)):
816 subdirectory = base_subdirectory_name + '.' + str(counter)
817 counter += 1
818 return subdirectory
819
820
jadmanski52053632010-06-11 21:08:10 +0000821 def get_record_context(self):
822 """Returns an object representing the current job.record context.
823
824 The object returned is an opaque object with a 0-arg restore method
825 which can be called to restore the job.record context (i.e. indentation)
826 to the current level. The intention is that it should be used when
827 something external which generate job.record calls (e.g. an autotest
828 client) can fail catastrophically and the server job record state
829 needs to be reset to its original "known good" state.
830
831 @return: A context object with a 0-arg restore() method."""
832 return self._indenter.get_context()
833
834
showardcc929362010-01-25 21:20:41 +0000835 def record_summary(self, status_code, test_name, reason='', attributes=None,
836 distinguishing_attributes=(), child_test_ids=None):
837 """Record a summary test result.
838
839 @param status_code: status code string, see
840 common_lib.log.is_valid_status()
841 @param test_name: name of the test
842 @param reason: (optional) string providing detailed reason for test
843 outcome
844 @param attributes: (optional) dict of string keyvals to associate with
845 this result
846 @param distinguishing_attributes: (optional) list of attribute names
847 that should be used to distinguish identically-named test
848 results. These attributes should be present in the attributes
849 parameter. This is used to generate user-friendly subdirectory
850 names.
851 @param child_test_ids: (optional) list of test indices for test results
852 used in generating this result.
853 """
854 subdirectory_name_parts = [test_name]
855 for attribute in distinguishing_attributes:
856 assert attributes
857 assert attribute in attributes, '%s not in %s' % (attribute,
858 attributes)
859 subdirectory_name_parts.append(attributes[attribute])
860 base_subdirectory_name = '.'.join(subdirectory_name_parts)
861
862 subdirectory = self._unique_subdirectory(base_subdirectory_name)
863 subdirectory_path = os.path.join(self.resultdir, subdirectory)
864 os.mkdir(subdirectory_path)
865
866 self.record(status_code, subdirectory, test_name,
867 status=reason, optional_fields={'is_summary': True})
868
869 if attributes:
870 utils.write_keyval(subdirectory_path, attributes)
871
872 if child_test_ids:
873 ids_string = ','.join(str(test_id) for test_id in child_test_ids)
874 summary_data = {'child_test_ids': ids_string}
875 utils.write_keyval(os.path.join(subdirectory_path, 'summary_data'),
876 summary_data)
877
878
jadmanski16a7ff72009-04-01 18:19:53 +0000879 def disable_warnings(self, warning_type):
jadmanskif37df842009-02-11 00:03:26 +0000880 self.warning_manager.disable_warnings(warning_type)
jadmanski16a7ff72009-04-01 18:19:53 +0000881 self.record("INFO", None, None,
882 "disabling %s warnings" % warning_type,
883 {"warnings.disable": warning_type})
jadmanskif37df842009-02-11 00:03:26 +0000884
885
jadmanski16a7ff72009-04-01 18:19:53 +0000886 def enable_warnings(self, warning_type):
jadmanskif37df842009-02-11 00:03:26 +0000887 self.warning_manager.enable_warnings(warning_type)
jadmanski16a7ff72009-04-01 18:19:53 +0000888 self.record("INFO", None, None,
889 "enabling %s warnings" % warning_type,
890 {"warnings.enable": warning_type})
jadmanskif37df842009-02-11 00:03:26 +0000891
892
jadmanski779bd292009-03-19 17:33:33 +0000893 def get_status_log_path(self, subdir=None):
894 """Return the path to the job status log.
895
896 @param subdir - Optional paramter indicating that you want the path
897 to a subdirectory status log.
898
899 @returns The path where the status log should be.
900 """
mbligh210bae62009-04-01 18:33:13 +0000901 if self.resultdir:
902 if subdir:
903 return os.path.join(self.resultdir, subdir, "status.log")
904 else:
905 return os.path.join(self.resultdir, "status.log")
jadmanski779bd292009-03-19 17:33:33 +0000906 else:
mbligh210bae62009-04-01 18:33:13 +0000907 return None
jadmanski779bd292009-03-19 17:33:33 +0000908
909
jadmanski6bb32d72009-03-19 20:25:24 +0000910 def _update_uncollected_logs_list(self, update_func):
911 """Updates the uncollected logs list in a multi-process safe manner.
912
913 @param update_func - a function that updates the list of uncollected
914 logs. Should take one parameter, the list to be updated.
915 """
Dan Shi07e09af2013-04-12 09:31:29 -0700916 # Skip log collection if file _uncollected_log_file does not exist.
917 if not (self._uncollected_log_file and
918 os.path.exists(self._uncollected_log_file)):
919 return
mbligh0d0f67d2009-11-06 03:15:03 +0000920 if self._uncollected_log_file:
921 log_file = open(self._uncollected_log_file, "r+")
mbligha788dc42009-03-26 21:10:16 +0000922 fcntl.flock(log_file, fcntl.LOCK_EX)
jadmanski6bb32d72009-03-19 20:25:24 +0000923 try:
924 uncollected_logs = pickle.load(log_file)
925 update_func(uncollected_logs)
926 log_file.seek(0)
927 log_file.truncate()
928 pickle.dump(uncollected_logs, log_file)
jadmanski3bff9092009-04-22 18:09:47 +0000929 log_file.flush()
jadmanski6bb32d72009-03-19 20:25:24 +0000930 finally:
931 fcntl.flock(log_file, fcntl.LOCK_UN)
932 log_file.close()
933
934
935 def add_client_log(self, hostname, remote_path, local_path):
936 """Adds a new set of client logs to the list of uncollected logs,
937 to allow for future log recovery.
938
939 @param host - the hostname of the machine holding the logs
940 @param remote_path - the directory on the remote machine holding logs
941 @param local_path - the local directory to copy the logs into
942 """
943 def update_func(logs_list):
944 logs_list.append((hostname, remote_path, local_path))
945 self._update_uncollected_logs_list(update_func)
946
947
948 def remove_client_log(self, hostname, remote_path, local_path):
949 """Removes a set of client logs from the list of uncollected logs,
950 to allow for future log recovery.
951
952 @param host - the hostname of the machine holding the logs
953 @param remote_path - the directory on the remote machine holding logs
954 @param local_path - the local directory to copy the logs into
955 """
956 def update_func(logs_list):
957 logs_list.remove((hostname, remote_path, local_path))
958 self._update_uncollected_logs_list(update_func)
959
960
mbligh0d0f67d2009-11-06 03:15:03 +0000961 def get_client_logs(self):
962 """Retrieves the list of uncollected logs, if it exists.
963
964 @returns A list of (host, remote_path, local_path) tuples. Returns
965 an empty list if no uncollected logs file exists.
966 """
967 log_exists = (self._uncollected_log_file and
968 os.path.exists(self._uncollected_log_file))
969 if log_exists:
970 return pickle.load(open(self._uncollected_log_file))
971 else:
972 return []
973
974
mbligh084bc172008-10-18 14:02:45 +0000975 def _fill_server_control_namespace(self, namespace, protect=True):
mbligh2b92b862008-11-22 13:25:32 +0000976 """
977 Prepare a namespace to be used when executing server control files.
mbligh084bc172008-10-18 14:02:45 +0000978
979 This sets up the control file API by importing modules and making them
980 available under the appropriate names within namespace.
981
982 For use by _execute_code().
983
984 Args:
985 namespace: The namespace dictionary to fill in.
986 protect: Boolean. If True (the default) any operation that would
987 clobber an existing entry in namespace will cause an error.
988 Raises:
989 error.AutoservError: When a name would be clobbered by import.
990 """
991 def _import_names(module_name, names=()):
mbligh2b92b862008-11-22 13:25:32 +0000992 """
993 Import a module and assign named attributes into namespace.
mbligh084bc172008-10-18 14:02:45 +0000994
995 Args:
996 module_name: The string module name.
997 names: A limiting list of names to import from module_name. If
998 empty (the default), all names are imported from the module
999 similar to a "from foo.bar import *" statement.
1000 Raises:
1001 error.AutoservError: When a name being imported would clobber
1002 a name already in namespace.
1003 """
1004 module = __import__(module_name, {}, {}, names)
1005
1006 # No names supplied? Import * from the lowest level module.
1007 # (Ugh, why do I have to implement this part myself?)
1008 if not names:
1009 for submodule_name in module_name.split('.')[1:]:
1010 module = getattr(module, submodule_name)
1011 if hasattr(module, '__all__'):
1012 names = getattr(module, '__all__')
1013 else:
1014 names = dir(module)
1015
1016 # Install each name into namespace, checking to make sure it
1017 # doesn't override anything that already exists.
1018 for name in names:
1019 # Check for conflicts to help prevent future problems.
1020 if name in namespace and protect:
1021 if namespace[name] is not getattr(module, name):
1022 raise error.AutoservError('importing name '
1023 '%s from %s %r would override %r' %
1024 (name, module_name, getattr(module, name),
1025 namespace[name]))
1026 else:
1027 # Encourage cleanliness and the use of __all__ for a
1028 # more concrete API with less surprises on '*' imports.
1029 warnings.warn('%s (%r) being imported from %s for use '
1030 'in server control files is not the '
1031 'first occurrance of that import.' %
1032 (name, namespace[name], module_name))
1033
1034 namespace[name] = getattr(module, name)
1035
1036
1037 # This is the equivalent of prepending a bunch of import statements to
1038 # the front of the control script.
mbligha2b07dd2009-06-22 18:26:13 +00001039 namespace.update(os=os, sys=sys, logging=logging)
mbligh084bc172008-10-18 14:02:45 +00001040 _import_names('autotest_lib.server',
1041 ('hosts', 'autotest', 'kvm', 'git', 'standalone_profiler',
1042 'source_kernel', 'rpm_kernel', 'deb_kernel', 'git_kernel'))
1043 _import_names('autotest_lib.server.subcommand',
1044 ('parallel', 'parallel_simple', 'subcommand'))
1045 _import_names('autotest_lib.server.utils',
1046 ('run', 'get_tmp_dir', 'sh_escape', 'parse_machine'))
1047 _import_names('autotest_lib.client.common_lib.error')
1048 _import_names('autotest_lib.client.common_lib.barrier', ('barrier',))
1049
1050 # Inject ourself as the job object into other classes within the API.
1051 # (Yuck, this injection is a gross thing be part of a public API. -gps)
1052 #
1053 # XXX Base & SiteAutotest do not appear to use .job. Who does?
1054 namespace['autotest'].Autotest.job = self
1055 # server.hosts.base_classes.Host uses .job.
1056 namespace['hosts'].Host.job = self
Eric Li10222b82010-11-24 09:33:15 -08001057 namespace['hosts'].factory.ssh_user = self._ssh_user
1058 namespace['hosts'].factory.ssh_port = self._ssh_port
1059 namespace['hosts'].factory.ssh_pass = self._ssh_pass
mbligh084bc172008-10-18 14:02:45 +00001060
1061
1062 def _execute_code(self, code_file, namespace, protect=True):
mbligh2b92b862008-11-22 13:25:32 +00001063 """
1064 Execute code using a copy of namespace as a server control script.
mbligh084bc172008-10-18 14:02:45 +00001065
1066 Unless protect_namespace is explicitly set to False, the dict will not
1067 be modified.
1068
1069 Args:
1070 code_file: The filename of the control file to execute.
1071 namespace: A dict containing names to make available during execution.
1072 protect: Boolean. If True (the default) a copy of the namespace dict
1073 is used during execution to prevent the code from modifying its
1074 contents outside of this function. If False the raw dict is
1075 passed in and modifications will be allowed.
1076 """
1077 if protect:
1078 namespace = namespace.copy()
1079 self._fill_server_control_namespace(namespace, protect=protect)
1080 # TODO: Simplify and get rid of the special cases for only 1 machine.
showard3e66e8c2008-10-27 19:20:51 +00001081 if len(self.machines) > 1:
mbligh084bc172008-10-18 14:02:45 +00001082 machines_text = '\n'.join(self.machines) + '\n'
1083 # Only rewrite the file if it does not match our machine list.
1084 try:
1085 machines_f = open(MACHINES_FILENAME, 'r')
1086 existing_machines_text = machines_f.read()
1087 machines_f.close()
1088 except EnvironmentError:
1089 existing_machines_text = None
1090 if machines_text != existing_machines_text:
1091 utils.open_write_close(MACHINES_FILENAME, machines_text)
1092 execfile(code_file, namespace, namespace)
jadmanski10646442008-08-13 14:05:21 +00001093
1094
jadmanskie29d0e42010-06-17 16:06:52 +00001095 def _parse_status(self, new_line):
mbligh0d0f67d2009-11-06 03:15:03 +00001096 if not self._using_parser:
jadmanski10646442008-08-13 14:05:21 +00001097 return
jadmanskie29d0e42010-06-17 16:06:52 +00001098 new_tests = self.parser.process_lines([new_line])
jadmanski10646442008-08-13 14:05:21 +00001099 for test in new_tests:
1100 self.__insert_test(test)
1101
1102
1103 def __insert_test(self, test):
mbligh2b92b862008-11-22 13:25:32 +00001104 """
1105 An internal method to insert a new test result into the
jadmanski10646442008-08-13 14:05:21 +00001106 database. This method will not raise an exception, even if an
1107 error occurs during the insert, to avoid failing a test
1108 simply because of unexpected database issues."""
showard21baa452008-10-21 00:08:39 +00001109 self.num_tests_run += 1
1110 if status_lib.is_worse_than_or_equal_to(test.status, 'FAIL'):
1111 self.num_tests_failed += 1
jadmanski10646442008-08-13 14:05:21 +00001112 try:
1113 self.results_db.insert_test(self.job_model, test)
1114 except Exception:
1115 msg = ("WARNING: An unexpected error occured while "
1116 "inserting test results into the database. "
1117 "Ignoring error.\n" + traceback.format_exc())
1118 print >> sys.stderr, msg
1119
mblighcaa62c22008-04-07 21:51:17 +00001120
mblighfc3da5b2010-01-06 18:37:22 +00001121 def preprocess_client_state(self):
1122 """
1123 Produce a state file for initializing the state of a client job.
1124
1125 Creates a new client state file with all the current server state, as
1126 well as some pre-set client state.
1127
1128 @returns The path of the file the state was written into.
1129 """
1130 # initialize the sysinfo state
1131 self._state.set('client', 'sysinfo', self.sysinfo.serialize())
1132
1133 # dump the state out to a tempfile
1134 fd, file_path = tempfile.mkstemp(dir=self.tmpdir)
1135 os.close(fd)
mbligha2c99492010-01-27 22:59:50 +00001136
1137 # write_to_file doesn't need locking, we exclusively own file_path
mblighfc3da5b2010-01-06 18:37:22 +00001138 self._state.write_to_file(file_path)
1139 return file_path
1140
1141
1142 def postprocess_client_state(self, state_path):
1143 """
1144 Update the state of this job with the state from a client job.
1145
1146 Updates the state of the server side of a job with the final state
1147 of a client job that was run. Updates the non-client-specific state,
1148 pulls in some specific bits from the client-specific state, and then
1149 discards the rest. Removes the state file afterwards
1150
1151 @param state_file A path to the state file from the client.
1152 """
1153 # update the on-disk state
mblighfc3da5b2010-01-06 18:37:22 +00001154 try:
jadmanskib6e7bdb2010-04-13 16:00:39 +00001155 self._state.read_from_file(state_path)
mblighfc3da5b2010-01-06 18:37:22 +00001156 os.remove(state_path)
mbligha2c99492010-01-27 22:59:50 +00001157 except OSError, e:
mblighfc3da5b2010-01-06 18:37:22 +00001158 # ignore file-not-found errors
1159 if e.errno != errno.ENOENT:
1160 raise
jadmanskib6e7bdb2010-04-13 16:00:39 +00001161 else:
1162 logging.debug('Client state file %s not found', state_path)
mblighfc3da5b2010-01-06 18:37:22 +00001163
1164 # update the sysinfo state
1165 if self._state.has('client', 'sysinfo'):
1166 self.sysinfo.deserialize(self._state.get('client', 'sysinfo'))
1167
1168 # drop all the client-specific state
1169 self._state.discard_namespace('client')
1170
1171
mbligh0a883702010-04-21 01:58:34 +00001172 def clear_all_known_hosts(self):
1173 """Clears known hosts files for all AbstractSSHHosts."""
1174 for host in self.hosts:
1175 if isinstance(host, abstract_ssh.AbstractSSHHost):
1176 host.clear_known_hosts()
1177
1178
jadmanskif37df842009-02-11 00:03:26 +00001179class warning_manager(object):
1180 """Class for controlling warning logs. Manages the enabling and disabling
1181 of warnings."""
1182 def __init__(self):
1183 # a map of warning types to a list of disabled time intervals
1184 self.disabled_warnings = {}
1185
1186
1187 def is_valid(self, timestamp, warning_type):
1188 """Indicates if a warning (based on the time it occured and its type)
1189 is a valid warning. A warning is considered "invalid" if this type of
1190 warning was marked as "disabled" at the time the warning occured."""
1191 disabled_intervals = self.disabled_warnings.get(warning_type, [])
1192 for start, end in disabled_intervals:
1193 if timestamp >= start and (end is None or timestamp < end):
1194 return False
1195 return True
1196
1197
1198 def disable_warnings(self, warning_type, current_time_func=time.time):
1199 """As of now, disables all further warnings of this type."""
1200 intervals = self.disabled_warnings.setdefault(warning_type, [])
1201 if not intervals or intervals[-1][1] is not None:
jadmanski16a7ff72009-04-01 18:19:53 +00001202 intervals.append((int(current_time_func()), None))
jadmanskif37df842009-02-11 00:03:26 +00001203
1204
1205 def enable_warnings(self, warning_type, current_time_func=time.time):
1206 """As of now, enables all further warnings of this type."""
1207 intervals = self.disabled_warnings.get(warning_type, [])
1208 if intervals and intervals[-1][1] is None:
jadmanski16a7ff72009-04-01 18:19:53 +00001209 intervals[-1] = (intervals[-1][0], int(current_time_func()))
Paul Pendlebury57593562011-06-15 10:45:49 -07001210
1211
# Load up site-specific code for generating site-specific job data;
# _get_site_job_data_dummy is passed as the default implementation used
# when no site module provides one.
get_site_job_data = utils.import_site_function(__file__,
    "autotest_lib.server.site_server_job", "get_site_job_data",
    _get_site_job_data_dummy)
1216
1217
# Resolve the site-specific server_job specialization, with base_server_job
# passed as the default class when no site module provides one.
site_server_job = utils.import_site_class(
    __file__, "autotest_lib.server.site_server_job", "site_server_job",
    base_server_job)
1221
1222
class server_job(site_server_job):
    """Public server job class: the (possibly site-specialized) job type
    instantiated by the rest of the server code."""
    pass