blob: 34b9925a44c01da25f055b19ef4622fe28ede229 [file] [log] [blame]
Dan Shi07e09af2013-04-12 09:31:29 -07001# pylint: disable-msg=C0111
2
Paul Pendlebury7c1fdcf2011-05-04 12:39:15 -07003# Copyright (c) 2011 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
mbligh57e78662008-06-17 19:53:49 +00006"""
7The main job wrapper for the server side.
8
9This is the core infrastructure. Derived from the client side job.py
10
11Copyright Martin J. Bligh, Andy Whitcroft 2007
12"""
13
Scott Zawalski91493c82013-01-25 16:15:20 -050014import getpass, os, sys, re, tempfile, time, select, platform
mblighfc3da5b2010-01-06 18:37:22 +000015import traceback, shutil, warnings, fcntl, pickle, logging, itertools, errno
showard75cdfee2009-06-10 17:40:41 +000016from autotest_lib.client.bin import sysinfo
mbligh0d0f67d2009-11-06 03:15:03 +000017from autotest_lib.client.common_lib import base_job
Scott Zawalski91493c82013-01-25 16:15:20 -050018from autotest_lib.client.common_lib import error, utils, packages
showard75cdfee2009-06-10 17:40:41 +000019from autotest_lib.client.common_lib import logging_manager
Paul Pendlebury57593562011-06-15 10:45:49 -070020from autotest_lib.server import test, subcommand, profilers
beepsd0672682013-09-16 17:32:16 -070021from autotest_lib.server.hosts import abstract_ssh, factory as host_factory
jadmanski10646442008-08-13 14:05:21 +000022from autotest_lib.tko import db as tko_db, status_lib, utils as tko_utils
jadmanski10646442008-08-13 14:05:21 +000023
24
mbligh084bc172008-10-18 14:02:45 +000025def _control_segment_path(name):
26 """Get the pathname of the named control segment file."""
jadmanski10646442008-08-13 14:05:21 +000027 server_dir = os.path.dirname(os.path.abspath(__file__))
mbligh084bc172008-10-18 14:02:45 +000028 return os.path.join(server_dir, "control_segments", name)
jadmanski10646442008-08-13 14:05:21 +000029
30
# Filenames (relative to the directory holding a job's control files) used
# for the client control file, the server control file and the machines list.
CLIENT_CONTROL_FILENAME = 'control'
SERVER_CONTROL_FILENAME = 'control.srv'
MACHINES_FILENAME = '.machines'

# Absolute paths of the canned control segments shipped alongside this
# module (resolved via _control_segment_path).
CLIENT_WRAPPER_CONTROL_FILE = _control_segment_path('client_wrapper')
CRASHDUMPS_CONTROL_FILE = _control_segment_path('crashdumps')
CRASHINFO_CONTROL_FILE = _control_segment_path('crashinfo')
INSTALL_CONTROL_FILE = _control_segment_path('install')
CLEANUP_CONTROL_FILE = _control_segment_path('cleanup')
VERIFY_CONTROL_FILE = _control_segment_path('verify')
REPAIR_CONTROL_FILE = _control_segment_path('repair')
PROVISION_CONTROL_FILE = _control_segment_path('provision')
VERIFY_JOB_REPO_URL_CONTROL_FILE = _control_segment_path('verify_job_repo_url')
RESET_CONTROL_FILE = _control_segment_path('reset')
jadmanski10646442008-08-13 14:05:21 +000045
mbligh062ed152009-01-13 00:57:14 +000046# by default provide a stub that generates no site data
47def _get_site_job_data_dummy(job):
48 return {}
49
50
class status_indenter(base_job.status_indenter):
    """Simple integer-backed status indenter for the server job."""

    def __init__(self):
        self._indent = 0


    @property
    def indent(self):
        """The current indentation level."""
        return self._indent


    def increment(self):
        """Raise the indentation level by one."""
        self._indent += 1


    def decrement(self):
        """Lower the indentation level by one."""
        self._indent -= 1


    def get_context(self):
        """Returns a context object for use by job.get_record_context."""
        class context(object):
            def __init__(self, indenter, indent):
                self._indenter = indenter
                self._indent = indent
            def restore(self):
                self._indenter._indent = self._indent
        saved_level = self._indent
        return context(self, saved_level)
79
80
class server_job_record_hook(object):
    """The job.record hook installed by the server job.

    Each time a status log entry is recorded, this hook drains the job's
    warning loggers and records any pending WARN messages, then echoes
    rendered versions of all entries to INFO-level logging and feeds them
    to the job's status parser. It is a class (rather than a function) so
    it can keep a re-entrancy flag: the hook itself calls job.record to
    log the WARN messages.

    Depends on job._read_warnings and job._logger.
    """
    def __init__(self, job):
        self._job = job
        self._being_called = False


    def __call__(self, entry):
        """Invoke the real hook (_hook) unless already inside it.

        Makes no attempt at thread safety; it only guards against an
        unbounded job.record->_hook->job.record->... recursion.
        """
        if self._being_called:
            return
        self._being_called = True
        try:
            self._hook(self._job, entry)
        finally:
            self._being_called = False


    @staticmethod
    def _hook(job, entry):
        """The core hook, which can safely call job.record."""
        pending = []
        # drain every warning logger and record any new WARN entries
        for timestamp, msg in job._read_warnings():
            warning = base_job.status_log_entry(
                'WARN', None, None, msg, {}, timestamp=timestamp)
            pending.append(warning)
            job.record_entry(warning)
        pending.append(entry)
        # echo rendered versions of all the status logs to INFO logging
        # and hand them to the status parser
        for log_entry in pending:
            rendered = job._logger.render_entry(log_entry)
            logging.info(rendered)
            job._parse_status(rendered)
jadmanski2a89dac2010-06-11 14:32:58 +0000125
126
class base_server_job(base_job.base_job):
    """The server-side concrete implementation of base_job.

    Optional properties provided by this implementation:
        serverdir
        conmuxdir

        num_tests_run
        num_tests_failed

        warning_manager
        warning_loggers
    """

    # Status log format version: written to the job keyvals as
    # 'status_version' and passed to status_lib.parser in init_parser.
    _STATUS_VERSION = 1

    # TODO crbug.com/285395 eliminate ssh_verbosity_flag
jadmanski10646442008-08-13 14:05:21 +0000144 def __init__(self, control, args, resultdir, label, user, machines,
145 client=False, parse_job='',
beepsd0672682013-09-16 17:32:16 -0700146 ssh_user=host_factory.DEFAULT_SSH_USER,
147 ssh_port=host_factory.DEFAULT_SSH_PORT,
148 ssh_pass=host_factory.DEFAULT_SSH_PASS,
149 ssh_verbosity_flag=host_factory.DEFAULT_SSH_VERBOSITY,
150 ssh_options=host_factory.DEFAULT_SSH_OPTIONS,
Aviv Keshetc5947fa2013-09-04 14:06:29 -0700151 test_retry=0, group_name='',
Fang Dengd1c2b732013-08-20 12:59:46 -0700152 tag='', disable_sysinfo=False,
mblighe0cbc912010-03-11 18:03:07 +0000153 control_filename=SERVER_CONTROL_FILENAME):
jadmanski10646442008-08-13 14:05:21 +0000154 """
mbligh374f3412009-05-13 21:29:45 +0000155 Create a server side job object.
mblighb5dac432008-11-27 00:38:44 +0000156
mblighe7d9c602009-07-02 19:02:33 +0000157 @param control: The pathname of the control file.
158 @param args: Passed to the control file.
159 @param resultdir: Where to throw the results.
160 @param label: Description of the job.
161 @param user: Username for the job (email address).
162 @param client: True if this is a client-side control file.
163 @param parse_job: string, if supplied it is the job execution tag that
164 the results will be passed through to the TKO parser with.
165 @param ssh_user: The SSH username. [root]
166 @param ssh_port: The SSH port number. [22]
167 @param ssh_pass: The SSH passphrase, if needed.
Fang Dengd1c2b732013-08-20 12:59:46 -0700168 @param ssh_verbosity_flag: The SSH verbosity flag, '-v', '-vv',
169 '-vvv', or an empty string if not needed.
Aviv Keshetc5947fa2013-09-04 14:06:29 -0700170 @param ssh_options: A string giving additional options that will be
171 included in ssh commands.
Scott Zawalski91493c82013-01-25 16:15:20 -0500172 @param test_retry: The number of times to retry a test if the test did
173 not complete successfully.
mblighe7d9c602009-07-02 19:02:33 +0000174 @param group_name: If supplied, this will be written out as
mbligh374f3412009-05-13 21:29:45 +0000175 host_group_name in the keyvals file for the parser.
mblighe7d9c602009-07-02 19:02:33 +0000176 @param tag: The job execution tag from the scheduler. [optional]
Christopher Wiley8a91f232013-07-09 11:02:27 -0700177 @param disable_sysinfo: Whether we should disable the sysinfo step of
178 tests for a modest shortening of test time. [optional]
mblighe0cbc912010-03-11 18:03:07 +0000179 @param control_filename: The filename where the server control file
180 should be written in the results directory.
jadmanski10646442008-08-13 14:05:21 +0000181 """
Scott Zawalski91493c82013-01-25 16:15:20 -0500182 super(base_server_job, self).__init__(resultdir=resultdir,
183 test_retry=test_retry)
mbligh0d0f67d2009-11-06 03:15:03 +0000184 path = os.path.dirname(__file__)
Scott Zawalski91493c82013-01-25 16:15:20 -0500185 self.test_retry = test_retry
mbligh0d0f67d2009-11-06 03:15:03 +0000186 self.control = control
187 self._uncollected_log_file = os.path.join(self.resultdir,
188 'uncollected_logs')
189 debugdir = os.path.join(self.resultdir, 'debug')
190 if not os.path.exists(debugdir):
191 os.mkdir(debugdir)
192
193 if user:
194 self.user = user
195 else:
196 self.user = getpass.getuser()
197
jadmanski808f4b12010-04-09 22:30:31 +0000198 self.args = args
Peter Mayo7a875762012-06-13 14:38:15 -0400199 self.label = label
jadmanski10646442008-08-13 14:05:21 +0000200 self.machines = machines
mbligh0d0f67d2009-11-06 03:15:03 +0000201 self._client = client
jadmanski10646442008-08-13 14:05:21 +0000202 self.warning_loggers = set()
jadmanskif37df842009-02-11 00:03:26 +0000203 self.warning_manager = warning_manager()
mbligh0d0f67d2009-11-06 03:15:03 +0000204 self._ssh_user = ssh_user
205 self._ssh_port = ssh_port
206 self._ssh_pass = ssh_pass
Fang Dengd1c2b732013-08-20 12:59:46 -0700207 self._ssh_verbosity_flag = ssh_verbosity_flag
Aviv Keshetc5947fa2013-09-04 14:06:29 -0700208 self._ssh_options = ssh_options
mblighe7d9c602009-07-02 19:02:33 +0000209 self.tag = tag
mbligh09108442008-10-15 16:27:38 +0000210 self.last_boot_tag = None
jadmanski53aaf382008-11-17 16:22:31 +0000211 self.hosts = set()
mbligh0d0f67d2009-11-06 03:15:03 +0000212 self.drop_caches = False
mblighb5dac432008-11-27 00:38:44 +0000213 self.drop_caches_between_iterations = False
mblighe0cbc912010-03-11 18:03:07 +0000214 self._control_filename = control_filename
Christopher Wiley8a91f232013-07-09 11:02:27 -0700215 self._disable_sysinfo = disable_sysinfo
jadmanski10646442008-08-13 14:05:21 +0000216
showard75cdfee2009-06-10 17:40:41 +0000217 self.logging = logging_manager.get_logging_manager(
218 manage_stdout_and_stderr=True, redirect_fds=True)
219 subcommand.logging_manager_object = self.logging
jadmanski10646442008-08-13 14:05:21 +0000220
mbligh0d0f67d2009-11-06 03:15:03 +0000221 self.sysinfo = sysinfo.sysinfo(self.resultdir)
jadmanski043e1132008-11-19 17:10:32 +0000222 self.profilers = profilers.profilers(self)
jadmanskic09fc152008-10-15 17:56:59 +0000223
jadmanski10646442008-08-13 14:05:21 +0000224 job_data = {'label' : label, 'user' : user,
225 'hostname' : ','.join(machines),
Eric Li861b2d52011-02-04 14:50:35 -0800226 'drone' : platform.node(),
mbligh0d0f67d2009-11-06 03:15:03 +0000227 'status_version' : str(self._STATUS_VERSION),
showard170873e2009-01-07 00:22:26 +0000228 'job_started' : str(int(time.time()))}
mbligh374f3412009-05-13 21:29:45 +0000229 if group_name:
230 job_data['host_group_name'] = group_name
jadmanski10646442008-08-13 14:05:21 +0000231
mbligh0d0f67d2009-11-06 03:15:03 +0000232 # only write these keyvals out on the first job in a resultdir
233 if 'job_started' not in utils.read_keyval(self.resultdir):
234 job_data.update(get_site_job_data(self))
235 utils.write_keyval(self.resultdir, job_data)
236
237 self._parse_job = parse_job
showardcc929362010-01-25 21:20:41 +0000238 self._using_parser = (self._parse_job and len(machines) <= 1)
mbligh0d0f67d2009-11-06 03:15:03 +0000239 self.pkgmgr = packages.PackageManager(
240 self.autodir, run_function_dargs={'timeout':600})
showard21baa452008-10-21 00:08:39 +0000241 self.num_tests_run = 0
242 self.num_tests_failed = 0
243
jadmanski550fdc22008-11-20 16:32:08 +0000244 self._register_subcommand_hooks()
245
mbligh0d0f67d2009-11-06 03:15:03 +0000246 # these components aren't usable on the server
247 self.bootloader = None
248 self.harness = None
249
jadmanski2a89dac2010-06-11 14:32:58 +0000250 # set up the status logger
jadmanski52053632010-06-11 21:08:10 +0000251 self._indenter = status_indenter()
jadmanski2a89dac2010-06-11 14:32:58 +0000252 self._logger = base_job.status_logger(
jadmanski52053632010-06-11 21:08:10 +0000253 self, self._indenter, 'status.log', 'status.log',
jadmanski2a89dac2010-06-11 14:32:58 +0000254 record_hook=server_job_record_hook(self))
255
Dan Shib03ea9d2013-08-15 17:13:27 -0700256 # Initialize a flag to indicate DUT failure during the test, e.g.,
257 # unexpected reboot.
258 self.failed_with_device_error = False
259
mbligh0d0f67d2009-11-06 03:15:03 +0000260
261 @classmethod
262 def _find_base_directories(cls):
263 """
264 Determine locations of autodir, clientdir and serverdir. Assumes
265 that this file is located within serverdir and uses __file__ along
266 with relative paths to resolve the location.
267 """
268 serverdir = os.path.abspath(os.path.dirname(__file__))
269 autodir = os.path.normpath(os.path.join(serverdir, '..'))
270 clientdir = os.path.join(autodir, 'client')
271 return autodir, clientdir, serverdir
272
273
Scott Zawalski91493c82013-01-25 16:15:20 -0500274 def _find_resultdir(self, resultdir, *args, **dargs):
mbligh0d0f67d2009-11-06 03:15:03 +0000275 """
276 Determine the location of resultdir. For server jobs we expect one to
277 always be explicitly passed in to __init__, so just return that.
278 """
279 if resultdir:
280 return os.path.normpath(resultdir)
281 else:
282 return None
283
jadmanski550fdc22008-11-20 16:32:08 +0000284
    def _get_status_logger(self):
        """Return a reference to the status logger (set up in __init__)."""
        return self._logger
288
289
jadmanskie432dd22009-01-30 15:04:51 +0000290 @staticmethod
291 def _load_control_file(path):
292 f = open(path)
293 try:
294 control_file = f.read()
295 finally:
296 f.close()
297 return re.sub('\r', '', control_file)
298
299
jadmanski550fdc22008-11-20 16:32:08 +0000300 def _register_subcommand_hooks(self):
mbligh2b92b862008-11-22 13:25:32 +0000301 """
302 Register some hooks into the subcommand modules that allow us
303 to properly clean up self.hosts created in forked subprocesses.
304 """
jadmanski550fdc22008-11-20 16:32:08 +0000305 def on_fork(cmd):
306 self._existing_hosts_on_fork = set(self.hosts)
307 def on_join(cmd):
308 new_hosts = self.hosts - self._existing_hosts_on_fork
309 for host in new_hosts:
310 host.close()
311 subcommand.subcommand.register_fork_hook(on_fork)
312 subcommand.subcommand.register_join_hook(on_join)
313
jadmanski10646442008-08-13 14:05:21 +0000314
    def init_parser(self):
        """
        Start the continuous parsing of self.resultdir. This sets up
        the database connection and inserts the basic job object into
        the database if necessary.

        No-op unless continuous parsing was enabled (self._using_parser).
        """
        if not self._using_parser:
            return
        # redirect parser debugging to .parse.log
        parse_log = os.path.join(self.resultdir, '.parse.log')
        parse_log = open(parse_log, 'w', 0)  # unbuffered, so output lands immediately
        tko_utils.redirect_parser_debugging(parse_log)
        # create a job model object and set up the db
        self.results_db = tko_db.db(autocommit=True)
        self.parser = status_lib.parser(self._STATUS_VERSION)
        self.job_model = self.parser.make_job(self.resultdir)
        self.parser.start(self.job_model)
        # check if a job already exists in the db and insert it if
        # it does not
        job_idx = self.results_db.find_job(self._parse_job)
        if job_idx is None:
            self.results_db.insert_job(self._parse_job, self.job_model)
        else:
            # job already recorded: attach the model to the existing rows
            machine_idx = self.results_db.lookup_machine(self.job_model.machine)
            self.job_model.index = job_idx
            self.job_model.machine_idx = machine_idx
341
342
343 def cleanup_parser(self):
mbligh2b92b862008-11-22 13:25:32 +0000344 """
345 This should be called after the server job is finished
jadmanski10646442008-08-13 14:05:21 +0000346 to carry out any remaining cleanup (e.g. flushing any
mbligh2b92b862008-11-22 13:25:32 +0000347 remaining test results to the results db)
348 """
mbligh0d0f67d2009-11-06 03:15:03 +0000349 if not self._using_parser:
jadmanski10646442008-08-13 14:05:21 +0000350 return
351 final_tests = self.parser.end()
352 for test in final_tests:
353 self.__insert_test(test)
mbligh0d0f67d2009-11-06 03:15:03 +0000354 self._using_parser = False
jadmanski10646442008-08-13 14:05:21 +0000355
Aviv Keshetc5947fa2013-09-04 14:06:29 -0700356 # TODO crbug.com/285395 add a kwargs parameter.
357 def _make_namespace(self):
358 """Create a namespace dictionary to be passed along to control file.
359
360 Creates a namespace argument populated with standard values:
361 machines, job, ssh_user, ssh_port, ssh_pass, ssh_verbosity_flag,
362 and ssh_options.
363 """
364 namespace = {'machines' : self.machines,
365 'job' : self,
366 'ssh_user' : self._ssh_user,
367 'ssh_port' : self._ssh_port,
368 'ssh_pass' : self._ssh_pass,
369 'ssh_verbosity_flag' : self._ssh_verbosity_flag,
370 'ssh_options' : self._ssh_options}
371 return namespace
372
jadmanski10646442008-08-13 14:05:21 +0000373
374 def verify(self):
Dan Shi07e09af2013-04-12 09:31:29 -0700375 """Verify machines are all ssh-able."""
jadmanski10646442008-08-13 14:05:21 +0000376 if not self.machines:
mbligh084bc172008-10-18 14:02:45 +0000377 raise error.AutoservError('No machines specified to verify')
mbligh0fce4112008-11-27 00:37:17 +0000378 if self.resultdir:
379 os.chdir(self.resultdir)
jadmanski10646442008-08-13 14:05:21 +0000380 try:
Aviv Keshetc5947fa2013-09-04 14:06:29 -0700381 namespace = self._make_namespace()
mbligh084bc172008-10-18 14:02:45 +0000382 self._execute_code(VERIFY_CONTROL_FILE, namespace, protect=False)
jadmanski10646442008-08-13 14:05:21 +0000383 except Exception, e:
mbligh2b92b862008-11-22 13:25:32 +0000384 msg = ('Verify failed\n' + str(e) + '\n' + traceback.format_exc())
jadmanski10646442008-08-13 14:05:21 +0000385 self.record('ABORT', None, None, msg)
386 raise
387
388
Dan Shi07e09af2013-04-12 09:31:29 -0700389 def reset(self):
390 """Reset machines by first cleanup then verify each machine."""
391 if not self.machines:
392 raise error.AutoservError('No machines specified to reset.')
393 if self.resultdir:
394 os.chdir(self.resultdir)
395
396 try:
Aviv Keshetc5947fa2013-09-04 14:06:29 -0700397 namespace = self._make_namespace()
Dan Shi07e09af2013-04-12 09:31:29 -0700398 self._execute_code(RESET_CONTROL_FILE, namespace, protect=False)
399 except Exception as e:
400 msg = ('Reset failed\n' + str(e) + '\n' +
401 traceback.format_exc())
402 self.record('ABORT', None, None, msg)
403 raise
404
405
jadmanski10646442008-08-13 14:05:21 +0000406 def repair(self, host_protection):
407 if not self.machines:
408 raise error.AutoservError('No machines specified to repair')
mbligh0fce4112008-11-27 00:37:17 +0000409 if self.resultdir:
410 os.chdir(self.resultdir)
Aviv Keshetc5947fa2013-09-04 14:06:29 -0700411
412 namespace = self._make_namespace()
413 namespace.update({'protection_level' : host_protection})
mbligh25c0b8c2009-01-24 01:44:17 +0000414
mbligh0931b0a2009-04-08 17:44:48 +0000415 self._execute_code(REPAIR_CONTROL_FILE, namespace, protect=False)
jadmanski10646442008-08-13 14:05:21 +0000416
417
Alex Millercb79ba72013-05-29 14:43:00 -0700418 def provision(self, labels):
419 """
420 Provision all hosts to match |labels|.
421
422 @param labels: A comma seperated string of labels to provision the
423 host to.
424
425 """
426 namespace = {'provision_labels': labels}
427 control = self._load_control_file(PROVISION_CONTROL_FILE)
428 self.run(control=control, namespace=namespace)
429
430
jadmanski10646442008-08-13 14:05:21 +0000431 def precheck(self):
432 """
433 perform any additional checks in derived classes.
434 """
435 pass
436
437
438 def enable_external_logging(self):
mbligh2b92b862008-11-22 13:25:32 +0000439 """
440 Start or restart external logging mechanism.
jadmanski10646442008-08-13 14:05:21 +0000441 """
442 pass
443
444
445 def disable_external_logging(self):
mbligh2b92b862008-11-22 13:25:32 +0000446 """
447 Pause or stop external logging mechanism.
jadmanski10646442008-08-13 14:05:21 +0000448 """
449 pass
450
451
452 def use_external_logging(self):
mbligh2b92b862008-11-22 13:25:32 +0000453 """
454 Return True if external logging should be used.
jadmanski10646442008-08-13 14:05:21 +0000455 """
456 return False
457
458
    def _make_parallel_wrapper(self, function, machines, log):
        """Wrap function as appropriate for calling by parallel_simple.

        Three mutually exclusive cases:
          * parsing enabled and forking expected: the wrapper re-targets
            this job object at the single machine it receives (parse tag,
            machine list, execution context, keyvals) and brackets the
            call with init_parser/cleanup_parser;
          * multiple machines with logging: the wrapper only switches the
            execution context and writes per-machine keyvals;
          * otherwise: function is returned unwrapped.
        """
        # forking happens unless this is exactly the job's own single machine
        is_forking = not (len(machines) == 1 and self.machines == machines)
        if self._parse_job and is_forking and log:
            def wrapper(machine):
                # narrow this job object down to the one machine; intended
                # to run in the per-machine subprocess (see parallel_simple)
                self._parse_job += "/" + machine
                self._using_parser = True
                self.machines = [machine]
                self.push_execution_context(machine)
                os.chdir(self.resultdir)
                utils.write_keyval(self.resultdir, {"hostname": machine})
                self.init_parser()
                result = function(machine)
                self.cleanup_parser()
                return result
        elif len(machines) > 1 and log:
            def wrapper(machine):
                self.push_execution_context(machine)
                os.chdir(self.resultdir)
                machine_data = {'hostname' : machine,
                                'status_version' : str(self._STATUS_VERSION)}
                utils.write_keyval(self.resultdir, machine_data)
                result = function(machine)
                return result
        else:
            wrapper = function
        return wrapper
486
487
488 def parallel_simple(self, function, machines, log=True, timeout=None,
489 return_results=False):
490 """
491 Run 'function' using parallel_simple, with an extra wrapper to handle
492 the necessary setup for continuous parsing, if possible. If continuous
493 parsing is already properly initialized then this should just work.
494
495 @param function: A callable to run in parallel given each machine.
496 @param machines: A list of machine names to be passed one per subcommand
497 invocation of function.
498 @param log: If True, output will be written to output in a subdirectory
499 named after each machine.
500 @param timeout: Seconds after which the function call should timeout.
501 @param return_results: If True instead of an AutoServError being raised
502 on any error a list of the results|exceptions from the function
503 called on each arg is returned. [default: False]
504
505 @raises error.AutotestError: If any of the functions failed.
506 """
507 wrapper = self._make_parallel_wrapper(function, machines, log)
508 return subcommand.parallel_simple(wrapper, machines,
509 log=log, timeout=timeout,
510 return_results=return_results)
511
512
513 def parallel_on_machines(self, function, machines, timeout=None):
514 """
showardcd5fac42009-07-06 20:19:43 +0000515 @param function: Called in parallel with one machine as its argument.
mbligh415dc212009-06-15 21:53:34 +0000516 @param machines: A list of machines to call function(machine) on.
517 @param timeout: Seconds after which the function call should timeout.
518
519 @returns A list of machines on which function(machine) returned
520 without raising an exception.
521 """
showardcd5fac42009-07-06 20:19:43 +0000522 results = self.parallel_simple(function, machines, timeout=timeout,
mbligh415dc212009-06-15 21:53:34 +0000523 return_results=True)
524 success_machines = []
525 for result, machine in itertools.izip(results, machines):
526 if not isinstance(result, Exception):
527 success_machines.append(machine)
528 return success_machines
jadmanski10646442008-08-13 14:05:21 +0000529
530
    # Sentinel: pass as control_file_dir to ask run() to write the control
    # files into a fresh temporary directory.
    _USE_TEMP_DIR = object()
    def run(self, cleanup=False, install_before=False, install_after=False,
            collect_crashdumps=True, namespace={}, control=None,
            control_file_dir=None, verify_job_repo_url=False,
            only_collect_crashinfo=False, skip_crash_collection=False):
        """
        Execute the server control file, with optional install, crash
        collection and cleanup phases around it.

        @param cleanup: Run the cleanup control segment after the job.
        @param install_before: Run the install control segment on the
                machines before executing the control file.
        @param install_after: Run the install control segment afterwards.
        @param collect_crashdumps: Collect crash dumps when the job ends
                (crashinfo is collected instead if a device error was
                flagged or the control file did not complete).
        @param namespace: Extra names exposed to the control file. It is
                copied before use, so the shared {} default is never
                mutated.
        @param control: Control file text; if None, self.control is loaded
                from disk.
        @param control_file_dir: Directory to write control files into;
                None means self.resultdir, _USE_TEMP_DIR means a fresh
                temporary directory that is removed afterwards.
        @param verify_job_repo_url: Run the control segment that verifies
                the job_repo_url contains autotest packages.
        @param only_collect_crashinfo: Skip running any control file and
                only perform crashinfo collection.
        @param skip_crash_collection: Suppress crash dump/info collection
                entirely.
        """
        # for a normal job, make sure the uncollected logs file exists
        # for a crashinfo-only run it should already exist, bail out otherwise
        created_uncollected_logs = False
        logging.info("I am PID %s", os.getpid())
        if self.resultdir and not os.path.exists(self._uncollected_log_file):
            if only_collect_crashinfo:
                # if this is a crashinfo-only run, and there were no existing
                # uncollected logs, just bail out early
                logging.info("No existing uncollected logs, "
                             "skipping crashinfo collection")
                return
            else:
                log_file = open(self._uncollected_log_file, "w")
                pickle.dump([], log_file)
                log_file.close()
                created_uncollected_logs = True

        # use a copy so changes don't affect the original dictionary
        namespace = namespace.copy()
        machines = self.machines
        if control is None:
            if self.control is None:
                control = ''
            else:
                control = self._load_control_file(self.control)
        if control_file_dir is None:
            control_file_dir = self.resultdir

        self.aborted = False
        namespace.update(self._make_namespace())
        namespace.update({'args' : self.args})
        test_start_time = int(time.time())

        if self.resultdir:
            os.chdir(self.resultdir)
            # touch status.log so that the parser knows a job is running here
            open(self.get_status_log_path(), 'a').close()
            self.enable_external_logging()

        # assume crashinfo collection is needed; only a control file that
        # runs to completion downgrades this (see assignment below)
        collect_crashinfo = True
        temp_control_file_dir = None
        try:
            try:
                if install_before and machines:
                    self._execute_code(INSTALL_CONTROL_FILE, namespace)

                if only_collect_crashinfo:
                    return

                # If the verify_job_repo_url option is set but we're unable
                # to actually verify that the job_repo_url contains the autotest
                # package, this job will fail.
                if verify_job_repo_url:
                    self._execute_code(VERIFY_JOB_REPO_URL_CONTROL_FILE,
                                       namespace)
                else:
                    logging.warning('Not checking if job_repo_url contains '
                                    'autotest packages on %s', machines)

                # determine the dir to write the control files to
                cfd_specified = (control_file_dir
                                 and control_file_dir is not self._USE_TEMP_DIR)
                if cfd_specified:
                    temp_control_file_dir = None
                else:
                    temp_control_file_dir = tempfile.mkdtemp(
                        suffix='temp_control_file_dir')
                    control_file_dir = temp_control_file_dir
                server_control_file = os.path.join(control_file_dir,
                                                   self._control_filename)
                client_control_file = os.path.join(control_file_dir,
                                                   CLIENT_CONTROL_FILENAME)
                if self._client:
                    # client-side job: write the client control file and use
                    # the canned client_wrapper segment as the server control
                    namespace['control'] = control
                    utils.open_write_close(client_control_file, control)
                    shutil.copyfile(CLIENT_WRAPPER_CONTROL_FILE,
                                    server_control_file)
                else:
                    utils.open_write_close(server_control_file, control)
                logging.info("Processing control file")
                self._execute_code(server_control_file, namespace)
                logging.info("Finished processing control file")

                # If no device error occured, no need to collect crashinfo.
                collect_crashinfo = self.failed_with_device_error
            except Exception, e:
                try:
                    logging.exception(
                            'Exception escaped control file, job aborting:')
                    self.record('INFO', None, None, str(e),
                                {'job_abort_reason': str(e)})
                except:
                    pass # don't let logging exceptions here interfere
                raise
        finally:
            if temp_control_file_dir:
                # Clean up temp directory used for copies of the control files
                try:
                    shutil.rmtree(temp_control_file_dir)
                except Exception, e:
                    logging.warn('Could not remove temp directory %s: %s',
                                 temp_control_file_dir, e)

            if machines and (collect_crashdumps or collect_crashinfo):
                namespace['test_start_time'] = test_start_time
                if skip_crash_collection:
                    logging.info('Skipping crash dump/info collection '
                                 'as requested.')
                elif collect_crashinfo:
                    # includes crashdumps
                    self._execute_code(CRASHINFO_CONTROL_FILE, namespace)
                else:
                    self._execute_code(CRASHDUMPS_CONTROL_FILE, namespace)
            self.disable_external_logging()
            if cleanup and machines:
                self._execute_code(CLEANUP_CONTROL_FILE, namespace)
            if self._uncollected_log_file and created_uncollected_logs:
                os.remove(self._uncollected_log_file)
            if install_after and machines:
                self._execute_code(INSTALL_CONTROL_FILE, namespace)
jadmanski10646442008-08-13 14:05:21 +0000656
657
658 def run_test(self, url, *args, **dargs):
mbligh2b92b862008-11-22 13:25:32 +0000659 """
660 Summon a test object and run it.
jadmanski10646442008-08-13 14:05:21 +0000661
662 tag
663 tag to add to testname
664 url
665 url of the test to run
666 """
Christopher Wiley8a91f232013-07-09 11:02:27 -0700667 if self._disable_sysinfo:
668 dargs['disable_sysinfo'] = True
669
mblighfc3da5b2010-01-06 18:37:22 +0000670 group, testname = self.pkgmgr.get_package_name(url, 'test')
671 testname, subdir, tag = self._build_tagged_test_name(testname, dargs)
672 outputdir = self._make_test_outputdir(subdir)
jadmanski10646442008-08-13 14:05:21 +0000673
674 def group_func():
675 try:
676 test.runtest(self, url, tag, args, dargs)
677 except error.TestBaseException, e:
678 self.record(e.exit_status, subdir, testname, str(e))
679 raise
680 except Exception, e:
681 info = str(e) + "\n" + traceback.format_exc()
682 self.record('FAIL', subdir, testname, info)
683 raise
684 else:
mbligh2b92b862008-11-22 13:25:32 +0000685 self.record('GOOD', subdir, testname, 'completed successfully')
jadmanskide292df2008-08-26 20:51:14 +0000686
687 result, exc_info = self._run_group(testname, subdir, group_func)
688 if exc_info and isinstance(exc_info[1], error.TestBaseException):
689 return False
690 elif exc_info:
691 raise exc_info[0], exc_info[1], exc_info[2]
692 else:
693 return True
jadmanski10646442008-08-13 14:05:21 +0000694
695
    def _run_group(self, name, subdir, function, *args, **dargs):
        """\
        Underlying method for running something inside of a group.

        Emits a START record, runs function, then emits the matching END
        record whose status reflects the outcome.

        @param name: group name used in the status log records.
        @param subdir: results subdirectory for the records, or None.
        @param function: callable to run inside the group.
        @param args, dargs: passed through to function.

        @returns (result, exc_info): function's return value (None if it
                raised) and the sys.exc_info() triple if a TestBaseException
                escaped, else None.
        @raises error.JobError: if function raises a non-test exception.
        """
        result, exc_info = None, None
        try:
            self.record('START', subdir, name)
            result = function(*args, **dargs)
        except error.TestBaseException, e:
            # Test failures close the group with the test's own exit status
            # and are handed back via exc_info instead of being re-raised.
            self.record("END %s" % e.exit_status, subdir, name)
            exc_info = sys.exc_info()
        except Exception, e:
            # Anything else aborts the group and becomes a JobError.
            err_msg = str(e) + '\n'
            err_msg += traceback.format_exc()
            self.record('END ABORT', subdir, name, err_msg)
            raise error.JobError(name + ' failed\n' + traceback.format_exc())
        else:
            self.record('END GOOD', subdir, name)

        return result, exc_info
jadmanski10646442008-08-13 14:05:21 +0000716
717
718 def run_group(self, function, *args, **dargs):
719 """\
720 function:
721 subroutine to run
722 *args:
723 arguments for the function
724 """
725
726 name = function.__name__
727
728 # Allow the tag for the group to be specified.
729 tag = dargs.pop('tag', None)
730 if tag:
731 name = tag
732
jadmanskide292df2008-08-26 20:51:14 +0000733 return self._run_group(name, None, function, *args, **dargs)[0]
jadmanski10646442008-08-13 14:05:21 +0000734
735
    def run_reboot(self, reboot_func, get_kernel_func):
        """\
        A specialization of run_group meant specifically for handling
        a reboot. Includes support for capturing the kernel version
        after the reboot.

        @param reboot_func: a function that carries out the reboot.
        @param get_kernel_func: a function that returns a string
                representing the kernel version.

        @raises Whatever reboot_func raises, after recording END FAIL.
        """
        try:
            self.record('START', None, 'reboot')
            reboot_func()
        except Exception, e:
            # Record the failure, then let the caller see the exception.
            err_msg = str(e) + '\n' + traceback.format_exc()
            self.record('END FAIL', None, 'reboot', err_msg)
            raise
        else:
            # Only query the kernel after a successful reboot.
            kernel = get_kernel_func()
            self.record('END GOOD', None, 'reboot',
                        optional_fields={"kernel": kernel})
jadmanski10646442008-08-13 14:05:21 +0000758
759
jadmanskie432dd22009-01-30 15:04:51 +0000760 def run_control(self, path):
761 """Execute a control file found at path (relative to the autotest
762 path). Intended for executing a control file within a control file,
763 not for running the top-level job control file."""
764 path = os.path.join(self.autodir, path)
765 control_file = self._load_control_file(path)
mbligh0d0f67d2009-11-06 03:15:03 +0000766 self.run(control=control_file, control_file_dir=self._USE_TEMP_DIR)
jadmanskie432dd22009-01-30 15:04:51 +0000767
768
jadmanskic09fc152008-10-15 17:56:59 +0000769 def add_sysinfo_command(self, command, logfile=None, on_every_test=False):
mbligh4395bbd2009-03-25 19:34:17 +0000770 self._add_sysinfo_loggable(sysinfo.command(command, logf=logfile),
jadmanskic09fc152008-10-15 17:56:59 +0000771 on_every_test)
772
773
774 def add_sysinfo_logfile(self, file, on_every_test=False):
775 self._add_sysinfo_loggable(sysinfo.logfile(file), on_every_test)
776
777
778 def _add_sysinfo_loggable(self, loggable, on_every_test):
779 if on_every_test:
780 self.sysinfo.test_loggables.add(loggable)
781 else:
782 self.sysinfo.boot_loggables.add(loggable)
783
784
jadmanski10646442008-08-13 14:05:21 +0000785 def _read_warnings(self):
jadmanskif37df842009-02-11 00:03:26 +0000786 """Poll all the warning loggers and extract any new warnings that have
787 been logged. If the warnings belong to a category that is currently
788 disabled, this method will discard them and they will no longer be
789 retrievable.
790
791 Returns a list of (timestamp, message) tuples, where timestamp is an
792 integer epoch timestamp."""
jadmanski10646442008-08-13 14:05:21 +0000793 warnings = []
794 while True:
795 # pull in a line of output from every logger that has
796 # output ready to be read
mbligh2b92b862008-11-22 13:25:32 +0000797 loggers, _, _ = select.select(self.warning_loggers, [], [], 0)
jadmanski10646442008-08-13 14:05:21 +0000798 closed_loggers = set()
799 for logger in loggers:
800 line = logger.readline()
801 # record any broken pipes (aka line == empty)
802 if len(line) == 0:
803 closed_loggers.add(logger)
804 continue
jadmanskif37df842009-02-11 00:03:26 +0000805 # parse out the warning
806 timestamp, msgtype, msg = line.split('\t', 2)
807 timestamp = int(timestamp)
808 # if the warning is valid, add it to the results
809 if self.warning_manager.is_valid(timestamp, msgtype):
810 warnings.append((timestamp, msg.strip()))
jadmanski10646442008-08-13 14:05:21 +0000811
812 # stop listening to loggers that are closed
813 self.warning_loggers -= closed_loggers
814
815 # stop if none of the loggers have any output left
816 if not loggers:
817 break
818
819 # sort into timestamp order
820 warnings.sort()
821 return warnings
822
823
showardcc929362010-01-25 21:20:41 +0000824 def _unique_subdirectory(self, base_subdirectory_name):
825 """Compute a unique results subdirectory based on the given name.
826
827 Appends base_subdirectory_name with a number as necessary to find a
828 directory name that doesn't already exist.
829 """
830 subdirectory = base_subdirectory_name
831 counter = 1
832 while os.path.exists(os.path.join(self.resultdir, subdirectory)):
833 subdirectory = base_subdirectory_name + '.' + str(counter)
834 counter += 1
835 return subdirectory
836
837
jadmanski52053632010-06-11 21:08:10 +0000838 def get_record_context(self):
839 """Returns an object representing the current job.record context.
840
841 The object returned is an opaque object with a 0-arg restore method
842 which can be called to restore the job.record context (i.e. indentation)
843 to the current level. The intention is that it should be used when
844 something external which generate job.record calls (e.g. an autotest
845 client) can fail catastrophically and the server job record state
846 needs to be reset to its original "known good" state.
847
848 @return: A context object with a 0-arg restore() method."""
849 return self._indenter.get_context()
850
851
showardcc929362010-01-25 21:20:41 +0000852 def record_summary(self, status_code, test_name, reason='', attributes=None,
853 distinguishing_attributes=(), child_test_ids=None):
854 """Record a summary test result.
855
856 @param status_code: status code string, see
857 common_lib.log.is_valid_status()
858 @param test_name: name of the test
859 @param reason: (optional) string providing detailed reason for test
860 outcome
861 @param attributes: (optional) dict of string keyvals to associate with
862 this result
863 @param distinguishing_attributes: (optional) list of attribute names
864 that should be used to distinguish identically-named test
865 results. These attributes should be present in the attributes
866 parameter. This is used to generate user-friendly subdirectory
867 names.
868 @param child_test_ids: (optional) list of test indices for test results
869 used in generating this result.
870 """
871 subdirectory_name_parts = [test_name]
872 for attribute in distinguishing_attributes:
873 assert attributes
874 assert attribute in attributes, '%s not in %s' % (attribute,
875 attributes)
876 subdirectory_name_parts.append(attributes[attribute])
877 base_subdirectory_name = '.'.join(subdirectory_name_parts)
878
879 subdirectory = self._unique_subdirectory(base_subdirectory_name)
880 subdirectory_path = os.path.join(self.resultdir, subdirectory)
881 os.mkdir(subdirectory_path)
882
883 self.record(status_code, subdirectory, test_name,
884 status=reason, optional_fields={'is_summary': True})
885
886 if attributes:
887 utils.write_keyval(subdirectory_path, attributes)
888
889 if child_test_ids:
890 ids_string = ','.join(str(test_id) for test_id in child_test_ids)
891 summary_data = {'child_test_ids': ids_string}
892 utils.write_keyval(os.path.join(subdirectory_path, 'summary_data'),
893 summary_data)
894
895
jadmanski16a7ff72009-04-01 18:19:53 +0000896 def disable_warnings(self, warning_type):
jadmanskif37df842009-02-11 00:03:26 +0000897 self.warning_manager.disable_warnings(warning_type)
jadmanski16a7ff72009-04-01 18:19:53 +0000898 self.record("INFO", None, None,
899 "disabling %s warnings" % warning_type,
900 {"warnings.disable": warning_type})
jadmanskif37df842009-02-11 00:03:26 +0000901
902
jadmanski16a7ff72009-04-01 18:19:53 +0000903 def enable_warnings(self, warning_type):
jadmanskif37df842009-02-11 00:03:26 +0000904 self.warning_manager.enable_warnings(warning_type)
jadmanski16a7ff72009-04-01 18:19:53 +0000905 self.record("INFO", None, None,
906 "enabling %s warnings" % warning_type,
907 {"warnings.enable": warning_type})
jadmanskif37df842009-02-11 00:03:26 +0000908
909
jadmanski779bd292009-03-19 17:33:33 +0000910 def get_status_log_path(self, subdir=None):
911 """Return the path to the job status log.
912
913 @param subdir - Optional paramter indicating that you want the path
914 to a subdirectory status log.
915
916 @returns The path where the status log should be.
917 """
mbligh210bae62009-04-01 18:33:13 +0000918 if self.resultdir:
919 if subdir:
920 return os.path.join(self.resultdir, subdir, "status.log")
921 else:
922 return os.path.join(self.resultdir, "status.log")
jadmanski779bd292009-03-19 17:33:33 +0000923 else:
mbligh210bae62009-04-01 18:33:13 +0000924 return None
jadmanski779bd292009-03-19 17:33:33 +0000925
926
jadmanski6bb32d72009-03-19 20:25:24 +0000927 def _update_uncollected_logs_list(self, update_func):
928 """Updates the uncollected logs list in a multi-process safe manner.
929
930 @param update_func - a function that updates the list of uncollected
931 logs. Should take one parameter, the list to be updated.
932 """
Dan Shi07e09af2013-04-12 09:31:29 -0700933 # Skip log collection if file _uncollected_log_file does not exist.
934 if not (self._uncollected_log_file and
935 os.path.exists(self._uncollected_log_file)):
936 return
mbligh0d0f67d2009-11-06 03:15:03 +0000937 if self._uncollected_log_file:
938 log_file = open(self._uncollected_log_file, "r+")
mbligha788dc42009-03-26 21:10:16 +0000939 fcntl.flock(log_file, fcntl.LOCK_EX)
jadmanski6bb32d72009-03-19 20:25:24 +0000940 try:
941 uncollected_logs = pickle.load(log_file)
942 update_func(uncollected_logs)
943 log_file.seek(0)
944 log_file.truncate()
945 pickle.dump(uncollected_logs, log_file)
jadmanski3bff9092009-04-22 18:09:47 +0000946 log_file.flush()
jadmanski6bb32d72009-03-19 20:25:24 +0000947 finally:
948 fcntl.flock(log_file, fcntl.LOCK_UN)
949 log_file.close()
950
951
952 def add_client_log(self, hostname, remote_path, local_path):
953 """Adds a new set of client logs to the list of uncollected logs,
954 to allow for future log recovery.
955
956 @param host - the hostname of the machine holding the logs
957 @param remote_path - the directory on the remote machine holding logs
958 @param local_path - the local directory to copy the logs into
959 """
960 def update_func(logs_list):
961 logs_list.append((hostname, remote_path, local_path))
962 self._update_uncollected_logs_list(update_func)
963
964
965 def remove_client_log(self, hostname, remote_path, local_path):
966 """Removes a set of client logs from the list of uncollected logs,
967 to allow for future log recovery.
968
969 @param host - the hostname of the machine holding the logs
970 @param remote_path - the directory on the remote machine holding logs
971 @param local_path - the local directory to copy the logs into
972 """
973 def update_func(logs_list):
974 logs_list.remove((hostname, remote_path, local_path))
975 self._update_uncollected_logs_list(update_func)
976
977
mbligh0d0f67d2009-11-06 03:15:03 +0000978 def get_client_logs(self):
979 """Retrieves the list of uncollected logs, if it exists.
980
981 @returns A list of (host, remote_path, local_path) tuples. Returns
982 an empty list if no uncollected logs file exists.
983 """
984 log_exists = (self._uncollected_log_file and
985 os.path.exists(self._uncollected_log_file))
986 if log_exists:
987 return pickle.load(open(self._uncollected_log_file))
988 else:
989 return []
990
991
    def _fill_server_control_namespace(self, namespace, protect=True):
        """
        Prepare a namespace to be used when executing server control files.

        This sets up the control file API by importing modules and making them
        available under the appropriate names within namespace.

        For use by _execute_code().

        Args:
          namespace: The namespace dictionary to fill in.
          protect: Boolean. If True (the default) any operation that would
              clobber an existing entry in namespace will cause an error.
        Raises:
          error.AutoservError: When a name would be clobbered by import.
        """
        def _import_names(module_name, names=()):
            """
            Import a module and assign named attributes into namespace.

            Args:
                module_name: The string module name.
                names: A limiting list of names to import from module_name. If
                    empty (the default), all names are imported from the module
                    similar to a "from foo.bar import *" statement.
            Raises:
                error.AutoservError: When a name being imported would clobber
                    a name already in namespace.
            """
            module = __import__(module_name, {}, {}, names)

            # No names supplied? Import * from the lowest level module.
            # (Ugh, why do I have to implement this part myself?)
            if not names:
                # __import__ returns the top-level package; walk down to the
                # leaf submodule before introspecting its names.
                for submodule_name in module_name.split('.')[1:]:
                    module = getattr(module, submodule_name)
                if hasattr(module, '__all__'):
                    names = getattr(module, '__all__')
                else:
                    names = dir(module)

            # Install each name into namespace, checking to make sure it
            # doesn't override anything that already exists.
            for name in names:
                # Check for conflicts to help prevent future problems.
                if name in namespace and protect:
                    if namespace[name] is not getattr(module, name):
                        raise error.AutoservError('importing name '
                                '%s from %s %r would override %r' %
                                (name, module_name, getattr(module, name),
                                 namespace[name]))
                    else:
                        # Encourage cleanliness and the use of __all__ for a
                        # more concrete API with less surprises on '*' imports.
                        warnings.warn('%s (%r) being imported from %s for use '
                                      'in server control files is not the '
                                      'first occurrance of that import.' %
                                      (name, namespace[name], module_name))

                namespace[name] = getattr(module, name)


        # This is the equivalent of prepending a bunch of import statements to
        # the front of the control script.
        namespace.update(os=os, sys=sys, logging=logging)
        _import_names('autotest_lib.server',
                ('hosts', 'autotest', 'kvm', 'git', 'standalone_profiler',
                 'source_kernel', 'rpm_kernel', 'deb_kernel', 'git_kernel'))
        _import_names('autotest_lib.server.subcommand',
                      ('parallel', 'parallel_simple', 'subcommand'))
        _import_names('autotest_lib.server.utils',
                      ('run', 'get_tmp_dir', 'sh_escape', 'parse_machine'))
        _import_names('autotest_lib.client.common_lib.error')
        _import_names('autotest_lib.client.common_lib.barrier', ('barrier',))

        # Inject ourself as the job object into other classes within the API.
        # (Yuck, this injection is a gross thing be part of a public API. -gps)
        #
        # XXX Base & SiteAutotest do not appear to use .job. Who does?
        namespace['autotest'].Autotest.job = self
        # server.hosts.base_classes.Host uses .job.
        namespace['hosts'].Host.job = self
        # Propagate the job's SSH settings to the host factory so hosts
        # created inside control files connect the same way autoserv does.
        namespace['hosts'].factory.ssh_user = self._ssh_user
        namespace['hosts'].factory.ssh_port = self._ssh_port
        namespace['hosts'].factory.ssh_pass = self._ssh_pass
        namespace['hosts'].factory.ssh_verbosity_flag = (
                self._ssh_verbosity_flag)
        namespace['hosts'].factory.ssh_options = self._ssh_options
mbligh084bc172008-10-18 14:02:45 +00001080
1081
    def _execute_code(self, code_file, namespace, protect=True):
        """
        Execute code using a copy of namespace as a server control script.

        Unless protect_namespace is explicitly set to False, the dict will not
        be modified.

        Args:
          code_file: The filename of the control file to execute.
          namespace: A dict containing names to make available during execution.
          protect: Boolean. If True (the default) a copy of the namespace dict
              is used during execution to prevent the code from modifying its
              contents outside of this function. If False the raw dict is
              passed in and modifications will be allowed.
        """
        if protect:
            namespace = namespace.copy()
        self._fill_server_control_namespace(namespace, protect=protect)
        # TODO: Simplify and get rid of the special cases for only 1 machine.
        if len(self.machines) > 1:
            machines_text = '\n'.join(self.machines) + '\n'
            # Only rewrite the file if it does not match our machine list.
            try:
                machines_f = open(MACHINES_FILENAME, 'r')
                existing_machines_text = machines_f.read()
                machines_f.close()
            except EnvironmentError:
                # Missing/unreadable file: treat as "no existing list" so the
                # comparison below forces a rewrite.
                existing_machines_text = None
            if machines_text != existing_machines_text:
                utils.open_write_close(MACHINES_FILENAME, machines_text)
        # Run the control file with namespace as both globals and locals.
        execfile(code_file, namespace, namespace)
jadmanski10646442008-08-13 14:05:21 +00001113
1114
jadmanskie29d0e42010-06-17 16:06:52 +00001115 def _parse_status(self, new_line):
mbligh0d0f67d2009-11-06 03:15:03 +00001116 if not self._using_parser:
jadmanski10646442008-08-13 14:05:21 +00001117 return
jadmanskie29d0e42010-06-17 16:06:52 +00001118 new_tests = self.parser.process_lines([new_line])
jadmanski10646442008-08-13 14:05:21 +00001119 for test in new_tests:
1120 self.__insert_test(test)
1121
1122
    def __insert_test(self, test):
        """
        An internal method to insert a new test result into the
        database. This method will not raise an exception, even if an
        error occurs during the insert, to avoid failing a test
        simply because of unexpected database issues.

        @param test: a parsed test object from the TKO status parser.
        """
        # Keep the run/failure counters in sync with what gets inserted.
        self.num_tests_run += 1
        if status_lib.is_worse_than_or_equal_to(test.status, 'FAIL'):
            self.num_tests_failed += 1
        try:
            self.results_db.insert_test(self.job_model, test)
        except Exception:
            # Deliberately swallow DB errors (see docstring); just report.
            msg = ("WARNING: An unexpected error occured while "
                   "inserting test results into the database. "
                   "Ignoring error.\n" + traceback.format_exc())
            print >> sys.stderr, msg
1139
mblighcaa62c22008-04-07 21:51:17 +00001140
mblighfc3da5b2010-01-06 18:37:22 +00001141 def preprocess_client_state(self):
1142 """
1143 Produce a state file for initializing the state of a client job.
1144
1145 Creates a new client state file with all the current server state, as
1146 well as some pre-set client state.
1147
1148 @returns The path of the file the state was written into.
1149 """
1150 # initialize the sysinfo state
1151 self._state.set('client', 'sysinfo', self.sysinfo.serialize())
1152
1153 # dump the state out to a tempfile
1154 fd, file_path = tempfile.mkstemp(dir=self.tmpdir)
1155 os.close(fd)
mbligha2c99492010-01-27 22:59:50 +00001156
1157 # write_to_file doesn't need locking, we exclusively own file_path
mblighfc3da5b2010-01-06 18:37:22 +00001158 self._state.write_to_file(file_path)
1159 return file_path
1160
1161
    def postprocess_client_state(self, state_path):
        """
        Update the state of this job with the state from a client job.

        Updates the state of the server side of a job with the final state
        of a client job that was run. Updates the non-client-specific state,
        pulls in some specific bits from the client-specific state, and then
        discards the rest. Removes the state file afterwards.

        @param state_path A path to the state file from the client.
        """
        # update the on-disk state
        try:
            self._state.read_from_file(state_path)
            os.remove(state_path)
        except OSError, e:
            # ignore file-not-found errors (the client may never have
            # produced a state file); anything else is real
            if e.errno != errno.ENOENT:
                raise
            else:
                logging.debug('Client state file %s not found', state_path)

        # update the sysinfo state
        if self._state.has('client', 'sysinfo'):
            self.sysinfo.deserialize(self._state.get('client', 'sysinfo'))

        # drop all the client-specific state
        self._state.discard_namespace('client')
1190
1191
mbligh0a883702010-04-21 01:58:34 +00001192 def clear_all_known_hosts(self):
1193 """Clears known hosts files for all AbstractSSHHosts."""
1194 for host in self.hosts:
1195 if isinstance(host, abstract_ssh.AbstractSSHHost):
1196 host.clear_known_hosts()
1197
1198
class warning_manager(object):
    """Class for controlling warning logs. Manages the enabling and disabling
    of warnings.

    Each warning type maps to a list of (start, end) epoch intervals during
    which it was disabled; end is None while an interval is still open.
    """
    def __init__(self):
        # a map of warning types to a list of disabled time intervals
        self.disabled_warnings = {}


    def is_valid(self, timestamp, warning_type):
        """Indicates if a warning (based on the time it occured and its type)
        is a valid warning. A warning is considered "invalid" if this type of
        warning was marked as "disabled" at the time the warning occured."""
        for begin, finish in self.disabled_warnings.get(warning_type, []):
            if timestamp >= begin and (finish is None or timestamp < finish):
                return False
        return True


    def disable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, disables all further warnings of this type."""
        intervals = self.disabled_warnings.setdefault(warning_type, [])
        # Only open a new interval if there is no still-open one.
        no_open_interval = not intervals or intervals[-1][1] is not None
        if no_open_interval:
            intervals.append((int(current_time_func()), None))


    def enable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, enables all further warnings of this type."""
        intervals = self.disabled_warnings.get(warning_type, [])
        # Close the open interval, if any; otherwise nothing to do.
        if intervals and intervals[-1][1] is None:
            begin = intervals[-1][0]
            intervals[-1] = (begin, int(current_time_func()))
Paul Pendlebury57593562011-06-15 10:45:49 -07001230
1231
# Load up site-specific code for generating site-specific job data; falls
# back to the dummy implementation when no site module is present.
get_site_job_data = utils.import_site_function(__file__,
    "autotest_lib.server.site_server_job", "get_site_job_data",
    _get_site_job_data_dummy)


# site_server_job is the site-provided specialization of base_server_job;
# when no site class is defined this resolves to base_server_job itself.
site_server_job = utils.import_site_class(
    __file__, "autotest_lib.server.site_server_job", "site_server_job",
    base_server_job)
1241
1242
class server_job(site_server_job):
    """The concrete server job class: site_server_job already layers any
    site-specific behavior on top of base_server_job, so nothing is added
    here."""
    pass