# pylint: disable-msg=C0111

# Copyright (c) 2011 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""
The main job wrapper for the server side.

This is the core infrastructure. Derived from the client side job.py

Copyright Martin J. Bligh, Andy Whitcroft 2007
"""

import getpass, os, sys, re, tempfile, time, select, platform
import traceback, shutil, warnings, fcntl, pickle, logging, itertools, errno
from autotest_lib.client.bin import sysinfo
from autotest_lib.client.common_lib import base_job
from autotest_lib.client.common_lib import error, utils, packages
from autotest_lib.client.common_lib import logging_manager
from autotest_lib.server import test, subcommand, profilers
from autotest_lib.server.hosts import abstract_ssh
from autotest_lib.tko import db as tko_db, status_lib, utils as tko_utils


def _control_segment_path(name):
    """Get the pathname of the named control segment file."""
    server_dir = os.path.dirname(os.path.abspath(__file__))
    return os.path.join(server_dir, "control_segments", name)


CLIENT_CONTROL_FILENAME = 'control'
SERVER_CONTROL_FILENAME = 'control.srv'
MACHINES_FILENAME = '.machines'

CLIENT_WRAPPER_CONTROL_FILE = _control_segment_path('client_wrapper')
CRASHDUMPS_CONTROL_FILE = _control_segment_path('crashdumps')
CRASHINFO_CONTROL_FILE = _control_segment_path('crashinfo')
INSTALL_CONTROL_FILE = _control_segment_path('install')
CLEANUP_CONTROL_FILE = _control_segment_path('cleanup')
VERIFY_CONTROL_FILE = _control_segment_path('verify')
REPAIR_CONTROL_FILE = _control_segment_path('repair')
PROVISION_CONTROL_FILE = _control_segment_path('provision')
VERIFY_JOB_REPO_URL_CONTROL_FILE = _control_segment_path('verify_job_repo_url')
RESET_CONTROL_FILE = _control_segment_path('reset')


# by default provide a stub that generates no site data
def _get_site_job_data_dummy(job):
    return {}


class status_indenter(base_job.status_indenter):
    """Provide a simple integer-backed status indenter."""
    def __init__(self):
        self._indent = 0


    @property
    def indent(self):
        return self._indent


    def increment(self):
        self._indent += 1


    def decrement(self):
        self._indent -= 1


    def get_context(self):
        """Returns a context object for use by job.get_record_context."""
        class context(object):
            def __init__(self, indenter, indent):
                self._indenter = indenter
                self._indent = indent
            def restore(self):
                self._indenter._indent = self._indent
        return context(self, self._indent)


class server_job_record_hook(object):
    """The job.record hook for server job. Used to inject WARN messages from
    the console or vlm whenever new logs are written, and to echo any logs
    to INFO level logging. Implemented as a class so that it can use state to
    block recursive calls, so that the hook can call job.record itself to
    log WARN messages.

    Depends on job._read_warnings and job._logger.
    """
    def __init__(self, job):
        self._job = job
        self._being_called = False


    def __call__(self, entry):
        """A wrapper around the 'real' record hook, the _hook method, which
        prevents recursion. This isn't making any effort to be threadsafe,
        the intent is to outright block infinite recursion via a
        job.record->_hook->job.record->_hook->job.record... chain."""
        if self._being_called:
            return
        self._being_called = True
        try:
            self._hook(self._job, entry)
        finally:
            self._being_called = False


    @staticmethod
    def _hook(job, entry):
        """The core hook, which can safely call job.record."""
        entries = []
        # poll all our warning loggers for new warnings
        for timestamp, msg in job._read_warnings():
            warning_entry = base_job.status_log_entry(
                'WARN', None, None, msg, {}, timestamp=timestamp)
            entries.append(warning_entry)
            job.record_entry(warning_entry)
        # echo rendered versions of all the status logs to info
        entries.append(entry)
        for entry in entries:
            rendered_entry = job._logger.render_entry(entry)
            logging.info(rendered_entry)
            job._parse_status(rendered_entry)


class base_server_job(base_job.base_job):
    """The server-side concrete implementation of base_job.

    Optional properties provided by this implementation:
        serverdir
        conmuxdir

        num_tests_run
        num_tests_failed

        warning_manager
        warning_loggers
    """

    _STATUS_VERSION = 1

    # TODO crbug.com/285395 eliminate ssh_verbosity_flag
    def __init__(self, control, args, resultdir, label, user, machines,
                 client=False, parse_job='',
                 ssh_user='root', ssh_port=22, ssh_pass='',
                 ssh_verbosity_flag='', ssh_options='',
                 test_retry=0, group_name='',
                 tag='', disable_sysinfo=False,
                 control_filename=SERVER_CONTROL_FILENAME):
        """
        Create a server side job object.

        @param control: The pathname of the control file.
        @param args: Passed to the control file.
        @param resultdir: Where to throw the results.
        @param label: Description of the job.
        @param user: Username for the job (email address).
        @param client: True if this is a client-side control file.
        @param parse_job: string, if supplied it is the job execution tag that
                the results will be passed through to the TKO parser with.
        @param ssh_user: The SSH username. [root]
        @param ssh_port: The SSH port number. [22]
        @param ssh_pass: The SSH passphrase, if needed.
        @param ssh_verbosity_flag: The SSH verbosity flag, '-v', '-vv',
                '-vvv', or an empty string if not needed.
        @param ssh_options: A string giving additional options that will be
                included in ssh commands.
        @param test_retry: The number of times to retry a test if the test did
                not complete successfully.
        @param group_name: If supplied, this will be written out as
                host_group_name in the keyvals file for the parser.
        @param tag: The job execution tag from the scheduler. [optional]
        @param disable_sysinfo: Whether we should disable the sysinfo step of
                tests for a modest shortening of test time. [optional]
        @param control_filename: The filename where the server control file
                should be written in the results directory.
        """
        super(base_server_job, self).__init__(resultdir=resultdir,
                                              test_retry=test_retry)
        path = os.path.dirname(__file__)
        self.test_retry = test_retry
        self.control = control
        self._uncollected_log_file = os.path.join(self.resultdir,
                                                  'uncollected_logs')
        debugdir = os.path.join(self.resultdir, 'debug')
        if not os.path.exists(debugdir):
            os.mkdir(debugdir)

        if user:
            self.user = user
        else:
            self.user = getpass.getuser()

        self.args = args
        self.label = label
        self.machines = machines
        self._client = client
        self.warning_loggers = set()
        self.warning_manager = warning_manager()
        self._ssh_user = ssh_user
        self._ssh_port = ssh_port
        self._ssh_pass = ssh_pass
        self._ssh_verbosity_flag = ssh_verbosity_flag
        self._ssh_options = ssh_options
        self.tag = tag
        self.last_boot_tag = None
        self.hosts = set()
        self.drop_caches = False
        self.drop_caches_between_iterations = False
        self._control_filename = control_filename
        self._disable_sysinfo = disable_sysinfo

        self.logging = logging_manager.get_logging_manager(
                manage_stdout_and_stderr=True, redirect_fds=True)
        subcommand.logging_manager_object = self.logging

        self.sysinfo = sysinfo.sysinfo(self.resultdir)
        self.profilers = profilers.profilers(self)

        job_data = {'label' : label, 'user' : user,
                    'hostname' : ','.join(machines),
                    'drone' : platform.node(),
                    'status_version' : str(self._STATUS_VERSION),
                    'job_started' : str(int(time.time()))}
        if group_name:
            job_data['host_group_name'] = group_name

        # only write these keyvals out on the first job in a resultdir
        if 'job_started' not in utils.read_keyval(self.resultdir):
            job_data.update(get_site_job_data(self))
            utils.write_keyval(self.resultdir, job_data)

        self._parse_job = parse_job
        self._using_parser = (self._parse_job and len(machines) <= 1)
        self.pkgmgr = packages.PackageManager(
                self.autodir, run_function_dargs={'timeout':600})
        self.num_tests_run = 0
        self.num_tests_failed = 0

        self._register_subcommand_hooks()

        # these components aren't usable on the server
        self.bootloader = None
        self.harness = None

        # set up the status logger
        self._indenter = status_indenter()
        self._logger = base_job.status_logger(
                self, self._indenter, 'status.log', 'status.log',
                record_hook=server_job_record_hook(self))

        # Initialize a flag to indicate DUT failure during the test, e.g.,
        # unexpected reboot.
        self.failed_with_device_error = False


    @classmethod
    def _find_base_directories(cls):
        """
        Determine locations of autodir, clientdir and serverdir. Assumes
        that this file is located within serverdir and uses __file__ along
        with relative paths to resolve the location.
        """
        serverdir = os.path.abspath(os.path.dirname(__file__))
        autodir = os.path.normpath(os.path.join(serverdir, '..'))
        clientdir = os.path.join(autodir, 'client')
        return autodir, clientdir, serverdir


    def _find_resultdir(self, resultdir, *args, **dargs):
        """
        Determine the location of resultdir. For server jobs we expect one to
        always be explicitly passed in to __init__, so just return that.
        """
        if resultdir:
            return os.path.normpath(resultdir)
        else:
            return None


    def _get_status_logger(self):
        """Return a reference to the status logger."""
        return self._logger


    @staticmethod
    def _load_control_file(path):
        f = open(path)
        try:
            control_file = f.read()
        finally:
            f.close()
        return re.sub('\r', '', control_file)


    def _register_subcommand_hooks(self):
        """
        Register some hooks into the subcommand modules that allow us
        to properly clean up self.hosts created in forked subprocesses.
        """
        def on_fork(cmd):
            self._existing_hosts_on_fork = set(self.hosts)
        def on_join(cmd):
            new_hosts = self.hosts - self._existing_hosts_on_fork
            for host in new_hosts:
                host.close()
        subcommand.subcommand.register_fork_hook(on_fork)
        subcommand.subcommand.register_join_hook(on_join)


    def init_parser(self):
        """
        Start the continuous parsing of self.resultdir. This sets up
        the database connection and inserts the basic job object into
        the database if necessary.
        """
        if not self._using_parser:
            return
        # redirect parser debugging to .parse.log
        parse_log = os.path.join(self.resultdir, '.parse.log')
        parse_log = open(parse_log, 'w', 0)
        tko_utils.redirect_parser_debugging(parse_log)
        # create a job model object and set up the db
        self.results_db = tko_db.db(autocommit=True)
        self.parser = status_lib.parser(self._STATUS_VERSION)
        self.job_model = self.parser.make_job(self.resultdir)
        self.parser.start(self.job_model)
        # check if a job already exists in the db and insert it if
        # it does not
        job_idx = self.results_db.find_job(self._parse_job)
        if job_idx is None:
            self.results_db.insert_job(self._parse_job, self.job_model)
        else:
            machine_idx = self.results_db.lookup_machine(self.job_model.machine)
            self.job_model.index = job_idx
            self.job_model.machine_idx = machine_idx


    def cleanup_parser(self):
        """
        This should be called after the server job is finished
        to carry out any remaining cleanup (e.g. flushing any
        remaining test results to the results db)
        """
        if not self._using_parser:
            return
        final_tests = self.parser.end()
        for test in final_tests:
            self.__insert_test(test)
        self._using_parser = False

    # TODO crbug.com/285395 add a kwargs parameter.
    def _make_namespace(self):
        """Create a namespace dictionary to be passed along to control file.

        Creates a namespace argument populated with standard values:
        machines, job, ssh_user, ssh_port, ssh_pass, ssh_verbosity_flag,
        and ssh_options.
        """
        namespace = {'machines' : self.machines,
                     'job' : self,
                     'ssh_user' : self._ssh_user,
                     'ssh_port' : self._ssh_port,
                     'ssh_pass' : self._ssh_pass,
                     'ssh_verbosity_flag' : self._ssh_verbosity_flag,
                     'ssh_options' : self._ssh_options}
        return namespace
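
    # Illustrative sketch only (not taken from the real control segments):
    # code executed via _execute_code() sees the names above as globals, so a
    # segment could do, for example:
    #
    #     for machine in machines:
    #         host = hosts.create_host(machine)
    #         host.verify()
    #
    # where 'hosts' is injected by _fill_server_control_namespace() below.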


    def verify(self):
        """Verify machines are all ssh-able."""
        if not self.machines:
            raise error.AutoservError('No machines specified to verify')
        if self.resultdir:
            os.chdir(self.resultdir)
        try:
            namespace = self._make_namespace()
            self._execute_code(VERIFY_CONTROL_FILE, namespace, protect=False)
        except Exception, e:
            msg = ('Verify failed\n' + str(e) + '\n' + traceback.format_exc())
            self.record('ABORT', None, None, msg)
            raise


    def reset(self):
        """Reset machines by first cleanup then verify each machine."""
        if not self.machines:
            raise error.AutoservError('No machines specified to reset.')
        if self.resultdir:
            os.chdir(self.resultdir)

        try:
            namespace = self._make_namespace()
            self._execute_code(RESET_CONTROL_FILE, namespace, protect=False)
        except Exception as e:
            msg = ('Reset failed\n' + str(e) + '\n' +
                   traceback.format_exc())
            self.record('ABORT', None, None, msg)
            raise


    def repair(self, host_protection):
        if not self.machines:
            raise error.AutoservError('No machines specified to repair')
        if self.resultdir:
            os.chdir(self.resultdir)

        namespace = self._make_namespace()
        namespace.update({'protection_level' : host_protection})

        self._execute_code(REPAIR_CONTROL_FILE, namespace, protect=False)


    def provision(self, labels):
        """
        Provision all hosts to match |labels|.

        @param labels: A comma separated string of labels to provision the
                       host to.

        """
        namespace = {'provision_labels': labels}
        control = self._load_control_file(PROVISION_CONTROL_FILE)
        self.run(control=control, namespace=namespace)


    def precheck(self):
        """
        perform any additional checks in derived classes.
        """
        pass


    def enable_external_logging(self):
        """
        Start or restart external logging mechanism.
        """
        pass


    def disable_external_logging(self):
        """
        Pause or stop external logging mechanism.
        """
        pass


    def use_external_logging(self):
        """
        Return True if external logging should be used.
        """
        return False


    def _make_parallel_wrapper(self, function, machines, log):
        """Wrap function as appropriate for calling by parallel_simple."""
        is_forking = not (len(machines) == 1 and self.machines == machines)
        if self._parse_job and is_forking and log:
            def wrapper(machine):
                self._parse_job += "/" + machine
                self._using_parser = True
                self.machines = [machine]
                self.push_execution_context(machine)
                os.chdir(self.resultdir)
                utils.write_keyval(self.resultdir, {"hostname": machine})
                self.init_parser()
                result = function(machine)
                self.cleanup_parser()
                return result
        elif len(machines) > 1 and log:
            def wrapper(machine):
                self.push_execution_context(machine)
                os.chdir(self.resultdir)
                machine_data = {'hostname' : machine,
                                'status_version' : str(self._STATUS_VERSION)}
                utils.write_keyval(self.resultdir, machine_data)
                result = function(machine)
                return result
        else:
            wrapper = function
        return wrapper


    def parallel_simple(self, function, machines, log=True, timeout=None,
                        return_results=False):
        """
        Run 'function' using parallel_simple, with an extra wrapper to handle
        the necessary setup for continuous parsing, if possible. If continuous
        parsing is already properly initialized then this should just work.

        @param function: A callable to run in parallel given each machine.
        @param machines: A list of machine names to be passed one per subcommand
                invocation of function.
        @param log: If True, output will be written to output in a subdirectory
                named after each machine.
        @param timeout: Seconds after which the function call should timeout.
        @param return_results: If True instead of an AutoServError being raised
                on any error a list of the results|exceptions from the function
                called on each arg is returned. [default: False]

        @raises error.AutotestError: If any of the functions failed.
        """
        wrapper = self._make_parallel_wrapper(function, machines, log)
        return subcommand.parallel_simple(wrapper, machines,
                                          log=log, timeout=timeout,
                                          return_results=return_results)


    def parallel_on_machines(self, function, machines, timeout=None):
        """
        @param function: Called in parallel with one machine as its argument.
        @param machines: A list of machines to call function(machine) on.
        @param timeout: Seconds after which the function call should timeout.

        @returns A list of machines on which function(machine) returned
                without raising an exception.
        """
        results = self.parallel_simple(function, machines, timeout=timeout,
                                       return_results=True)
        success_machines = []
        for result, machine in itertools.izip(results, machines):
            if not isinstance(result, Exception):
                success_machines.append(machine)
        return success_machines
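
    # Usage sketch (check_machine is a hypothetical callback): only machines
    # whose callback returned without raising are included in the result.
    #
    #     def check_machine(machine):
    #         ...  # anything that may raise on a bad machine
    #     ok_machines = job.parallel_on_machines(check_machine, job.machines)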


    _USE_TEMP_DIR = object()
    def run(self, cleanup=False, install_before=False, install_after=False,
            collect_crashdumps=True, namespace={}, control=None,
            control_file_dir=None, verify_job_repo_url=False,
            only_collect_crashinfo=False, skip_crash_collection=False):
        # for a normal job, make sure the uncollected logs file exists
        # for a crashinfo-only run it should already exist, bail out otherwise
        created_uncollected_logs = False
        logging.info("I am PID %s", os.getpid())
        if self.resultdir and not os.path.exists(self._uncollected_log_file):
            if only_collect_crashinfo:
                # if this is a crashinfo-only run, and there were no existing
                # uncollected logs, just bail out early
                logging.info("No existing uncollected logs, "
                             "skipping crashinfo collection")
                return
            else:
                log_file = open(self._uncollected_log_file, "w")
                pickle.dump([], log_file)
                log_file.close()
                created_uncollected_logs = True

        # use a copy so changes don't affect the original dictionary
        namespace = namespace.copy()
        machines = self.machines
        if control is None:
            if self.control is None:
                control = ''
            else:
                control = self._load_control_file(self.control)
        if control_file_dir is None:
            control_file_dir = self.resultdir

        self.aborted = False
        namespace.update(self._make_namespace())
        namespace.update({'args' : self.args})
        test_start_time = int(time.time())

        if self.resultdir:
            os.chdir(self.resultdir)
            # touch status.log so that the parser knows a job is running here
            open(self.get_status_log_path(), 'a').close()
            self.enable_external_logging()

        collect_crashinfo = True
        temp_control_file_dir = None
        try:
            try:
                if install_before and machines:
                    self._execute_code(INSTALL_CONTROL_FILE, namespace)

                if only_collect_crashinfo:
                    return

                # If the verify_job_repo_url option is set but we're unable
                # to actually verify that the job_repo_url contains the autotest
                # package, this job will fail.
                if verify_job_repo_url:
                    self._execute_code(VERIFY_JOB_REPO_URL_CONTROL_FILE,
                                       namespace)
                else:
                    logging.warning('Not checking if job_repo_url contains '
                                    'autotest packages on %s', machines)

                # determine the dir to write the control files to
                cfd_specified = (control_file_dir
                                 and control_file_dir is not self._USE_TEMP_DIR)
                if cfd_specified:
                    temp_control_file_dir = None
                else:
                    temp_control_file_dir = tempfile.mkdtemp(
                        suffix='temp_control_file_dir')
                    control_file_dir = temp_control_file_dir
                server_control_file = os.path.join(control_file_dir,
                                                   self._control_filename)
                client_control_file = os.path.join(control_file_dir,
                                                   CLIENT_CONTROL_FILENAME)
                if self._client:
                    namespace['control'] = control
                    utils.open_write_close(client_control_file, control)
                    shutil.copyfile(CLIENT_WRAPPER_CONTROL_FILE,
                                    server_control_file)
                else:
                    utils.open_write_close(server_control_file, control)
                logging.info("Processing control file")
                self._execute_code(server_control_file, namespace)
                logging.info("Finished processing control file")

                # If no device error occurred, no need to collect crashinfo.
                collect_crashinfo = self.failed_with_device_error
            except Exception, e:
                try:
                    logging.exception(
                            'Exception escaped control file, job aborting:')
                    self.record('INFO', None, None, str(e),
                                {'job_abort_reason': str(e)})
                except:
                    pass # don't let logging exceptions here interfere
                raise
        finally:
            if temp_control_file_dir:
                # Clean up temp directory used for copies of the control files
                try:
                    shutil.rmtree(temp_control_file_dir)
                except Exception, e:
                    logging.warn('Could not remove temp directory %s: %s',
                                 temp_control_file_dir, e)

            if machines and (collect_crashdumps or collect_crashinfo):
                namespace['test_start_time'] = test_start_time
                if skip_crash_collection:
                    logging.info('Skipping crash dump/info collection '
                                 'as requested.')
                elif collect_crashinfo:
                    # includes crashdumps
                    self._execute_code(CRASHINFO_CONTROL_FILE, namespace)
                else:
                    self._execute_code(CRASHDUMPS_CONTROL_FILE, namespace)
            self.disable_external_logging()
            if cleanup and machines:
                self._execute_code(CLEANUP_CONTROL_FILE, namespace)
            if self._uncollected_log_file and created_uncollected_logs:
                os.remove(self._uncollected_log_file)
            if install_after and machines:
                self._execute_code(INSTALL_CONTROL_FILE, namespace)


    def run_test(self, url, *args, **dargs):
        """
        Summon a test object and run it.

        tag
                tag to add to testname
        url
                url of the test to run
        """
        if self._disable_sysinfo:
            dargs['disable_sysinfo'] = True

        group, testname = self.pkgmgr.get_package_name(url, 'test')
        testname, subdir, tag = self._build_tagged_test_name(testname, dargs)
        outputdir = self._make_test_outputdir(subdir)

        def group_func():
            try:
                test.runtest(self, url, tag, args, dargs)
            except error.TestBaseException, e:
                self.record(e.exit_status, subdir, testname, str(e))
                raise
            except Exception, e:
                info = str(e) + "\n" + traceback.format_exc()
                self.record('FAIL', subdir, testname, info)
                raise
            else:
                self.record('GOOD', subdir, testname, 'completed successfully')

        result, exc_info = self._run_group(testname, subdir, group_func)
        if exc_info and isinstance(exc_info[1], error.TestBaseException):
            return False
        elif exc_info:
            raise exc_info[0], exc_info[1], exc_info[2]
        else:
            return True
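
    # Usage sketch from a server-side control file ('sometest' is a
    # hypothetical test name):
    #
    #     passed = job.run_test('sometest', tag='smoke')
    #
    # run_test() returns True on success, False if the test raised a
    # TestBaseException, and re-raises any other exception.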


    def _run_group(self, name, subdir, function, *args, **dargs):
        """\
        Underlying method for running something inside of a group.
        """
        result, exc_info = None, None
        try:
            self.record('START', subdir, name)
            result = function(*args, **dargs)
        except error.TestBaseException, e:
            self.record("END %s" % e.exit_status, subdir, name)
            exc_info = sys.exc_info()
        except Exception, e:
            err_msg = str(e) + '\n'
            err_msg += traceback.format_exc()
            self.record('END ABORT', subdir, name, err_msg)
            raise error.JobError(name + ' failed\n' + traceback.format_exc())
        else:
            self.record('END GOOD', subdir, name)

        return result, exc_info


    def run_group(self, function, *args, **dargs):
        """\
        function:
                subroutine to run
        *args:
                arguments for the function
        """

        name = function.__name__

        # Allow the tag for the group to be specified.
        tag = dargs.pop('tag', None)
        if tag:
            name = tag

        return self._run_group(name, None, function, *args, **dargs)[0]


    def run_reboot(self, reboot_func, get_kernel_func):
        """\
        A specialization of run_group meant specifically for handling
        a reboot. Includes support for capturing the kernel version
        after the reboot.

        reboot_func: a function that carries out the reboot

        get_kernel_func: a function that returns a string
        representing the kernel version.
        """
        try:
            self.record('START', None, 'reboot')
            reboot_func()
        except Exception, e:
            err_msg = str(e) + '\n' + traceback.format_exc()
            self.record('END FAIL', None, 'reboot', err_msg)
            raise
        else:
            kernel = get_kernel_func()
            self.record('END GOOD', None, 'reboot',
                        optional_fields={"kernel": kernel})


    def run_control(self, path):
        """Execute a control file found at path (relative to the autotest
        path). Intended for executing a control file within a control file,
        not for running the top-level job control file."""
        path = os.path.join(self.autodir, path)
        control_file = self._load_control_file(path)
        self.run(control=control_file, control_file_dir=self._USE_TEMP_DIR)


    def add_sysinfo_command(self, command, logfile=None, on_every_test=False):
        self._add_sysinfo_loggable(sysinfo.command(command, logf=logfile),
                                   on_every_test)
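
    # Usage sketch (the command and logfile values are illustrative only):
    # collect dmesg after every test rather than only at boot time.
    #
    #     job.add_sysinfo_command('dmesg -c', logfile='dmesg',
    #                             on_every_test=True)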


    def add_sysinfo_logfile(self, file, on_every_test=False):
        self._add_sysinfo_loggable(sysinfo.logfile(file), on_every_test)


    def _add_sysinfo_loggable(self, loggable, on_every_test):
        if on_every_test:
            self.sysinfo.test_loggables.add(loggable)
        else:
            self.sysinfo.boot_loggables.add(loggable)


    def _read_warnings(self):
        """Poll all the warning loggers and extract any new warnings that have
        been logged. If the warnings belong to a category that is currently
        disabled, this method will discard them and they will no longer be
        retrievable.

        Returns a list of (timestamp, message) tuples, where timestamp is an
        integer epoch timestamp."""
        warnings = []
        while True:
            # pull in a line of output from every logger that has
            # output ready to be read
            loggers, _, _ = select.select(self.warning_loggers, [], [], 0)
            closed_loggers = set()
            for logger in loggers:
                line = logger.readline()
                # record any broken pipes (aka line == empty)
                if len(line) == 0:
                    closed_loggers.add(logger)
                    continue
                # parse out the warning
                timestamp, msgtype, msg = line.split('\t', 2)
                timestamp = int(timestamp)
                # if the warning is valid, add it to the results
                if self.warning_manager.is_valid(timestamp, msgtype):
                    warnings.append((timestamp, msg.strip()))

            # stop listening to loggers that are closed
            self.warning_loggers -= closed_loggers

            # stop if none of the loggers have any output left
            if not loggers:
                break

        # sort into timestamp order
        warnings.sort()
        return warnings


    def _unique_subdirectory(self, base_subdirectory_name):
        """Compute a unique results subdirectory based on the given name.

        Appends base_subdirectory_name with a number as necessary to find a
        directory name that doesn't already exist.
        """
        subdirectory = base_subdirectory_name
        counter = 1
        while os.path.exists(os.path.join(self.resultdir, subdirectory)):
            subdirectory = base_subdirectory_name + '.' + str(counter)
            counter += 1
        return subdirectory
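
    # For example (sketch): if 'netperf' and 'netperf.1' already exist under
    # self.resultdir, _unique_subdirectory('netperf') returns 'netperf.2'.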


    def get_record_context(self):
        """Returns an object representing the current job.record context.

        The object returned is an opaque object with a 0-arg restore method
        which can be called to restore the job.record context (i.e. indentation)
        to the current level. The intention is that it should be used when
        something external which generates job.record calls (e.g. an autotest
        client) can fail catastrophically and the server job record state
        needs to be reset to its original "known good" state.

        @return: A context object with a 0-arg restore() method."""
        return self._indenter.get_context()
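
    # Usage sketch (run_client_work is a hypothetical stand-in for anything
    # that emits job.record calls and may die part-way through):
    #
    #     context = job.get_record_context()
    #     try:
    #         run_client_work()
    #     finally:
    #         context.restore()    # put the status.log indentation back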


    def record_summary(self, status_code, test_name, reason='', attributes=None,
                       distinguishing_attributes=(), child_test_ids=None):
        """Record a summary test result.

        @param status_code: status code string, see
                common_lib.log.is_valid_status()
        @param test_name: name of the test
        @param reason: (optional) string providing detailed reason for test
                outcome
        @param attributes: (optional) dict of string keyvals to associate with
                this result
        @param distinguishing_attributes: (optional) list of attribute names
                that should be used to distinguish identically-named test
                results. These attributes should be present in the attributes
                parameter. This is used to generate user-friendly subdirectory
                names.
        @param child_test_ids: (optional) list of test indices for test results
                used in generating this result.
        """
        subdirectory_name_parts = [test_name]
        for attribute in distinguishing_attributes:
            assert attributes
            assert attribute in attributes, '%s not in %s' % (attribute,
                                                              attributes)
            subdirectory_name_parts.append(attributes[attribute])
        base_subdirectory_name = '.'.join(subdirectory_name_parts)

        subdirectory = self._unique_subdirectory(base_subdirectory_name)
        subdirectory_path = os.path.join(self.resultdir, subdirectory)
        os.mkdir(subdirectory_path)

        self.record(status_code, subdirectory, test_name,
                    status=reason, optional_fields={'is_summary': True})

        if attributes:
            utils.write_keyval(subdirectory_path, attributes)

        if child_test_ids:
            ids_string = ','.join(str(test_id) for test_id in child_test_ids)
            summary_data = {'child_test_ids': ids_string}
            utils.write_keyval(os.path.join(subdirectory_path, 'summary_data'),
                               summary_data)


    def disable_warnings(self, warning_type):
        self.warning_manager.disable_warnings(warning_type)
        self.record("INFO", None, None,
                    "disabling %s warnings" % warning_type,
                    {"warnings.disable": warning_type})


    def enable_warnings(self, warning_type):
        self.warning_manager.enable_warnings(warning_type)
        self.record("INFO", None, None,
                    "enabling %s warnings" % warning_type,
                    {"warnings.enable": warning_type})


    def get_status_log_path(self, subdir=None):
        """Return the path to the job status log.

        @param subdir - Optional parameter indicating that you want the path
            to a subdirectory status log.

        @returns The path where the status log should be.
        """
        if self.resultdir:
            if subdir:
                return os.path.join(self.resultdir, subdir, "status.log")
            else:
                return os.path.join(self.resultdir, "status.log")
        else:
            return None


    def _update_uncollected_logs_list(self, update_func):
        """Updates the uncollected logs list in a multi-process safe manner.

        @param update_func - a function that updates the list of uncollected
            logs. Should take one parameter, the list to be updated.
        """
        # Skip log collection if file _uncollected_log_file does not exist.
        if not (self._uncollected_log_file and
                os.path.exists(self._uncollected_log_file)):
            return
        if self._uncollected_log_file:
            log_file = open(self._uncollected_log_file, "r+")
            fcntl.flock(log_file, fcntl.LOCK_EX)
        try:
            uncollected_logs = pickle.load(log_file)
            update_func(uncollected_logs)
            log_file.seek(0)
            log_file.truncate()
            pickle.dump(uncollected_logs, log_file)
            log_file.flush()
        finally:
            fcntl.flock(log_file, fcntl.LOCK_UN)
            log_file.close()


    def add_client_log(self, hostname, remote_path, local_path):
        """Adds a new set of client logs to the list of uncollected logs,
        to allow for future log recovery.

        @param hostname - the hostname of the machine holding the logs
        @param remote_path - the directory on the remote machine holding logs
        @param local_path - the local directory to copy the logs into
        """
        def update_func(logs_list):
            logs_list.append((hostname, remote_path, local_path))
        self._update_uncollected_logs_list(update_func)


    def remove_client_log(self, hostname, remote_path, local_path):
        """Removes a set of client logs from the list of uncollected logs,
        to allow for future log recovery.

        @param hostname - the hostname of the machine holding the logs
        @param remote_path - the directory on the remote machine holding logs
        @param local_path - the local directory to copy the logs into
        """
        def update_func(logs_list):
            logs_list.remove((hostname, remote_path, local_path))
        self._update_uncollected_logs_list(update_func)


    def get_client_logs(self):
        """Retrieves the list of uncollected logs, if it exists.

        @returns A list of (host, remote_path, local_path) tuples. Returns
                an empty list if no uncollected logs file exists.
        """
        log_exists = (self._uncollected_log_file and
                      os.path.exists(self._uncollected_log_file))
        if log_exists:
            return pickle.load(open(self._uncollected_log_file))
        else:
            return []


    def _fill_server_control_namespace(self, namespace, protect=True):
        """
        Prepare a namespace to be used when executing server control files.

        This sets up the control file API by importing modules and making them
        available under the appropriate names within namespace.

        For use by _execute_code().

        Args:
          namespace: The namespace dictionary to fill in.
          protect: Boolean.  If True (the default) any operation that would
              clobber an existing entry in namespace will cause an error.
        Raises:
          error.AutoservError: When a name would be clobbered by import.
        """
        def _import_names(module_name, names=()):
            """
            Import a module and assign named attributes into namespace.

            Args:
                module_name: The string module name.
                names: A limiting list of names to import from module_name.  If
                    empty (the default), all names are imported from the module
                    similar to a "from foo.bar import *" statement.
            Raises:
                error.AutoservError: When a name being imported would clobber
                    a name already in namespace.
            """
            module = __import__(module_name, {}, {}, names)

            # No names supplied?  Import * from the lowest level module.
            # (Ugh, why do I have to implement this part myself?)
            if not names:
                for submodule_name in module_name.split('.')[1:]:
                    module = getattr(module, submodule_name)
                if hasattr(module, '__all__'):
                    names = getattr(module, '__all__')
                else:
                    names = dir(module)

            # Install each name into namespace, checking to make sure it
            # doesn't override anything that already exists.
            for name in names:
                # Check for conflicts to help prevent future problems.
                if name in namespace and protect:
                    if namespace[name] is not getattr(module, name):
                        raise error.AutoservError('importing name '
                                '%s from %s %r would override %r' %
                                (name, module_name, getattr(module, name),
                                 namespace[name]))
                    else:
                        # Encourage cleanliness and the use of __all__ for a
                        # more concrete API with less surprises on '*' imports.
                        warnings.warn('%s (%r) being imported from %s for use '
                                      'in server control files is not the '
                                      'first occurrence of that import.' %
                                      (name, namespace[name], module_name))

                namespace[name] = getattr(module, name)


        # This is the equivalent of prepending a bunch of import statements to
        # the front of the control script.
        namespace.update(os=os, sys=sys, logging=logging)
        _import_names('autotest_lib.server',
                ('hosts', 'autotest', 'kvm', 'git', 'standalone_profiler',
                 'source_kernel', 'rpm_kernel', 'deb_kernel', 'git_kernel'))
        _import_names('autotest_lib.server.subcommand',
                      ('parallel', 'parallel_simple', 'subcommand'))
        _import_names('autotest_lib.server.utils',
                      ('run', 'get_tmp_dir', 'sh_escape', 'parse_machine'))
        _import_names('autotest_lib.client.common_lib.error')
        _import_names('autotest_lib.client.common_lib.barrier', ('barrier',))

        # Inject ourself as the job object into other classes within the API.
        # (Yuck, this injection is a gross thing to be part of a public API. -gps)
        #
        # XXX Base & SiteAutotest do not appear to use .job.  Who does?
        namespace['autotest'].Autotest.job = self
        # server.hosts.base_classes.Host uses .job.
        namespace['hosts'].Host.job = self
        namespace['hosts'].factory.ssh_user = self._ssh_user
        namespace['hosts'].factory.ssh_port = self._ssh_port
        namespace['hosts'].factory.ssh_pass = self._ssh_pass
        namespace['hosts'].factory.ssh_verbosity_flag = (
                self._ssh_verbosity_flag)
        namespace['hosts'].factory.ssh_options = self._ssh_options


    def _execute_code(self, code_file, namespace, protect=True):
        """
        Execute code using a copy of namespace as a server control script.

        Unless protect_namespace is explicitly set to False, the dict will not
        be modified.

        Args:
          code_file: The filename of the control file to execute.
          namespace: A dict containing names to make available during execution.
          protect: Boolean.  If True (the default) a copy of the namespace dict
              is used during execution to prevent the code from modifying its
              contents outside of this function.  If False the raw dict is
              passed in and modifications will be allowed.
        """
        if protect:
            namespace = namespace.copy()
        self._fill_server_control_namespace(namespace, protect=protect)
        # TODO: Simplify and get rid of the special cases for only 1 machine.
        if len(self.machines) > 1:
            machines_text = '\n'.join(self.machines) + '\n'
            # Only rewrite the file if it does not match our machine list.
            try:
                machines_f = open(MACHINES_FILENAME, 'r')
                existing_machines_text = machines_f.read()
                machines_f.close()
            except EnvironmentError:
                existing_machines_text = None
            if machines_text != existing_machines_text:
                utils.open_write_close(MACHINES_FILENAME, machines_text)
        execfile(code_file, namespace, namespace)


    def _parse_status(self, new_line):
        if not self._using_parser:
            return
        new_tests = self.parser.process_lines([new_line])
        for test in new_tests:
            self.__insert_test(test)


    def __insert_test(self, test):
        """
        An internal method to insert a new test result into the
        database. This method will not raise an exception, even if an
        error occurs during the insert, to avoid failing a test
        simply because of unexpected database issues."""
        self.num_tests_run += 1
        if status_lib.is_worse_than_or_equal_to(test.status, 'FAIL'):
            self.num_tests_failed += 1
        try:
            self.results_db.insert_test(self.job_model, test)
        except Exception:
            msg = ("WARNING: An unexpected error occurred while "
                   "inserting test results into the database. "
                   "Ignoring error.\n" + traceback.format_exc())
            print >> sys.stderr, msg


    def preprocess_client_state(self):
        """
        Produce a state file for initializing the state of a client job.

        Creates a new client state file with all the current server state, as
        well as some pre-set client state.

        @returns The path of the file the state was written into.
        """
        # initialize the sysinfo state
        self._state.set('client', 'sysinfo', self.sysinfo.serialize())

        # dump the state out to a tempfile
        fd, file_path = tempfile.mkstemp(dir=self.tmpdir)
        os.close(fd)

        # write_to_file doesn't need locking, we exclusively own file_path
        self._state.write_to_file(file_path)
        return file_path


    def postprocess_client_state(self, state_path):
        """
        Update the state of this job with the state from a client job.

        Updates the state of the server side of a job with the final state
        of a client job that was run. Updates the non-client-specific state,
        pulls in some specific bits from the client-specific state, and then
        discards the rest. Removes the state file afterwards.

        @param state_path A path to the state file from the client.
        """
        # update the on-disk state
        try:
            self._state.read_from_file(state_path)
            os.remove(state_path)
        except OSError, e:
            # ignore file-not-found errors
            if e.errno != errno.ENOENT:
                raise
            else:
                logging.debug('Client state file %s not found', state_path)

        # update the sysinfo state
        if self._state.has('client', 'sysinfo'):
            self.sysinfo.deserialize(self._state.get('client', 'sysinfo'))

        # drop all the client-specific state
        self._state.discard_namespace('client')


    def clear_all_known_hosts(self):
        """Clears known hosts files for all AbstractSSHHosts."""
        for host in self.hosts:
            if isinstance(host, abstract_ssh.AbstractSSHHost):
                host.clear_known_hosts()


class warning_manager(object):
    """Class for controlling warning logs. Manages the enabling and disabling
    of warnings."""
    def __init__(self):
        # a map of warning types to a list of disabled time intervals
        self.disabled_warnings = {}


    def is_valid(self, timestamp, warning_type):
        """Indicates if a warning (based on the time it occurred and its type)
        is a valid warning. A warning is considered "invalid" if this type of
        warning was marked as "disabled" at the time the warning occurred."""
        disabled_intervals = self.disabled_warnings.get(warning_type, [])
        for start, end in disabled_intervals:
            if timestamp >= start and (end is None or timestamp < end):
                return False
        return True


    def disable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, disables all further warnings of this type."""
        intervals = self.disabled_warnings.setdefault(warning_type, [])
        if not intervals or intervals[-1][1] is not None:
            intervals.append((int(current_time_func()), None))


    def enable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, enables all further warnings of this type."""
        intervals = self.disabled_warnings.get(warning_type, [])
        if intervals and intervals[-1][1] is None:
            intervals[-1] = (intervals[-1][0], int(current_time_func()))
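
    # Illustrative sketch of the interval bookkeeping (timestamps made up):
    #
    #     mgr = warning_manager()
    #     mgr.disable_warnings('NETWORK', current_time_func=lambda: 100)
    #     mgr.enable_warnings('NETWORK', current_time_func=lambda: 200)
    #     mgr.is_valid(150, 'NETWORK')   # False: inside the disabled interval
    #     mgr.is_valid(250, 'NETWORK')   # True: warnings re-enabled by then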


# load up site-specific code for generating site-specific job data
get_site_job_data = utils.import_site_function(__file__,
    "autotest_lib.server.site_server_job", "get_site_job_data",
    _get_site_job_data_dummy)


site_server_job = utils.import_site_class(
    __file__, "autotest_lib.server.site_server_job", "site_server_job",
    base_server_job)


class server_job(site_server_job):
    pass