blob: 8a16c6a4fdb06ba250149d459d29cfd9c34ccd30 [file] [log] [blame]
mbligh57e78662008-06-17 19:53:49 +00001"""
2The main job wrapper for the server side.
3
4This is the core infrastructure. Derived from the client side job.py
5
6Copyright Martin J. Bligh, Andy Whitcroft 2007
7"""
8
Eric Li861b2d52011-02-04 14:50:35 -08009import getpass, os, sys, re, stat, tempfile, time, select, subprocess, platform
mblighfc3da5b2010-01-06 18:37:22 +000010import traceback, shutil, warnings, fcntl, pickle, logging, itertools, errno
showard75cdfee2009-06-10 17:40:41 +000011from autotest_lib.client.bin import sysinfo
mbligh0d0f67d2009-11-06 03:15:03 +000012from autotest_lib.client.common_lib import base_job
mbligh09108442008-10-15 16:27:38 +000013from autotest_lib.client.common_lib import error, log, utils, packages
showard75cdfee2009-06-10 17:40:41 +000014from autotest_lib.client.common_lib import logging_manager
jadmanski043e1132008-11-19 17:10:32 +000015from autotest_lib.server import test, subcommand, profilers
mbligh0a883702010-04-21 01:58:34 +000016from autotest_lib.server.hosts import abstract_ssh
jadmanski10646442008-08-13 14:05:21 +000017from autotest_lib.tko import db as tko_db, status_lib, utils as tko_utils
jadmanski10646442008-08-13 14:05:21 +000018
19
mbligh084bc172008-10-18 14:02:45 +000020def _control_segment_path(name):
21 """Get the pathname of the named control segment file."""
jadmanski10646442008-08-13 14:05:21 +000022 server_dir = os.path.dirname(os.path.abspath(__file__))
mbligh084bc172008-10-18 14:02:45 +000023 return os.path.join(server_dir, "control_segments", name)
jadmanski10646442008-08-13 14:05:21 +000024
25
# filenames used inside a job's results directory
CLIENT_CONTROL_FILENAME = 'control'
SERVER_CONTROL_FILENAME = 'control.srv'
MACHINES_FILENAME = '.machines'

# canned control segments shipped next to this module (resolved via
# _control_segment_path); each one implements a phase of a server job
CLIENT_WRAPPER_CONTROL_FILE = _control_segment_path('client_wrapper')
CRASHDUMPS_CONTROL_FILE = _control_segment_path('crashdumps')
CRASHINFO_CONTROL_FILE = _control_segment_path('crashinfo')
INSTALL_CONTROL_FILE = _control_segment_path('install')
CLEANUP_CONTROL_FILE = _control_segment_path('cleanup')

VERIFY_CONTROL_FILE = _control_segment_path('verify')
REPAIR_CONTROL_FILE = _control_segment_path('repair')
jadmanski10646442008-08-13 14:05:21 +000038
39
mbligh062ed152009-01-13 00:57:14 +000040# by default provide a stub that generates no site data
41def _get_site_job_data_dummy(job):
42 return {}
43
44
# load up site-specific code for generating site-specific job data;
# falls back to the no-op stub above when no site module is installed
get_site_job_data = utils.import_site_function(__file__,
    "autotest_lib.server.site_server_job", "get_site_job_data",
    _get_site_job_data_dummy)
jadmanski10646442008-08-13 14:05:21 +000049
50
class status_indenter(base_job.status_indenter):
    """Provide a simple integer-backed status indenter."""

    def __init__(self):
        self._indent = 0

    @property
    def indent(self):
        """Current indentation level."""
        return self._indent

    def increment(self):
        """Increase the indentation level by one."""
        self._indent += 1

    def decrement(self):
        """Decrease the indentation level by one."""
        self._indent -= 1

    def get_context(self):
        """Returns a context object for use by job.get_record_context."""
        class snapshot(object):
            def __init__(self, indenter, level):
                self._indenter = indenter
                self._level = level
            def restore(self):
                # rewind the indenter to the level captured at creation
                self._indenter._indent = self._level
        return snapshot(self, self._indent)
79
80
class server_job_record_hook(object):
    """The job.record hook for server job. Used to inject WARN messages from
    the console or vlm whenever new logs are written, and to echo any logs
    to INFO level logging. Implemented as a class so that it can use state to
    block recursive calls, so that the hook can call job.record itself to
    log WARN messages.

    Depends on job._read_warnings and job._logger.
    """

    def __init__(self, job):
        self._job = job
        self._being_called = False

    def __call__(self, entry):
        """A wrapper around the 'real' record hook, the _hook method, which
        prevents recursion. This isn't making any effort to be threadsafe,
        the intent is to outright block infinite recursion via a
        job.record->_hook->job.record->_hook->job.record... chain."""
        if self._being_called:
            return
        self._being_called = True
        try:
            self._hook(self._job, entry)
        finally:
            # always clear the guard, even if the hook raised
            self._being_called = False

    @staticmethod
    def _hook(job, entry):
        """The core hook, which can safely call job.record."""
        # poll all our warning loggers for new warnings and record them first
        pending = []
        for timestamp, msg in job._read_warnings():
            warning = base_job.status_log_entry(
                'WARN', None, None, msg, {}, timestamp=timestamp)
            pending.append(warning)
            job.record_entry(warning)
        pending.append(entry)
        # echo rendered versions of all the status logs to info
        for item in pending:
            rendered = job._logger.render_entry(item)
            logging.info(rendered)
            job._parse_status(rendered)
jadmanski2a89dac2010-06-11 14:32:58 +0000125
126
class base_server_job(base_job.base_job):
    """The server-side concrete implementation of base_job.

    Optional properties provided by this implementation:
        serverdir
        conmuxdir

        num_tests_run
        num_tests_failed

        warning_manager
        warning_loggers
    """

    # version of the status log format this job emits (consumed by the
    # TKO parser; see init_parser)
    _STATUS_VERSION = 1
jadmanski10646442008-08-13 14:05:21 +0000142
143 def __init__(self, control, args, resultdir, label, user, machines,
144 client=False, parse_job='',
mbligh374f3412009-05-13 21:29:45 +0000145 ssh_user='root', ssh_port=22, ssh_pass='',
mblighe0cbc912010-03-11 18:03:07 +0000146 group_name='', tag='',
147 control_filename=SERVER_CONTROL_FILENAME):
jadmanski10646442008-08-13 14:05:21 +0000148 """
mbligh374f3412009-05-13 21:29:45 +0000149 Create a server side job object.
mblighb5dac432008-11-27 00:38:44 +0000150
mblighe7d9c602009-07-02 19:02:33 +0000151 @param control: The pathname of the control file.
152 @param args: Passed to the control file.
153 @param resultdir: Where to throw the results.
154 @param label: Description of the job.
155 @param user: Username for the job (email address).
156 @param client: True if this is a client-side control file.
157 @param parse_job: string, if supplied it is the job execution tag that
158 the results will be passed through to the TKO parser with.
159 @param ssh_user: The SSH username. [root]
160 @param ssh_port: The SSH port number. [22]
161 @param ssh_pass: The SSH passphrase, if needed.
162 @param group_name: If supplied, this will be written out as
mbligh374f3412009-05-13 21:29:45 +0000163 host_group_name in the keyvals file for the parser.
mblighe7d9c602009-07-02 19:02:33 +0000164 @param tag: The job execution tag from the scheduler. [optional]
mblighe0cbc912010-03-11 18:03:07 +0000165 @param control_filename: The filename where the server control file
166 should be written in the results directory.
jadmanski10646442008-08-13 14:05:21 +0000167 """
mbligh0d0f67d2009-11-06 03:15:03 +0000168 super(base_server_job, self).__init__(resultdir=resultdir)
mbligha788dc42009-03-26 21:10:16 +0000169
mbligh0d0f67d2009-11-06 03:15:03 +0000170 path = os.path.dirname(__file__)
171 self.control = control
172 self._uncollected_log_file = os.path.join(self.resultdir,
173 'uncollected_logs')
174 debugdir = os.path.join(self.resultdir, 'debug')
175 if not os.path.exists(debugdir):
176 os.mkdir(debugdir)
177
178 if user:
179 self.user = user
180 else:
181 self.user = getpass.getuser()
182
jadmanski808f4b12010-04-09 22:30:31 +0000183 self.args = args
jadmanski10646442008-08-13 14:05:21 +0000184 self.machines = machines
mbligh0d0f67d2009-11-06 03:15:03 +0000185 self._client = client
jadmanski10646442008-08-13 14:05:21 +0000186 self.warning_loggers = set()
jadmanskif37df842009-02-11 00:03:26 +0000187 self.warning_manager = warning_manager()
mbligh0d0f67d2009-11-06 03:15:03 +0000188 self._ssh_user = ssh_user
189 self._ssh_port = ssh_port
190 self._ssh_pass = ssh_pass
mblighe7d9c602009-07-02 19:02:33 +0000191 self.tag = tag
mbligh09108442008-10-15 16:27:38 +0000192 self.last_boot_tag = None
jadmanski53aaf382008-11-17 16:22:31 +0000193 self.hosts = set()
mbligh0d0f67d2009-11-06 03:15:03 +0000194 self.drop_caches = False
mblighb5dac432008-11-27 00:38:44 +0000195 self.drop_caches_between_iterations = False
mblighe0cbc912010-03-11 18:03:07 +0000196 self._control_filename = control_filename
jadmanski10646442008-08-13 14:05:21 +0000197
showard75cdfee2009-06-10 17:40:41 +0000198 self.logging = logging_manager.get_logging_manager(
199 manage_stdout_and_stderr=True, redirect_fds=True)
200 subcommand.logging_manager_object = self.logging
jadmanski10646442008-08-13 14:05:21 +0000201
mbligh0d0f67d2009-11-06 03:15:03 +0000202 self.sysinfo = sysinfo.sysinfo(self.resultdir)
jadmanski043e1132008-11-19 17:10:32 +0000203 self.profilers = profilers.profilers(self)
jadmanskic09fc152008-10-15 17:56:59 +0000204
jadmanski10646442008-08-13 14:05:21 +0000205 job_data = {'label' : label, 'user' : user,
206 'hostname' : ','.join(machines),
Eric Li861b2d52011-02-04 14:50:35 -0800207 'drone' : platform.node(),
mbligh0d0f67d2009-11-06 03:15:03 +0000208 'status_version' : str(self._STATUS_VERSION),
showard170873e2009-01-07 00:22:26 +0000209 'job_started' : str(int(time.time()))}
mbligh374f3412009-05-13 21:29:45 +0000210 if group_name:
211 job_data['host_group_name'] = group_name
jadmanski10646442008-08-13 14:05:21 +0000212
mbligh0d0f67d2009-11-06 03:15:03 +0000213 # only write these keyvals out on the first job in a resultdir
214 if 'job_started' not in utils.read_keyval(self.resultdir):
215 job_data.update(get_site_job_data(self))
216 utils.write_keyval(self.resultdir, job_data)
217
218 self._parse_job = parse_job
showardcc929362010-01-25 21:20:41 +0000219 self._using_parser = (self._parse_job and len(machines) <= 1)
mbligh0d0f67d2009-11-06 03:15:03 +0000220 self.pkgmgr = packages.PackageManager(
221 self.autodir, run_function_dargs={'timeout':600})
showard21baa452008-10-21 00:08:39 +0000222 self.num_tests_run = 0
223 self.num_tests_failed = 0
224
jadmanski550fdc22008-11-20 16:32:08 +0000225 self._register_subcommand_hooks()
226
mbligh0d0f67d2009-11-06 03:15:03 +0000227 # these components aren't usable on the server
228 self.bootloader = None
229 self.harness = None
230
jadmanski2a89dac2010-06-11 14:32:58 +0000231 # set up the status logger
jadmanski52053632010-06-11 21:08:10 +0000232 self._indenter = status_indenter()
jadmanski2a89dac2010-06-11 14:32:58 +0000233 self._logger = base_job.status_logger(
jadmanski52053632010-06-11 21:08:10 +0000234 self, self._indenter, 'status.log', 'status.log',
jadmanski2a89dac2010-06-11 14:32:58 +0000235 record_hook=server_job_record_hook(self))
236
mbligh0d0f67d2009-11-06 03:15:03 +0000237
238 @classmethod
239 def _find_base_directories(cls):
240 """
241 Determine locations of autodir, clientdir and serverdir. Assumes
242 that this file is located within serverdir and uses __file__ along
243 with relative paths to resolve the location.
244 """
245 serverdir = os.path.abspath(os.path.dirname(__file__))
246 autodir = os.path.normpath(os.path.join(serverdir, '..'))
247 clientdir = os.path.join(autodir, 'client')
248 return autodir, clientdir, serverdir
249
250
251 def _find_resultdir(self, resultdir):
252 """
253 Determine the location of resultdir. For server jobs we expect one to
254 always be explicitly passed in to __init__, so just return that.
255 """
256 if resultdir:
257 return os.path.normpath(resultdir)
258 else:
259 return None
260
jadmanski550fdc22008-11-20 16:32:08 +0000261
    def _get_status_logger(self):
        """Return a reference to the status logger set up in __init__."""
        return self._logger
265
266
jadmanskie432dd22009-01-30 15:04:51 +0000267 @staticmethod
268 def _load_control_file(path):
269 f = open(path)
270 try:
271 control_file = f.read()
272 finally:
273 f.close()
274 return re.sub('\r', '', control_file)
275
276
jadmanski550fdc22008-11-20 16:32:08 +0000277 def _register_subcommand_hooks(self):
mbligh2b92b862008-11-22 13:25:32 +0000278 """
279 Register some hooks into the subcommand modules that allow us
280 to properly clean up self.hosts created in forked subprocesses.
281 """
jadmanski550fdc22008-11-20 16:32:08 +0000282 def on_fork(cmd):
283 self._existing_hosts_on_fork = set(self.hosts)
284 def on_join(cmd):
285 new_hosts = self.hosts - self._existing_hosts_on_fork
286 for host in new_hosts:
287 host.close()
288 subcommand.subcommand.register_fork_hook(on_fork)
289 subcommand.subcommand.register_join_hook(on_join)
290
jadmanski10646442008-08-13 14:05:21 +0000291
    def init_parser(self):
        """
        Start the continuous parsing of self.resultdir. This sets up
        the database connection and inserts the basic job object into
        the database if necessary.

        No-op unless continuous parsing was enabled in __init__
        (self._using_parser).
        """
        if not self._using_parser:
            return
        # redirect parser debugging to .parse.log; third arg 0 makes the
        # file unbuffered so debug output isn't lost on a crash
        parse_log = os.path.join(self.resultdir, '.parse.log')
        parse_log = open(parse_log, 'w', 0)
        tko_utils.redirect_parser_debugging(parse_log)
        # create a job model object and set up the db
        self.results_db = tko_db.db(autocommit=True)
        self.parser = status_lib.parser(self._STATUS_VERSION)
        self.job_model = self.parser.make_job(self.resultdir)
        self.parser.start(self.job_model)
        # check if a job already exists in the db and insert it if
        # it does not
        job_idx = self.results_db.find_job(self._parse_job)
        if job_idx is None:
            self.results_db.insert_job(self._parse_job, self.job_model)
        else:
            # the job is already present; just attach its db indices
            machine_idx = self.results_db.lookup_machine(self.job_model.machine)
            self.job_model.index = job_idx
            self.job_model.machine_idx = machine_idx
318
319
320 def cleanup_parser(self):
mbligh2b92b862008-11-22 13:25:32 +0000321 """
322 This should be called after the server job is finished
jadmanski10646442008-08-13 14:05:21 +0000323 to carry out any remaining cleanup (e.g. flushing any
mbligh2b92b862008-11-22 13:25:32 +0000324 remaining test results to the results db)
325 """
mbligh0d0f67d2009-11-06 03:15:03 +0000326 if not self._using_parser:
jadmanski10646442008-08-13 14:05:21 +0000327 return
328 final_tests = self.parser.end()
329 for test in final_tests:
330 self.__insert_test(test)
mbligh0d0f67d2009-11-06 03:15:03 +0000331 self._using_parser = False
jadmanski10646442008-08-13 14:05:21 +0000332
333
334 def verify(self):
335 if not self.machines:
mbligh084bc172008-10-18 14:02:45 +0000336 raise error.AutoservError('No machines specified to verify')
mbligh0fce4112008-11-27 00:37:17 +0000337 if self.resultdir:
338 os.chdir(self.resultdir)
jadmanski10646442008-08-13 14:05:21 +0000339 try:
jadmanskicdd0c402008-09-19 21:21:31 +0000340 namespace = {'machines' : self.machines, 'job' : self,
mbligh0d0f67d2009-11-06 03:15:03 +0000341 'ssh_user' : self._ssh_user,
342 'ssh_port' : self._ssh_port,
343 'ssh_pass' : self._ssh_pass}
mbligh084bc172008-10-18 14:02:45 +0000344 self._execute_code(VERIFY_CONTROL_FILE, namespace, protect=False)
jadmanski10646442008-08-13 14:05:21 +0000345 except Exception, e:
mbligh2b92b862008-11-22 13:25:32 +0000346 msg = ('Verify failed\n' + str(e) + '\n' + traceback.format_exc())
jadmanski10646442008-08-13 14:05:21 +0000347 self.record('ABORT', None, None, msg)
348 raise
349
350
351 def repair(self, host_protection):
352 if not self.machines:
353 raise error.AutoservError('No machines specified to repair')
mbligh0fce4112008-11-27 00:37:17 +0000354 if self.resultdir:
355 os.chdir(self.resultdir)
jadmanski10646442008-08-13 14:05:21 +0000356 namespace = {'machines': self.machines, 'job': self,
mbligh0d0f67d2009-11-06 03:15:03 +0000357 'ssh_user': self._ssh_user, 'ssh_port': self._ssh_port,
358 'ssh_pass': self._ssh_pass,
jadmanski10646442008-08-13 14:05:21 +0000359 'protection_level': host_protection}
mbligh25c0b8c2009-01-24 01:44:17 +0000360
mbligh0931b0a2009-04-08 17:44:48 +0000361 self._execute_code(REPAIR_CONTROL_FILE, namespace, protect=False)
jadmanski10646442008-08-13 14:05:21 +0000362
363
    def precheck(self):
        """
        Perform any additional checks in derived classes.

        Stub implementation: does nothing here; subclasses may override.
        """
        pass
369
370
    def enable_external_logging(self):
        """
        Start or restart external logging mechanism.

        No-op stub; subclasses may override to hook up site-specific logging.
        """
        pass
376
377
    def disable_external_logging(self):
        """
        Pause or stop external logging mechanism.

        No-op stub; subclasses may override to hook up site-specific logging.
        """
        pass
383
384
    def use_external_logging(self):
        """
        Return True if external logging should be used.

        Always False in this base implementation; subclasses may override.
        """
        return False
390
391
    def _make_parallel_wrapper(self, function, machines, log):
        """Wrap function as appropriate for calling by parallel_simple.

        @param function: The per-machine callable being parallelized.
        @param machines: The full list of machines the job runs over.
        @param log: Whether per-machine result directories/keyvals are wanted.

        @returns A callable suitable for subcommand.parallel_simple.
        """
        # we only fork when the job is not simply running on its single
        # original machine, so the state mutations in the wrappers below are
        # presumably confined to forked children (via
        # subcommand.parallel_simple) — they do not leak back to the parent
        is_forking = not (len(machines) == 1 and self.machines == machines)
        if self._parse_job and is_forking and log:
            def wrapper(machine):
                # narrow the job down to this one machine and start a
                # per-machine continuous parser in its own resultdir
                self._parse_job += "/" + machine
                self._using_parser = True
                self.machines = [machine]
                self.push_execution_context(machine)
                os.chdir(self.resultdir)
                utils.write_keyval(self.resultdir, {"hostname": machine})
                self.init_parser()
                result = function(machine)
                self.cleanup_parser()
                return result
        elif len(machines) > 1 and log:
            def wrapper(machine):
                # per-machine resultdir and keyvals, but no parsing
                self.push_execution_context(machine)
                os.chdir(self.resultdir)
                machine_data = {'hostname' : machine,
                                'status_version' : str(self._STATUS_VERSION)}
                utils.write_keyval(self.resultdir, machine_data)
                result = function(machine)
                return result
        else:
            # nothing extra to do; call the function directly
            wrapper = function
        return wrapper
419
420
421 def parallel_simple(self, function, machines, log=True, timeout=None,
422 return_results=False):
423 """
424 Run 'function' using parallel_simple, with an extra wrapper to handle
425 the necessary setup for continuous parsing, if possible. If continuous
426 parsing is already properly initialized then this should just work.
427
428 @param function: A callable to run in parallel given each machine.
429 @param machines: A list of machine names to be passed one per subcommand
430 invocation of function.
431 @param log: If True, output will be written to output in a subdirectory
432 named after each machine.
433 @param timeout: Seconds after which the function call should timeout.
434 @param return_results: If True instead of an AutoServError being raised
435 on any error a list of the results|exceptions from the function
436 called on each arg is returned. [default: False]
437
438 @raises error.AutotestError: If any of the functions failed.
439 """
440 wrapper = self._make_parallel_wrapper(function, machines, log)
441 return subcommand.parallel_simple(wrapper, machines,
442 log=log, timeout=timeout,
443 return_results=return_results)
444
445
446 def parallel_on_machines(self, function, machines, timeout=None):
447 """
showardcd5fac42009-07-06 20:19:43 +0000448 @param function: Called in parallel with one machine as its argument.
mbligh415dc212009-06-15 21:53:34 +0000449 @param machines: A list of machines to call function(machine) on.
450 @param timeout: Seconds after which the function call should timeout.
451
452 @returns A list of machines on which function(machine) returned
453 without raising an exception.
454 """
showardcd5fac42009-07-06 20:19:43 +0000455 results = self.parallel_simple(function, machines, timeout=timeout,
mbligh415dc212009-06-15 21:53:34 +0000456 return_results=True)
457 success_machines = []
458 for result, machine in itertools.izip(results, machines):
459 if not isinstance(result, Exception):
460 success_machines.append(machine)
461 return success_machines
jadmanski10646442008-08-13 14:05:21 +0000462
463
    # sentinel for control_file_dir meaning "create a fresh temp directory"
    _USE_TEMP_DIR = object()
    def run(self, cleanup=False, install_before=False, install_after=False,
            collect_crashdumps=True, namespace={}, control=None,
            control_file_dir=None, only_collect_crashinfo=False):
        """Execute the job's control file and collect the results.

        @param cleanup: If True, run the cleanup control segment afterwards.
        @param install_before: If True, run the install segment first.
        @param install_after: If True, run the install segment afterwards.
        @param collect_crashdumps: If True, collect crashdumps when done.
        @param namespace: Extra names made visible to the control file.
                NOTE(review): mutable default is only safe because the dict
                is copied below before being modified.
        @param control: Control file text; defaults to the contents of
                self.control if not supplied.
        @param control_file_dir: Directory to write control file copies to;
                _USE_TEMP_DIR requests a temp dir, None means the resultdir.
        @param only_collect_crashinfo: If True, skip running the control file
                and only perform crashinfo collection.
        """
        # for a normal job, make sure the uncollected logs file exists
        # for a crashinfo-only run it should already exist, bail out otherwise
        created_uncollected_logs = False
        if self.resultdir and not os.path.exists(self._uncollected_log_file):
            if only_collect_crashinfo:
                # if this is a crashinfo-only run, and there were no existing
                # uncollected logs, just bail out early
                logging.info("No existing uncollected logs, "
                             "skipping crashinfo collection")
                return
            else:
                # seed the uncollected logs file with an empty pickled list
                log_file = open(self._uncollected_log_file, "w")
                pickle.dump([], log_file)
                log_file.close()
                created_uncollected_logs = True

        # use a copy so changes don't affect the original dictionary
        namespace = namespace.copy()
        machines = self.machines
        if control is None:
            if self.control is None:
                control = ''
            else:
                control = self._load_control_file(self.control)
        if control_file_dir is None:
            control_file_dir = self.resultdir

        self.aborted = False
        namespace['machines'] = machines
        namespace['args'] = self.args
        namespace['job'] = self
        namespace['ssh_user'] = self._ssh_user
        namespace['ssh_port'] = self._ssh_port
        namespace['ssh_pass'] = self._ssh_pass
        test_start_time = int(time.time())

        if self.resultdir:
            os.chdir(self.resultdir)
            # touch status.log so that the parser knows a job is running here
            open(self.get_status_log_path(), 'a').close()
            self.enable_external_logging()

        collect_crashinfo = True
        temp_control_file_dir = None
        try:
            try:
                if install_before and machines:
                    self._execute_code(INSTALL_CONTROL_FILE, namespace)

                if only_collect_crashinfo:
                    # skip straight to the finally block, which collects
                    # crashinfo since collect_crashinfo is still True
                    return

                # determine the dir to write the control files to
                cfd_specified = (control_file_dir
                                 and control_file_dir is not self._USE_TEMP_DIR)
                if cfd_specified:
                    temp_control_file_dir = None
                else:
                    temp_control_file_dir = tempfile.mkdtemp(
                        suffix='temp_control_file_dir')
                    control_file_dir = temp_control_file_dir
                server_control_file = os.path.join(control_file_dir,
                                                   self._control_filename)
                client_control_file = os.path.join(control_file_dir,
                                                   CLIENT_CONTROL_FILENAME)
                if self._client:
                    # client-side control file: write it out and wrap it in
                    # the canned client_wrapper server control segment
                    namespace['control'] = control
                    utils.open_write_close(client_control_file, control)
                    shutil.copyfile(CLIENT_WRAPPER_CONTROL_FILE,
                                    server_control_file)
                else:
                    utils.open_write_close(server_control_file, control)
                logging.info("Processing control file")
                self._execute_code(server_control_file, namespace)
                logging.info("Finished processing control file")

                # no error occured, so we don't need to collect crashinfo
                collect_crashinfo = False
            except Exception, e:
                try:
                    logging.exception(
                            'Exception escaped control file, job aborting:')
                    self.record('INFO', None, None, str(e),
                                {'job_abort_reason': str(e)})
                except:
                    pass # don't let logging exceptions here interfere
                raise
        finally:
            if temp_control_file_dir:
                # Clean up temp directory used for copies of the control files
                try:
                    shutil.rmtree(temp_control_file_dir)
                except Exception, e:
                    logging.warn('Could not remove temp directory %s: %s',
                                 temp_control_file_dir, e)

            if machines and (collect_crashdumps or collect_crashinfo):
                namespace['test_start_time'] = test_start_time
                if collect_crashinfo:
                    # includes crashdumps
                    self._execute_code(CRASHINFO_CONTROL_FILE, namespace)
                else:
                    self._execute_code(CRASHDUMPS_CONTROL_FILE, namespace)
            # only remove the uncollected logs file if this run created it
            if self._uncollected_log_file and created_uncollected_logs:
                os.remove(self._uncollected_log_file)
            self.disable_external_logging()
            if cleanup and machines:
                self._execute_code(CLEANUP_CONTROL_FILE, namespace)
            if install_after and machines:
                self._execute_code(INSTALL_CONTROL_FILE, namespace)
jadmanski10646442008-08-13 14:05:21 +0000578
579
    def run_test(self, url, *args, **dargs):
        """
        Summon a test object and run it.

        @param url: url of the test to run.
        @param args: positional arguments passed through to the test.
        @param dargs: keyword arguments passed through to the test;
                presumably a 'tag' entry is consumed when building the tagged
                test name — confirm against _build_tagged_test_name.

        @returns True if the test completed GOOD, False if it raised a
                TestBaseException; any other exception is re-raised.
        """
        group, testname = self.pkgmgr.get_package_name(url, 'test')
        testname, subdir, tag = self._build_tagged_test_name(testname, dargs)
        # NOTE(review): outputdir is unused here — presumably called for its
        # side effect of creating the test's output directory; confirm
        outputdir = self._make_test_outputdir(subdir)

        def group_func():
            # run the test, recording its final status before re-raising
            # any failure so _run_group can close out the status group
            try:
                test.runtest(self, url, tag, args, dargs)
            except error.TestBaseException, e:
                self.record(e.exit_status, subdir, testname, str(e))
                raise
            except Exception, e:
                info = str(e) + "\n" + traceback.format_exc()
                self.record('FAIL', subdir, testname, info)
                raise
            else:
                self.record('GOOD', subdir, testname, 'completed successfully')

        result, exc_info = self._run_group(testname, subdir, group_func)
        if exc_info and isinstance(exc_info[1], error.TestBaseException):
            return False
        elif exc_info:
            # re-raise with the original traceback (Python 2 3-expr raise)
            raise exc_info[0], exc_info[1], exc_info[2]
        else:
            return True
jadmanski10646442008-08-13 14:05:21 +0000613
614
    def _run_group(self, name, subdir, function, *args, **dargs):
        """\
        Underlying method for running something inside of a group.

        Wraps the call in START/END status log entries.

        @param name: group name to use in the status log.
        @param subdir: results subdirectory for the records, or None.
        @param function: the callable to invoke inside the group.

        @returns (result, exc_info) — exc_info is the sys.exc_info() triple
                if function raised a TestBaseException, otherwise None.

        @raises error.JobError if function raised any non-test exception.
        """
        result, exc_info = None, None
        try:
            self.record('START', subdir, name)
            result = function(*args, **dargs)
        except error.TestBaseException, e:
            # test-level failure: close the group with the test's status and
            # hand the exception info back instead of raising
            self.record("END %s" % e.exit_status, subdir, name)
            exc_info = sys.exc_info()
        except Exception, e:
            # anything else aborts the group and escalates to a JobError
            err_msg = str(e) + '\n'
            err_msg += traceback.format_exc()
            self.record('END ABORT', subdir, name, err_msg)
            raise error.JobError(name + ' failed\n' + traceback.format_exc())
        else:
            self.record('END GOOD', subdir, name)

        return result, exc_info
jadmanski10646442008-08-13 14:05:21 +0000635
636
637 def run_group(self, function, *args, **dargs):
638 """\
639 function:
640 subroutine to run
641 *args:
642 arguments for the function
643 """
644
645 name = function.__name__
646
647 # Allow the tag for the group to be specified.
648 tag = dargs.pop('tag', None)
649 if tag:
650 name = tag
651
jadmanskide292df2008-08-26 20:51:14 +0000652 return self._run_group(name, None, function, *args, **dargs)[0]
jadmanski10646442008-08-13 14:05:21 +0000653
654
    def run_reboot(self, reboot_func, get_kernel_func):
        """\
        A specialization of run_group meant specifically for handling
        a reboot. Includes support for capturing the kernel version
        after the reboot.

        @param reboot_func: a function that carries out the reboot.
        @param get_kernel_func: a function that returns a string
                representing the kernel version.

        @raises whatever reboot_func raises, after recording END FAIL.
        """
        try:
            self.record('START', None, 'reboot')
            reboot_func()
        except Exception, e:
            err_msg = str(e) + '\n' + traceback.format_exc()
            self.record('END FAIL', None, 'reboot', err_msg)
            raise
        else:
            # only query the kernel version once the reboot succeeded
            kernel = get_kernel_func()
            self.record('END GOOD', None, 'reboot',
                        optional_fields={"kernel": kernel})
677
678
jadmanskie432dd22009-01-30 15:04:51 +0000679 def run_control(self, path):
680 """Execute a control file found at path (relative to the autotest
681 path). Intended for executing a control file within a control file,
682 not for running the top-level job control file."""
683 path = os.path.join(self.autodir, path)
684 control_file = self._load_control_file(path)
mbligh0d0f67d2009-11-06 03:15:03 +0000685 self.run(control=control_file, control_file_dir=self._USE_TEMP_DIR)
jadmanskie432dd22009-01-30 15:04:51 +0000686
687
jadmanskic09fc152008-10-15 17:56:59 +0000688 def add_sysinfo_command(self, command, logfile=None, on_every_test=False):
mbligh4395bbd2009-03-25 19:34:17 +0000689 self._add_sysinfo_loggable(sysinfo.command(command, logf=logfile),
jadmanskic09fc152008-10-15 17:56:59 +0000690 on_every_test)
691
692
693 def add_sysinfo_logfile(self, file, on_every_test=False):
694 self._add_sysinfo_loggable(sysinfo.logfile(file), on_every_test)
695
696
697 def _add_sysinfo_loggable(self, loggable, on_every_test):
698 if on_every_test:
699 self.sysinfo.test_loggables.add(loggable)
700 else:
701 self.sysinfo.boot_loggables.add(loggable)
702
703
jadmanski10646442008-08-13 14:05:21 +0000704 def _read_warnings(self):
jadmanskif37df842009-02-11 00:03:26 +0000705 """Poll all the warning loggers and extract any new warnings that have
706 been logged. If the warnings belong to a category that is currently
707 disabled, this method will discard them and they will no longer be
708 retrievable.
709
710 Returns a list of (timestamp, message) tuples, where timestamp is an
711 integer epoch timestamp."""
jadmanski10646442008-08-13 14:05:21 +0000712 warnings = []
713 while True:
714 # pull in a line of output from every logger that has
715 # output ready to be read
mbligh2b92b862008-11-22 13:25:32 +0000716 loggers, _, _ = select.select(self.warning_loggers, [], [], 0)
jadmanski10646442008-08-13 14:05:21 +0000717 closed_loggers = set()
718 for logger in loggers:
719 line = logger.readline()
720 # record any broken pipes (aka line == empty)
721 if len(line) == 0:
722 closed_loggers.add(logger)
723 continue
jadmanskif37df842009-02-11 00:03:26 +0000724 # parse out the warning
725 timestamp, msgtype, msg = line.split('\t', 2)
726 timestamp = int(timestamp)
727 # if the warning is valid, add it to the results
728 if self.warning_manager.is_valid(timestamp, msgtype):
729 warnings.append((timestamp, msg.strip()))
jadmanski10646442008-08-13 14:05:21 +0000730
731 # stop listening to loggers that are closed
732 self.warning_loggers -= closed_loggers
733
734 # stop if none of the loggers have any output left
735 if not loggers:
736 break
737
738 # sort into timestamp order
739 warnings.sort()
740 return warnings
741
742
showardcc929362010-01-25 21:20:41 +0000743 def _unique_subdirectory(self, base_subdirectory_name):
744 """Compute a unique results subdirectory based on the given name.
745
746 Appends base_subdirectory_name with a number as necessary to find a
747 directory name that doesn't already exist.
748 """
749 subdirectory = base_subdirectory_name
750 counter = 1
751 while os.path.exists(os.path.join(self.resultdir, subdirectory)):
752 subdirectory = base_subdirectory_name + '.' + str(counter)
753 counter += 1
754 return subdirectory
755
756
jadmanski52053632010-06-11 21:08:10 +0000757 def get_record_context(self):
758 """Returns an object representing the current job.record context.
759
760 The object returned is an opaque object with a 0-arg restore method
761 which can be called to restore the job.record context (i.e. indentation)
762 to the current level. The intention is that it should be used when
763 something external which generate job.record calls (e.g. an autotest
764 client) can fail catastrophically and the server job record state
765 needs to be reset to its original "known good" state.
766
767 @return: A context object with a 0-arg restore() method."""
768 return self._indenter.get_context()
769
770
    def record_summary(self, status_code, test_name, reason='', attributes=None,
                       distinguishing_attributes=(), child_test_ids=None):
        """Record a summary test result.

        @param status_code: status code string, see
                common_lib.log.is_valid_status()
        @param test_name: name of the test
        @param reason: (optional) string providing detailed reason for test
                outcome
        @param attributes: (optional) dict of string keyvals to associate with
                this result
        @param distinguishing_attributes: (optional) list of attribute names
                that should be used to distinguish identically-named test
                results.  These attributes should be present in the attributes
                parameter.  This is used to generate user-friendly subdirectory
                names.
        @param child_test_ids: (optional) list of test indices for test results
                used in generating this result.
        """
        # Build a subdirectory name of the form
        # <test_name>.<attr value>.<attr value>... so identically-named
        # results with different distinguishing attributes don't collide.
        subdirectory_name_parts = [test_name]
        for attribute in distinguishing_attributes:
            assert attributes
            assert attribute in attributes, '%s not in %s' % (attribute,
                                                              attributes)
            subdirectory_name_parts.append(attributes[attribute])
        base_subdirectory_name = '.'.join(subdirectory_name_parts)

        # Append a numeric suffix if needed so the directory is unique.
        subdirectory = self._unique_subdirectory(base_subdirectory_name)
        subdirectory_path = os.path.join(self.resultdir, subdirectory)
        os.mkdir(subdirectory_path)

        self.record(status_code, subdirectory, test_name,
                    status=reason, optional_fields={'is_summary': True})

        # Persist any extra attributes as keyvals in the new subdirectory.
        if attributes:
            utils.write_keyval(subdirectory_path, attributes)

        # Link this summary back to the child results it was derived from.
        if child_test_ids:
            ids_string = ','.join(str(test_id) for test_id in child_test_ids)
            summary_data = {'child_test_ids': ids_string}
            utils.write_keyval(os.path.join(subdirectory_path, 'summary_data'),
                               summary_data)
813
814
jadmanski16a7ff72009-04-01 18:19:53 +0000815 def disable_warnings(self, warning_type):
jadmanskif37df842009-02-11 00:03:26 +0000816 self.warning_manager.disable_warnings(warning_type)
jadmanski16a7ff72009-04-01 18:19:53 +0000817 self.record("INFO", None, None,
818 "disabling %s warnings" % warning_type,
819 {"warnings.disable": warning_type})
jadmanskif37df842009-02-11 00:03:26 +0000820
821
jadmanski16a7ff72009-04-01 18:19:53 +0000822 def enable_warnings(self, warning_type):
jadmanskif37df842009-02-11 00:03:26 +0000823 self.warning_manager.enable_warnings(warning_type)
jadmanski16a7ff72009-04-01 18:19:53 +0000824 self.record("INFO", None, None,
825 "enabling %s warnings" % warning_type,
826 {"warnings.enable": warning_type})
jadmanskif37df842009-02-11 00:03:26 +0000827
828
jadmanski779bd292009-03-19 17:33:33 +0000829 def get_status_log_path(self, subdir=None):
830 """Return the path to the job status log.
831
832 @param subdir - Optional paramter indicating that you want the path
833 to a subdirectory status log.
834
835 @returns The path where the status log should be.
836 """
mbligh210bae62009-04-01 18:33:13 +0000837 if self.resultdir:
838 if subdir:
839 return os.path.join(self.resultdir, subdir, "status.log")
840 else:
841 return os.path.join(self.resultdir, "status.log")
jadmanski779bd292009-03-19 17:33:33 +0000842 else:
mbligh210bae62009-04-01 18:33:13 +0000843 return None
jadmanski779bd292009-03-19 17:33:33 +0000844
845
    def _update_uncollected_logs_list(self, update_func):
        """Updates the uncollected logs list in a multi-process safe manner.

        @param update_func - a function that updates the list of uncollected
            logs. Should take one parameter, the list to be updated.
        """
        # No-op when this job isn't tracking uncollected logs.
        if self._uncollected_log_file:
            log_file = open(self._uncollected_log_file, "r+")
            # An exclusive flock serializes the read-modify-write cycle
            # against other processes sharing this file.
            fcntl.flock(log_file, fcntl.LOCK_EX)
            try:
                uncollected_logs = pickle.load(log_file)
                update_func(uncollected_logs)
                # Rewrite the file in place with the updated list.
                log_file.seek(0)
                log_file.truncate()
                pickle.dump(uncollected_logs, log_file)
                # Flush before unlocking so readers see the new contents.
                log_file.flush()
            finally:
                fcntl.flock(log_file, fcntl.LOCK_UN)
                log_file.close()
865
866
867 def add_client_log(self, hostname, remote_path, local_path):
868 """Adds a new set of client logs to the list of uncollected logs,
869 to allow for future log recovery.
870
871 @param host - the hostname of the machine holding the logs
872 @param remote_path - the directory on the remote machine holding logs
873 @param local_path - the local directory to copy the logs into
874 """
875 def update_func(logs_list):
876 logs_list.append((hostname, remote_path, local_path))
877 self._update_uncollected_logs_list(update_func)
878
879
880 def remove_client_log(self, hostname, remote_path, local_path):
881 """Removes a set of client logs from the list of uncollected logs,
882 to allow for future log recovery.
883
884 @param host - the hostname of the machine holding the logs
885 @param remote_path - the directory on the remote machine holding logs
886 @param local_path - the local directory to copy the logs into
887 """
888 def update_func(logs_list):
889 logs_list.remove((hostname, remote_path, local_path))
890 self._update_uncollected_logs_list(update_func)
891
892
mbligh0d0f67d2009-11-06 03:15:03 +0000893 def get_client_logs(self):
894 """Retrieves the list of uncollected logs, if it exists.
895
896 @returns A list of (host, remote_path, local_path) tuples. Returns
897 an empty list if no uncollected logs file exists.
898 """
899 log_exists = (self._uncollected_log_file and
900 os.path.exists(self._uncollected_log_file))
901 if log_exists:
902 return pickle.load(open(self._uncollected_log_file))
903 else:
904 return []
905
906
mbligh084bc172008-10-18 14:02:45 +0000907 def _fill_server_control_namespace(self, namespace, protect=True):
mbligh2b92b862008-11-22 13:25:32 +0000908 """
909 Prepare a namespace to be used when executing server control files.
mbligh084bc172008-10-18 14:02:45 +0000910
911 This sets up the control file API by importing modules and making them
912 available under the appropriate names within namespace.
913
914 For use by _execute_code().
915
916 Args:
917 namespace: The namespace dictionary to fill in.
918 protect: Boolean. If True (the default) any operation that would
919 clobber an existing entry in namespace will cause an error.
920 Raises:
921 error.AutoservError: When a name would be clobbered by import.
922 """
923 def _import_names(module_name, names=()):
mbligh2b92b862008-11-22 13:25:32 +0000924 """
925 Import a module and assign named attributes into namespace.
mbligh084bc172008-10-18 14:02:45 +0000926
927 Args:
928 module_name: The string module name.
929 names: A limiting list of names to import from module_name. If
930 empty (the default), all names are imported from the module
931 similar to a "from foo.bar import *" statement.
932 Raises:
933 error.AutoservError: When a name being imported would clobber
934 a name already in namespace.
935 """
936 module = __import__(module_name, {}, {}, names)
937
938 # No names supplied? Import * from the lowest level module.
939 # (Ugh, why do I have to implement this part myself?)
940 if not names:
941 for submodule_name in module_name.split('.')[1:]:
942 module = getattr(module, submodule_name)
943 if hasattr(module, '__all__'):
944 names = getattr(module, '__all__')
945 else:
946 names = dir(module)
947
948 # Install each name into namespace, checking to make sure it
949 # doesn't override anything that already exists.
950 for name in names:
951 # Check for conflicts to help prevent future problems.
952 if name in namespace and protect:
953 if namespace[name] is not getattr(module, name):
954 raise error.AutoservError('importing name '
955 '%s from %s %r would override %r' %
956 (name, module_name, getattr(module, name),
957 namespace[name]))
958 else:
959 # Encourage cleanliness and the use of __all__ for a
960 # more concrete API with less surprises on '*' imports.
961 warnings.warn('%s (%r) being imported from %s for use '
962 'in server control files is not the '
963 'first occurrance of that import.' %
964 (name, namespace[name], module_name))
965
966 namespace[name] = getattr(module, name)
967
968
969 # This is the equivalent of prepending a bunch of import statements to
970 # the front of the control script.
mbligha2b07dd2009-06-22 18:26:13 +0000971 namespace.update(os=os, sys=sys, logging=logging)
mbligh084bc172008-10-18 14:02:45 +0000972 _import_names('autotest_lib.server',
973 ('hosts', 'autotest', 'kvm', 'git', 'standalone_profiler',
974 'source_kernel', 'rpm_kernel', 'deb_kernel', 'git_kernel'))
975 _import_names('autotest_lib.server.subcommand',
976 ('parallel', 'parallel_simple', 'subcommand'))
977 _import_names('autotest_lib.server.utils',
978 ('run', 'get_tmp_dir', 'sh_escape', 'parse_machine'))
979 _import_names('autotest_lib.client.common_lib.error')
980 _import_names('autotest_lib.client.common_lib.barrier', ('barrier',))
981
982 # Inject ourself as the job object into other classes within the API.
983 # (Yuck, this injection is a gross thing be part of a public API. -gps)
984 #
985 # XXX Base & SiteAutotest do not appear to use .job. Who does?
986 namespace['autotest'].Autotest.job = self
987 # server.hosts.base_classes.Host uses .job.
988 namespace['hosts'].Host.job = self
Eric Li10222b82010-11-24 09:33:15 -0800989 namespace['hosts'].factory.ssh_user = self._ssh_user
990 namespace['hosts'].factory.ssh_port = self._ssh_port
991 namespace['hosts'].factory.ssh_pass = self._ssh_pass
mbligh084bc172008-10-18 14:02:45 +0000992
993
    def _execute_code(self, code_file, namespace, protect=True):
        """
        Execute code using a copy of namespace as a server control script.

        Unless protect_namespace is explicitly set to False, the dict will not
        be modified.

        Args:
          code_file: The filename of the control file to execute.
          namespace: A dict containing names to make available during
              execution.
          protect: Boolean.  If True (the default) a copy of the namespace
              dict is used during execution to prevent the code from
              modifying its contents outside of this function.  If False the
              raw dict is passed in and modifications will be allowed.
        """
        if protect:
            namespace = namespace.copy()
        self._fill_server_control_namespace(namespace, protect=protect)
        # TODO: Simplify and get rid of the special cases for only 1 machine.
        if len(self.machines) > 1:
            machines_text = '\n'.join(self.machines) + '\n'
            # Only rewrite the file if it does not match our machine list.
            try:
                machines_f = open(MACHINES_FILENAME, 'r')
                existing_machines_text = machines_f.read()
                machines_f.close()
            except EnvironmentError:
                # Missing/unreadable machines file: treat as out of date.
                existing_machines_text = None
            if machines_text != existing_machines_text:
                utils.open_write_close(MACHINES_FILENAME, machines_text)
        execfile(code_file, namespace, namespace)
jadmanski10646442008-08-13 14:05:21 +00001025
1026
jadmanskie29d0e42010-06-17 16:06:52 +00001027 def _parse_status(self, new_line):
mbligh0d0f67d2009-11-06 03:15:03 +00001028 if not self._using_parser:
jadmanski10646442008-08-13 14:05:21 +00001029 return
jadmanskie29d0e42010-06-17 16:06:52 +00001030 new_tests = self.parser.process_lines([new_line])
jadmanski10646442008-08-13 14:05:21 +00001031 for test in new_tests:
1032 self.__insert_test(test)
1033
1034
1035 def __insert_test(self, test):
mbligh2b92b862008-11-22 13:25:32 +00001036 """
1037 An internal method to insert a new test result into the
jadmanski10646442008-08-13 14:05:21 +00001038 database. This method will not raise an exception, even if an
1039 error occurs during the insert, to avoid failing a test
1040 simply because of unexpected database issues."""
showard21baa452008-10-21 00:08:39 +00001041 self.num_tests_run += 1
1042 if status_lib.is_worse_than_or_equal_to(test.status, 'FAIL'):
1043 self.num_tests_failed += 1
jadmanski10646442008-08-13 14:05:21 +00001044 try:
1045 self.results_db.insert_test(self.job_model, test)
1046 except Exception:
1047 msg = ("WARNING: An unexpected error occured while "
1048 "inserting test results into the database. "
1049 "Ignoring error.\n" + traceback.format_exc())
1050 print >> sys.stderr, msg
1051
mblighcaa62c22008-04-07 21:51:17 +00001052
mblighfc3da5b2010-01-06 18:37:22 +00001053 def preprocess_client_state(self):
1054 """
1055 Produce a state file for initializing the state of a client job.
1056
1057 Creates a new client state file with all the current server state, as
1058 well as some pre-set client state.
1059
1060 @returns The path of the file the state was written into.
1061 """
1062 # initialize the sysinfo state
1063 self._state.set('client', 'sysinfo', self.sysinfo.serialize())
1064
1065 # dump the state out to a tempfile
1066 fd, file_path = tempfile.mkstemp(dir=self.tmpdir)
1067 os.close(fd)
mbligha2c99492010-01-27 22:59:50 +00001068
1069 # write_to_file doesn't need locking, we exclusively own file_path
mblighfc3da5b2010-01-06 18:37:22 +00001070 self._state.write_to_file(file_path)
1071 return file_path
1072
1073
    def postprocess_client_state(self, state_path):
        """
        Update the state of this job with the state from a client job.

        Updates the state of the server side of a job with the final state
        of a client job that was run. Updates the non-client-specific state,
        pulls in some specific bits from the client-specific state, and then
        discards the rest. Removes the state file afterwards.

        @param state_path A path to the state file from the client.
        """
        # update the on-disk state
        try:
            self._state.read_from_file(state_path)
            os.remove(state_path)
        except OSError, e:
            # ignore file-not-found errors; a missing state file just means
            # the client never produced one, so keep the existing state
            if e.errno != errno.ENOENT:
                raise
            else:
                logging.debug('Client state file %s not found', state_path)

        # update the sysinfo state
        if self._state.has('client', 'sysinfo'):
            self.sysinfo.deserialize(self._state.get('client', 'sysinfo'))

        # drop all the client-specific state
        self._state.discard_namespace('client')
1102
1103
mbligh0a883702010-04-21 01:58:34 +00001104 def clear_all_known_hosts(self):
1105 """Clears known hosts files for all AbstractSSHHosts."""
1106 for host in self.hosts:
1107 if isinstance(host, abstract_ssh.AbstractSSHHost):
1108 host.clear_known_hosts()
1109
1110
# Allow a site-specific subclass to interpose between base_server_job and
# server_job; when no site_server_job module exists, base_server_job is
# used directly.
site_server_job = utils.import_site_class(
    __file__, "autotest_lib.server.site_server_job", "site_server_job",
    base_server_job)

# The concrete job class used by the rest of the server code.
class server_job(site_server_job):
    pass
jadmanskif37df842009-02-11 00:03:26 +00001117
1118
class warning_manager(object):
    """Controls warning logs by tracking, for each warning type, the time
    intervals during which that type of warning is disabled."""
    def __init__(self):
        # Maps warning type -> list of (start, end) intervals during which
        # the type is disabled; end is None while the interval is open.
        self.disabled_warnings = {}


    def is_valid(self, timestamp, warning_type):
        """Indicates if a warning (based on the time it occurred and its
        type) is a valid warning. A warning is considered "invalid" if this
        type of warning was marked as "disabled" at the time it occurred."""
        for start, end in self.disabled_warnings.get(warning_type, []):
            if timestamp >= start and (end is None or timestamp < end):
                return False
        return True


    def disable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, disables all further warnings of this type."""
        intervals = self.disabled_warnings.setdefault(warning_type, [])
        # Only open a new interval if one isn't already open.
        already_open = bool(intervals) and intervals[-1][1] is None
        if not already_open:
            intervals.append((int(current_time_func()), None))


    def enable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, enables all further warnings of this type."""
        intervals = self.disabled_warnings.get(warning_type, [])
        # Close the open interval, if any, at the current time.
        if intervals and intervals[-1][1] is None:
            start = intervals[-1][0]
            intervals[-1] = (start, int(current_time_func()))