blob: 40489e46dcebdce439b1def9590af3ef6f8bed02 [file] [log] [blame]
mbligh57e78662008-06-17 19:53:49 +00001"""
2The main job wrapper for the server side.
3
4This is the core infrastructure. Derived from the client side job.py
5
6Copyright Martin J. Bligh, Andy Whitcroft 2007
7"""
8
jadmanski6bb32d72009-03-19 20:25:24 +00009import getpass, os, sys, re, stat, tempfile, time, select, subprocess
mblighfc3da5b2010-01-06 18:37:22 +000010import traceback, shutil, warnings, fcntl, pickle, logging, itertools, errno
showard75cdfee2009-06-10 17:40:41 +000011from autotest_lib.client.bin import sysinfo
mbligh0d0f67d2009-11-06 03:15:03 +000012from autotest_lib.client.common_lib import base_job
mbligh09108442008-10-15 16:27:38 +000013from autotest_lib.client.common_lib import error, log, utils, packages
showard75cdfee2009-06-10 17:40:41 +000014from autotest_lib.client.common_lib import logging_manager
jadmanski043e1132008-11-19 17:10:32 +000015from autotest_lib.server import test, subcommand, profilers
mbligh0a883702010-04-21 01:58:34 +000016from autotest_lib.server.hosts import abstract_ssh
jadmanski10646442008-08-13 14:05:21 +000017from autotest_lib.tko import db as tko_db, status_lib, utils as tko_utils
jadmanski10646442008-08-13 14:05:21 +000018
19
mbligh084bc172008-10-18 14:02:45 +000020def _control_segment_path(name):
21 """Get the pathname of the named control segment file."""
jadmanski10646442008-08-13 14:05:21 +000022 server_dir = os.path.dirname(os.path.abspath(__file__))
mbligh084bc172008-10-18 14:02:45 +000023 return os.path.join(server_dir, "control_segments", name)
jadmanski10646442008-08-13 14:05:21 +000024
25
# Filenames written into each job's results directory (and pushed to the
# client for client-side jobs).
CLIENT_CONTROL_FILENAME = 'control'
SERVER_CONTROL_FILENAME = 'control.srv'
MACHINES_FILENAME = '.machines'

# Pre-canned control segments shipped alongside this module (resolved via
# _control_segment_path) implementing the standard job phases.
CLIENT_WRAPPER_CONTROL_FILE = _control_segment_path('client_wrapper')
CRASHDUMPS_CONTROL_FILE = _control_segment_path('crashdumps')
CRASHINFO_CONTROL_FILE = _control_segment_path('crashinfo')
INSTALL_CONTROL_FILE = _control_segment_path('install')
CLEANUP_CONTROL_FILE = _control_segment_path('cleanup')

VERIFY_CONTROL_FILE = _control_segment_path('verify')
REPAIR_CONTROL_FILE = _control_segment_path('repair')
jadmanski10646442008-08-13 14:05:21 +000038
39
mbligh062ed152009-01-13 00:57:14 +000040# by default provide a stub that generates no site data
41def _get_site_job_data_dummy(job):
42 return {}
43
44
# Load up site-specific code for generating site-specific job data.
# If no site_server_job module is installed, this resolves to the dummy
# above, which returns an empty dict.
get_site_job_data = utils.import_site_function(__file__,
    "autotest_lib.server.site_server_job", "get_site_job_data",
    _get_site_job_data_dummy)
jadmanski10646442008-08-13 14:05:21 +000049
50
class base_server_job(base_job.base_job):
    """The server-side concrete implementation of base_job.

    Optional properties provided by this implementation:
        serverdir
        conmuxdir

        num_tests_run
        num_tests_failed

        warning_manager
        warning_loggers
    """

    # Version of the status log format this job writes; recorded in the
    # job keyvals and consumed by the TKO parser.
    _STATUS_VERSION = 1
jadmanski10646442008-08-13 14:05:21 +000066
    def __init__(self, control, args, resultdir, label, user, machines,
                 client=False, parse_job='',
                 ssh_user='root', ssh_port=22, ssh_pass='',
                 group_name='', tag='',
                 control_filename=SERVER_CONTROL_FILENAME):
        """
        Create a server side job object.

        @param control: The pathname of the control file.
        @param args: Passed to the control file.
        @param resultdir: Where to throw the results.
        @param label: Description of the job.
        @param user: Username for the job (email address).
        @param machines: A list of hostnames the job will run against.
        @param client: True if this is a client-side control file.
        @param parse_job: string, if supplied it is the job execution tag that
                the results will be passed through to the TKO parser with.
        @param ssh_user: The SSH username. [root]
        @param ssh_port: The SSH port number. [22]
        @param ssh_pass: The SSH passphrase, if needed.
        @param group_name: If supplied, this will be written out as
                host_group_name in the keyvals file for the parser.
        @param tag: The job execution tag from the scheduler. [optional]
        @param control_filename: The filename where the server control file
                should be written in the results directory.
        """
        super(base_server_job, self).__init__(resultdir=resultdir)

        # NOTE(review): 'path' is computed but never used in this method.
        path = os.path.dirname(__file__)
        self.control = control
        # File tracking logs that still need collecting from remote hosts;
        # see run() for its creation/removal lifecycle.
        self._uncollected_log_file = os.path.join(self.resultdir,
                                                  'uncollected_logs')
        debugdir = os.path.join(self.resultdir, 'debug')
        if not os.path.exists(debugdir):
            os.mkdir(debugdir)

        # default to the invoking user when none was supplied
        if user:
            self.user = user
        else:
            self.user = getpass.getuser()

        self.args = args
        self.machines = machines
        self._client = client
        # indentation prefix prepended to status log lines (grows by one
        # tab per nested group; see _run_group/record)
        self._record_prefix = ''
        # open console warning streams polled by _read_warnings()
        self.warning_loggers = set()
        self.warning_manager = warning_manager()
        self._ssh_user = ssh_user
        self._ssh_port = ssh_port
        self._ssh_pass = ssh_pass
        self.tag = tag
        self.last_boot_tag = None
        # all hosts ever created by this job; cleaned up via the
        # subcommand fork/join hooks registered below
        self.hosts = set()
        self.drop_caches = False
        self.drop_caches_between_iterations = False
        self._control_filename = control_filename

        # take over stdout/stderr so all output lands in the job logs
        self.logging = logging_manager.get_logging_manager(
                manage_stdout_and_stderr=True, redirect_fds=True)
        subcommand.logging_manager_object = self.logging

        self.sysinfo = sysinfo.sysinfo(self.resultdir)
        self.profilers = profilers.profilers(self)

        job_data = {'label' : label, 'user' : user,
                    'hostname' : ','.join(machines),
                    'status_version' : str(self._STATUS_VERSION),
                    'job_started' : str(int(time.time()))}
        if group_name:
            job_data['host_group_name'] = group_name

        # only write these keyvals out on the first job in a resultdir
        if 'job_started' not in utils.read_keyval(self.resultdir):
            job_data.update(get_site_job_data(self))
            utils.write_keyval(self.resultdir, job_data)

        self._parse_job = parse_job
        # continuous parsing only works for single-machine jobs
        self._using_parser = (self._parse_job and len(machines) <= 1)
        self.pkgmgr = packages.PackageManager(
            self.autodir, run_function_dargs={'timeout':600})
        self.num_tests_run = 0
        self.num_tests_failed = 0

        self._register_subcommand_hooks()

        # these components aren't usable on the server
        self.bootloader = None
        self.harness = None
154
155
156 @classmethod
157 def _find_base_directories(cls):
158 """
159 Determine locations of autodir, clientdir and serverdir. Assumes
160 that this file is located within serverdir and uses __file__ along
161 with relative paths to resolve the location.
162 """
163 serverdir = os.path.abspath(os.path.dirname(__file__))
164 autodir = os.path.normpath(os.path.join(serverdir, '..'))
165 clientdir = os.path.join(autodir, 'client')
166 return autodir, clientdir, serverdir
167
168
169 def _find_resultdir(self, resultdir):
170 """
171 Determine the location of resultdir. For server jobs we expect one to
172 always be explicitly passed in to __init__, so just return that.
173 """
174 if resultdir:
175 return os.path.normpath(resultdir)
176 else:
177 return None
178
jadmanski550fdc22008-11-20 16:32:08 +0000179
jadmanskie432dd22009-01-30 15:04:51 +0000180 @staticmethod
181 def _load_control_file(path):
182 f = open(path)
183 try:
184 control_file = f.read()
185 finally:
186 f.close()
187 return re.sub('\r', '', control_file)
188
189
jadmanski550fdc22008-11-20 16:32:08 +0000190 def _register_subcommand_hooks(self):
mbligh2b92b862008-11-22 13:25:32 +0000191 """
192 Register some hooks into the subcommand modules that allow us
193 to properly clean up self.hosts created in forked subprocesses.
194 """
jadmanski550fdc22008-11-20 16:32:08 +0000195 def on_fork(cmd):
196 self._existing_hosts_on_fork = set(self.hosts)
197 def on_join(cmd):
198 new_hosts = self.hosts - self._existing_hosts_on_fork
199 for host in new_hosts:
200 host.close()
201 subcommand.subcommand.register_fork_hook(on_fork)
202 subcommand.subcommand.register_join_hook(on_join)
203
jadmanski10646442008-08-13 14:05:21 +0000204
    def init_parser(self):
        """
        Start the continuous parsing of self.resultdir. This sets up
        the database connection and inserts the basic job object into
        the database if necessary.

        No-op unless continuous parsing was enabled in __init__
        (self._using_parser).
        """
        if not self._using_parser:
            return
        # redirect parser debugging to .parse.log
        parse_log = os.path.join(self.resultdir, '.parse.log')
        # opened unbuffered (third arg 0) so debug output is not lost on crash
        parse_log = open(parse_log, 'w', 0)
        tko_utils.redirect_parser_debugging(parse_log)
        # create a job model object and set up the db
        self.results_db = tko_db.db(autocommit=True)
        self.parser = status_lib.parser(self._STATUS_VERSION)
        self.job_model = self.parser.make_job(self.resultdir)
        self.parser.start(self.job_model)
        # check if a job already exists in the db and insert it if
        # it does not
        job_idx = self.results_db.find_job(self._parse_job)
        if job_idx is None:
            self.results_db.insert_job(self._parse_job, self.job_model)
        else:
            # job already present: attach its existing indices to our model
            machine_idx = self.results_db.lookup_machine(self.job_model.machine)
            self.job_model.index = job_idx
            self.job_model.machine_idx = machine_idx
231
232
233 def cleanup_parser(self):
mbligh2b92b862008-11-22 13:25:32 +0000234 """
235 This should be called after the server job is finished
jadmanski10646442008-08-13 14:05:21 +0000236 to carry out any remaining cleanup (e.g. flushing any
mbligh2b92b862008-11-22 13:25:32 +0000237 remaining test results to the results db)
238 """
mbligh0d0f67d2009-11-06 03:15:03 +0000239 if not self._using_parser:
jadmanski10646442008-08-13 14:05:21 +0000240 return
241 final_tests = self.parser.end()
242 for test in final_tests:
243 self.__insert_test(test)
mbligh0d0f67d2009-11-06 03:15:03 +0000244 self._using_parser = False
jadmanski10646442008-08-13 14:05:21 +0000245
246
247 def verify(self):
248 if not self.machines:
mbligh084bc172008-10-18 14:02:45 +0000249 raise error.AutoservError('No machines specified to verify')
mbligh0fce4112008-11-27 00:37:17 +0000250 if self.resultdir:
251 os.chdir(self.resultdir)
jadmanski10646442008-08-13 14:05:21 +0000252 try:
jadmanskicdd0c402008-09-19 21:21:31 +0000253 namespace = {'machines' : self.machines, 'job' : self,
mbligh0d0f67d2009-11-06 03:15:03 +0000254 'ssh_user' : self._ssh_user,
255 'ssh_port' : self._ssh_port,
256 'ssh_pass' : self._ssh_pass}
mbligh084bc172008-10-18 14:02:45 +0000257 self._execute_code(VERIFY_CONTROL_FILE, namespace, protect=False)
jadmanski10646442008-08-13 14:05:21 +0000258 except Exception, e:
mbligh2b92b862008-11-22 13:25:32 +0000259 msg = ('Verify failed\n' + str(e) + '\n' + traceback.format_exc())
jadmanski10646442008-08-13 14:05:21 +0000260 self.record('ABORT', None, None, msg)
261 raise
262
263
264 def repair(self, host_protection):
265 if not self.machines:
266 raise error.AutoservError('No machines specified to repair')
mbligh0fce4112008-11-27 00:37:17 +0000267 if self.resultdir:
268 os.chdir(self.resultdir)
jadmanski10646442008-08-13 14:05:21 +0000269 namespace = {'machines': self.machines, 'job': self,
mbligh0d0f67d2009-11-06 03:15:03 +0000270 'ssh_user': self._ssh_user, 'ssh_port': self._ssh_port,
271 'ssh_pass': self._ssh_pass,
jadmanski10646442008-08-13 14:05:21 +0000272 'protection_level': host_protection}
mbligh25c0b8c2009-01-24 01:44:17 +0000273
mbligh0931b0a2009-04-08 17:44:48 +0000274 self._execute_code(REPAIR_CONTROL_FILE, namespace, protect=False)
jadmanski10646442008-08-13 14:05:21 +0000275
276
277 def precheck(self):
278 """
279 perform any additional checks in derived classes.
280 """
281 pass
282
283
284 def enable_external_logging(self):
mbligh2b92b862008-11-22 13:25:32 +0000285 """
286 Start or restart external logging mechanism.
jadmanski10646442008-08-13 14:05:21 +0000287 """
288 pass
289
290
291 def disable_external_logging(self):
mbligh2b92b862008-11-22 13:25:32 +0000292 """
293 Pause or stop external logging mechanism.
jadmanski10646442008-08-13 14:05:21 +0000294 """
295 pass
296
297
298 def use_external_logging(self):
mbligh2b92b862008-11-22 13:25:32 +0000299 """
300 Return True if external logging should be used.
jadmanski10646442008-08-13 14:05:21 +0000301 """
302 return False
303
304
    def _make_parallel_wrapper(self, function, machines, log):
        """Wrap function as appropriate for calling by parallel_simple.

        Three cases:
          * parsing enabled + forking + logging: per-machine wrapper that
            redirects results/parsing into a per-machine subdirectory;
          * multiple machines + logging: per-machine wrapper that only sets
            up the per-machine results context;
          * otherwise: the function is returned unwrapped.
        """
        # forking happens unless this is a single-machine job running on
        # exactly its own machine list
        is_forking = not (len(machines) == 1 and self.machines == machines)
        if self._parse_job and is_forking and log:
            def wrapper(machine):
                # runs in the forked child: safe to mutate job state
                self._parse_job += "/" + machine
                self._using_parser = True
                self.machines = [machine]
                self.push_execution_context(machine)
                os.chdir(self.resultdir)
                utils.write_keyval(self.resultdir, {"hostname": machine})
                self.init_parser()
                result = function(machine)
                self.cleanup_parser()
                return result
        elif len(machines) > 1 and log:
            def wrapper(machine):
                self.push_execution_context(machine)
                os.chdir(self.resultdir)
                machine_data = {'hostname' : machine,
                                'status_version' : str(self._STATUS_VERSION)}
                utils.write_keyval(self.resultdir, machine_data)
                result = function(machine)
                return result
        else:
            wrapper = function
        return wrapper
332
333
334 def parallel_simple(self, function, machines, log=True, timeout=None,
335 return_results=False):
336 """
337 Run 'function' using parallel_simple, with an extra wrapper to handle
338 the necessary setup for continuous parsing, if possible. If continuous
339 parsing is already properly initialized then this should just work.
340
341 @param function: A callable to run in parallel given each machine.
342 @param machines: A list of machine names to be passed one per subcommand
343 invocation of function.
344 @param log: If True, output will be written to output in a subdirectory
345 named after each machine.
346 @param timeout: Seconds after which the function call should timeout.
347 @param return_results: If True instead of an AutoServError being raised
348 on any error a list of the results|exceptions from the function
349 called on each arg is returned. [default: False]
350
351 @raises error.AutotestError: If any of the functions failed.
352 """
353 wrapper = self._make_parallel_wrapper(function, machines, log)
354 return subcommand.parallel_simple(wrapper, machines,
355 log=log, timeout=timeout,
356 return_results=return_results)
357
358
359 def parallel_on_machines(self, function, machines, timeout=None):
360 """
showardcd5fac42009-07-06 20:19:43 +0000361 @param function: Called in parallel with one machine as its argument.
mbligh415dc212009-06-15 21:53:34 +0000362 @param machines: A list of machines to call function(machine) on.
363 @param timeout: Seconds after which the function call should timeout.
364
365 @returns A list of machines on which function(machine) returned
366 without raising an exception.
367 """
showardcd5fac42009-07-06 20:19:43 +0000368 results = self.parallel_simple(function, machines, timeout=timeout,
mbligh415dc212009-06-15 21:53:34 +0000369 return_results=True)
370 success_machines = []
371 for result, machine in itertools.izip(results, machines):
372 if not isinstance(result, Exception):
373 success_machines.append(machine)
374 return success_machines
jadmanski10646442008-08-13 14:05:21 +0000375
376
    # sentinel value for control_file_dir meaning "use a fresh temp dir"
    _USE_TEMP_DIR = object()
    def run(self, cleanup=False, install_before=False, install_after=False,
            collect_crashdumps=True, namespace={}, control=None,
            control_file_dir=None, only_collect_crashinfo=False):
        """
        Execute this job's control file, bracketed by the optional
        install/cleanup/crash-collection phases.

        @param cleanup: Run the cleanup control segment afterwards.
        @param install_before: Run the install control segment first.
        @param install_after: Run the install control segment at the end.
        @param collect_crashdumps: Collect crashdumps in the finally phase.
        @param namespace: Extra names made visible to the control file.
                NOTE: the mutable {} default is safe here because the dict
                is copied before being modified.
        @param control: Control file text; defaults to loading self.control.
        @param control_file_dir: Where to write the control files, or
                _USE_TEMP_DIR for a throwaway temp directory.
        @param only_collect_crashinfo: Skip the control file and only run
                crashinfo collection for a previous run.
        """
        # for a normal job, make sure the uncollected logs file exists
        # for a crashinfo-only run it should already exist, bail out otherwise
        created_uncollected_logs = False
        if self.resultdir and not os.path.exists(self._uncollected_log_file):
            if only_collect_crashinfo:
                # if this is a crashinfo-only run, and there were no existing
                # uncollected logs, just bail out early
                logging.info("No existing uncollected logs, "
                             "skipping crashinfo collection")
                return
            else:
                # seed the file with an empty pickled list of pending logs
                log_file = open(self._uncollected_log_file, "w")
                pickle.dump([], log_file)
                log_file.close()
                created_uncollected_logs = True

        # use a copy so changes don't affect the original dictionary
        namespace = namespace.copy()
        machines = self.machines
        if control is None:
            if self.control is None:
                control = ''
            else:
                control = self._load_control_file(self.control)
        if control_file_dir is None:
            control_file_dir = self.resultdir

        self.aborted = False
        namespace['machines'] = machines
        namespace['args'] = self.args
        namespace['job'] = self
        namespace['ssh_user'] = self._ssh_user
        namespace['ssh_port'] = self._ssh_port
        namespace['ssh_pass'] = self._ssh_pass
        test_start_time = int(time.time())

        if self.resultdir:
            os.chdir(self.resultdir)
            # touch status.log so that the parser knows a job is running here
            open(self.get_status_log_path(), 'a').close()
            self.enable_external_logging()

        # assume a crash until the control file completes cleanly
        collect_crashinfo = True
        temp_control_file_dir = None
        try:
            try:
                if install_before and machines:
                    self._execute_code(INSTALL_CONTROL_FILE, namespace)

                if only_collect_crashinfo:
                    # skip straight to the finally-phase collection
                    return

                # determine the dir to write the control files to
                cfd_specified = (control_file_dir
                                 and control_file_dir is not self._USE_TEMP_DIR)
                if cfd_specified:
                    temp_control_file_dir = None
                else:
                    temp_control_file_dir = tempfile.mkdtemp(
                        suffix='temp_control_file_dir')
                    control_file_dir = temp_control_file_dir
                server_control_file = os.path.join(control_file_dir,
                                                   self._control_filename)
                client_control_file = os.path.join(control_file_dir,
                                                   CLIENT_CONTROL_FILENAME)
                if self._client:
                    # client-side job: the real control runs on the client,
                    # the server runs the generic client wrapper
                    namespace['control'] = control
                    utils.open_write_close(client_control_file, control)
                    shutil.copyfile(CLIENT_WRAPPER_CONTROL_FILE,
                                    server_control_file)
                else:
                    utils.open_write_close(server_control_file, control)
                logging.info("Processing control file")
                self._execute_code(server_control_file, namespace)
                logging.info("Finished processing control file")

                # no error occured, so we don't need to collect crashinfo
                collect_crashinfo = False
            except:
                try:
                    logging.exception(
                        'Exception escaped control file, job aborting:')
                except:
                    pass # don't let logging exceptions here interfere
                raise
        finally:
            if temp_control_file_dir:
                # Clean up temp directory used for copies of the control files
                try:
                    shutil.rmtree(temp_control_file_dir)
                except Exception, e:
                    logging.warn('Could not remove temp directory %s: %s',
                                 temp_control_file_dir, e)

            if machines and (collect_crashdumps or collect_crashinfo):
                namespace['test_start_time'] = test_start_time
                if collect_crashinfo:
                    # includes crashdumps
                    self._execute_code(CRASHINFO_CONTROL_FILE, namespace)
                else:
                    self._execute_code(CRASHDUMPS_CONTROL_FILE, namespace)
            # only remove the pending-logs file if this invocation created it
            if self._uncollected_log_file and created_uncollected_logs:
                os.remove(self._uncollected_log_file)
            self.disable_external_logging()
            if cleanup and machines:
                self._execute_code(CLEANUP_CONTROL_FILE, namespace)
            if install_after and machines:
                self._execute_code(INSTALL_CONTROL_FILE, namespace)
jadmanski10646442008-08-13 14:05:21 +0000489
490
    def run_test(self, url, *args, **dargs):
        """
        Summon a test object and run it.

        tag
                tag to add to testname
        url
                url of the test to run

        @returns True on success, False when the test failed with a
                TestBaseException (already recorded); any other exception
                is re-raised.
        """
        group, testname = self.pkgmgr.get_package_name(url, 'test')
        testname, subdir, tag = self._build_tagged_test_name(testname, dargs)
        outputdir = self._make_test_outputdir(subdir)

        def group_func():
            try:
                test.runtest(self, url, tag, args, dargs)
            except error.TestBaseException, e:
                # test-level failure: record with the exception's own status
                self.record(e.exit_status, subdir, testname, str(e))
                raise
            except Exception, e:
                # unexpected failure: record as FAIL with a traceback
                info = str(e) + "\n" + traceback.format_exc()
                self.record('FAIL', subdir, testname, info)
                raise
            else:
                self.record('GOOD', subdir, testname, 'completed successfully')

        result, exc_info = self._run_group(testname, subdir, group_func)
        if exc_info and isinstance(exc_info[1], error.TestBaseException):
            return False
        elif exc_info:
            # re-raise preserving the original traceback (py2 3-arg raise)
            raise exc_info[0], exc_info[1], exc_info[2]
        else:
            return True
jadmanski10646442008-08-13 14:05:21 +0000524
525
    def _run_group(self, name, subdir, function, *args, **dargs):
        """\
        Underlying method for running something inside of a group.

        Records START/END lines around the call and indents nested
        records by one tab while the group runs.

        @returns (result, exc_info) where exc_info is sys.exc_info() if a
                TestBaseException escaped, else None. Any other exception
                is converted into error.JobError and raised.
        """
        result, exc_info = None, None
        old_record_prefix = self._record_prefix
        try:
            self.record('START', subdir, name)
            self._record_prefix += '\t'
            try:
                result = function(*args, **dargs)
            finally:
                # always restore the indent, even if function() raised
                self._record_prefix = old_record_prefix
        except error.TestBaseException, e:
            # expected test failure: record and return the exc_info
            self.record("END %s" % e.exit_status, subdir, name)
            exc_info = sys.exc_info()
        except Exception, e:
            # unexpected failure: abort the group and raise JobError
            err_msg = str(e) + '\n'
            err_msg += traceback.format_exc()
            self.record('END ABORT', subdir, name, err_msg)
            raise error.JobError(name + ' failed\n' + traceback.format_exc())
        else:
            self.record('END GOOD', subdir, name)

        return result, exc_info
jadmanski10646442008-08-13 14:05:21 +0000551
552
553 def run_group(self, function, *args, **dargs):
554 """\
555 function:
556 subroutine to run
557 *args:
558 arguments for the function
559 """
560
561 name = function.__name__
562
563 # Allow the tag for the group to be specified.
564 tag = dargs.pop('tag', None)
565 if tag:
566 name = tag
567
jadmanskide292df2008-08-26 20:51:14 +0000568 return self._run_group(name, None, function, *args, **dargs)[0]
jadmanski10646442008-08-13 14:05:21 +0000569
570
    def run_reboot(self, reboot_func, get_kernel_func):
        """\
        A specialization of run_group meant specifically for handling
        a reboot. Includes support for capturing the kernel version
        after the reboot.

        reboot_func: a function that carries out the reboot

        get_kernel_func: a function that returns a string
        representing the kernel version.
        """

        old_record_prefix = self._record_prefix
        try:
            self.record('START', None, 'reboot')
            self._record_prefix += '\t'
            reboot_func()
        except Exception, e:
            # restore indent before recording the failure, then re-raise
            self._record_prefix = old_record_prefix
            err_msg = str(e) + '\n' + traceback.format_exc()
            self.record('END FAIL', None, 'reboot', err_msg)
            raise
        else:
            # capture the post-reboot kernel version in the END record
            kernel = get_kernel_func()
            self._record_prefix = old_record_prefix
            self.record('END GOOD', None, 'reboot',
                        optional_fields={"kernel": kernel})
598
599
jadmanskie432dd22009-01-30 15:04:51 +0000600 def run_control(self, path):
601 """Execute a control file found at path (relative to the autotest
602 path). Intended for executing a control file within a control file,
603 not for running the top-level job control file."""
604 path = os.path.join(self.autodir, path)
605 control_file = self._load_control_file(path)
mbligh0d0f67d2009-11-06 03:15:03 +0000606 self.run(control=control_file, control_file_dir=self._USE_TEMP_DIR)
jadmanskie432dd22009-01-30 15:04:51 +0000607
608
jadmanskic09fc152008-10-15 17:56:59 +0000609 def add_sysinfo_command(self, command, logfile=None, on_every_test=False):
mbligh4395bbd2009-03-25 19:34:17 +0000610 self._add_sysinfo_loggable(sysinfo.command(command, logf=logfile),
jadmanskic09fc152008-10-15 17:56:59 +0000611 on_every_test)
612
613
614 def add_sysinfo_logfile(self, file, on_every_test=False):
615 self._add_sysinfo_loggable(sysinfo.logfile(file), on_every_test)
616
617
618 def _add_sysinfo_loggable(self, loggable, on_every_test):
619 if on_every_test:
620 self.sysinfo.test_loggables.add(loggable)
621 else:
622 self.sysinfo.boot_loggables.add(loggable)
623
624
    def record(self, status_code, subdir, operation, status='',
               optional_fields=None):
        """
        Record job-level status

        The intent is to make this file both machine parseable and
        human readable. That involves a little more complexity, but
        really isn't all that bad ;-)

        Format is <status code>\t<subdir>\t<operation>\t<status>

        status code: see common_lib.log.is_valid_status()
                     for valid status definition

        subdir: MUST be a relevant subdirectory in the results,
        or None, which will be represented as '----'

        operation: description of what you ran (e.g. "dbench", or
                                        "mkfs -t foobar /dev/sda9")

        status: error message or "completed successfully"

        ------------------------------------------------------------

        Initial tabs indicate indent levels for grouping, and is
        governed by self._record_prefix

        multiline messages have secondary lines prefaced by a double
        space ('  ')

        Executing this method will trigger the logging of all new
        warnings to date from the various console loggers.
        """
        # poll all our warning loggers for new warnings
        warnings = self._read_warnings()
        old_record_prefix = self._record_prefix
        try:
            # warnings attached to an END record are indented one extra
            # level so they nest inside the group being closed
            if status_code.startswith("END "):
                self._record_prefix += "\t"
            for timestamp, msg in warnings:
                self._record("WARN", None, None, msg, timestamp)
        finally:
            self._record_prefix = old_record_prefix

        # write out the actual status log line
        self._record(status_code, subdir, operation, status,
                     optional_fields=optional_fields)
672
673
674 def _read_warnings(self):
jadmanskif37df842009-02-11 00:03:26 +0000675 """Poll all the warning loggers and extract any new warnings that have
676 been logged. If the warnings belong to a category that is currently
677 disabled, this method will discard them and they will no longer be
678 retrievable.
679
680 Returns a list of (timestamp, message) tuples, where timestamp is an
681 integer epoch timestamp."""
jadmanski10646442008-08-13 14:05:21 +0000682 warnings = []
683 while True:
684 # pull in a line of output from every logger that has
685 # output ready to be read
mbligh2b92b862008-11-22 13:25:32 +0000686 loggers, _, _ = select.select(self.warning_loggers, [], [], 0)
jadmanski10646442008-08-13 14:05:21 +0000687 closed_loggers = set()
688 for logger in loggers:
689 line = logger.readline()
690 # record any broken pipes (aka line == empty)
691 if len(line) == 0:
692 closed_loggers.add(logger)
693 continue
jadmanskif37df842009-02-11 00:03:26 +0000694 # parse out the warning
695 timestamp, msgtype, msg = line.split('\t', 2)
696 timestamp = int(timestamp)
697 # if the warning is valid, add it to the results
698 if self.warning_manager.is_valid(timestamp, msgtype):
699 warnings.append((timestamp, msg.strip()))
jadmanski10646442008-08-13 14:05:21 +0000700
701 # stop listening to loggers that are closed
702 self.warning_loggers -= closed_loggers
703
704 # stop if none of the loggers have any output left
705 if not loggers:
706 break
707
708 # sort into timestamp order
709 warnings.sort()
710 return warnings
711
712
showardcc929362010-01-25 21:20:41 +0000713 def _unique_subdirectory(self, base_subdirectory_name):
714 """Compute a unique results subdirectory based on the given name.
715
716 Appends base_subdirectory_name with a number as necessary to find a
717 directory name that doesn't already exist.
718 """
719 subdirectory = base_subdirectory_name
720 counter = 1
721 while os.path.exists(os.path.join(self.resultdir, subdirectory)):
722 subdirectory = base_subdirectory_name + '.' + str(counter)
723 counter += 1
724 return subdirectory
725
726
727 def record_summary(self, status_code, test_name, reason='', attributes=None,
728 distinguishing_attributes=(), child_test_ids=None):
729 """Record a summary test result.
730
731 @param status_code: status code string, see
732 common_lib.log.is_valid_status()
733 @param test_name: name of the test
734 @param reason: (optional) string providing detailed reason for test
735 outcome
736 @param attributes: (optional) dict of string keyvals to associate with
737 this result
738 @param distinguishing_attributes: (optional) list of attribute names
739 that should be used to distinguish identically-named test
740 results. These attributes should be present in the attributes
741 parameter. This is used to generate user-friendly subdirectory
742 names.
743 @param child_test_ids: (optional) list of test indices for test results
744 used in generating this result.
745 """
746 subdirectory_name_parts = [test_name]
747 for attribute in distinguishing_attributes:
748 assert attributes
749 assert attribute in attributes, '%s not in %s' % (attribute,
750 attributes)
751 subdirectory_name_parts.append(attributes[attribute])
752 base_subdirectory_name = '.'.join(subdirectory_name_parts)
753
754 subdirectory = self._unique_subdirectory(base_subdirectory_name)
755 subdirectory_path = os.path.join(self.resultdir, subdirectory)
756 os.mkdir(subdirectory_path)
757
758 self.record(status_code, subdirectory, test_name,
759 status=reason, optional_fields={'is_summary': True})
760
761 if attributes:
762 utils.write_keyval(subdirectory_path, attributes)
763
764 if child_test_ids:
765 ids_string = ','.join(str(test_id) for test_id in child_test_ids)
766 summary_data = {'child_test_ids': ids_string}
767 utils.write_keyval(os.path.join(subdirectory_path, 'summary_data'),
768 summary_data)
769
770
jadmanski16a7ff72009-04-01 18:19:53 +0000771 def disable_warnings(self, warning_type):
jadmanskif37df842009-02-11 00:03:26 +0000772 self.warning_manager.disable_warnings(warning_type)
jadmanski16a7ff72009-04-01 18:19:53 +0000773 self.record("INFO", None, None,
774 "disabling %s warnings" % warning_type,
775 {"warnings.disable": warning_type})
jadmanskif37df842009-02-11 00:03:26 +0000776
777
jadmanski16a7ff72009-04-01 18:19:53 +0000778 def enable_warnings(self, warning_type):
jadmanskif37df842009-02-11 00:03:26 +0000779 self.warning_manager.enable_warnings(warning_type)
jadmanski16a7ff72009-04-01 18:19:53 +0000780 self.record("INFO", None, None,
781 "enabling %s warnings" % warning_type,
782 {"warnings.enable": warning_type})
jadmanskif37df842009-02-11 00:03:26 +0000783
784
jadmanski779bd292009-03-19 17:33:33 +0000785 def get_status_log_path(self, subdir=None):
786 """Return the path to the job status log.
787
788 @param subdir - Optional paramter indicating that you want the path
789 to a subdirectory status log.
790
791 @returns The path where the status log should be.
792 """
mbligh210bae62009-04-01 18:33:13 +0000793 if self.resultdir:
794 if subdir:
795 return os.path.join(self.resultdir, subdir, "status.log")
796 else:
797 return os.path.join(self.resultdir, "status.log")
jadmanski779bd292009-03-19 17:33:33 +0000798 else:
mbligh210bae62009-04-01 18:33:13 +0000799 return None
jadmanski779bd292009-03-19 17:33:33 +0000800
801
jadmanski6bb32d72009-03-19 20:25:24 +0000802 def _update_uncollected_logs_list(self, update_func):
803 """Updates the uncollected logs list in a multi-process safe manner.
804
805 @param update_func - a function that updates the list of uncollected
806 logs. Should take one parameter, the list to be updated.
807 """
mbligh0d0f67d2009-11-06 03:15:03 +0000808 if self._uncollected_log_file:
809 log_file = open(self._uncollected_log_file, "r+")
mbligha788dc42009-03-26 21:10:16 +0000810 fcntl.flock(log_file, fcntl.LOCK_EX)
jadmanski6bb32d72009-03-19 20:25:24 +0000811 try:
812 uncollected_logs = pickle.load(log_file)
813 update_func(uncollected_logs)
814 log_file.seek(0)
815 log_file.truncate()
816 pickle.dump(uncollected_logs, log_file)
jadmanski3bff9092009-04-22 18:09:47 +0000817 log_file.flush()
jadmanski6bb32d72009-03-19 20:25:24 +0000818 finally:
819 fcntl.flock(log_file, fcntl.LOCK_UN)
820 log_file.close()
821
822
823 def add_client_log(self, hostname, remote_path, local_path):
824 """Adds a new set of client logs to the list of uncollected logs,
825 to allow for future log recovery.
826
827 @param host - the hostname of the machine holding the logs
828 @param remote_path - the directory on the remote machine holding logs
829 @param local_path - the local directory to copy the logs into
830 """
831 def update_func(logs_list):
832 logs_list.append((hostname, remote_path, local_path))
833 self._update_uncollected_logs_list(update_func)
834
835
836 def remove_client_log(self, hostname, remote_path, local_path):
837 """Removes a set of client logs from the list of uncollected logs,
838 to allow for future log recovery.
839
840 @param host - the hostname of the machine holding the logs
841 @param remote_path - the directory on the remote machine holding logs
842 @param local_path - the local directory to copy the logs into
843 """
844 def update_func(logs_list):
845 logs_list.remove((hostname, remote_path, local_path))
846 self._update_uncollected_logs_list(update_func)
847
848
mbligh0d0f67d2009-11-06 03:15:03 +0000849 def get_client_logs(self):
850 """Retrieves the list of uncollected logs, if it exists.
851
852 @returns A list of (host, remote_path, local_path) tuples. Returns
853 an empty list if no uncollected logs file exists.
854 """
855 log_exists = (self._uncollected_log_file and
856 os.path.exists(self._uncollected_log_file))
857 if log_exists:
858 return pickle.load(open(self._uncollected_log_file))
859 else:
860 return []
861
862
jadmanski10646442008-08-13 14:05:21 +0000863 def _render_record(self, status_code, subdir, operation, status='',
864 epoch_time=None, record_prefix=None,
865 optional_fields=None):
866 """
867 Internal Function to generate a record to be written into a
868 status log. For use by server_job.* classes only.
869 """
870 if subdir:
871 if re.match(r'[\n\t]', subdir):
mbligh2b92b862008-11-22 13:25:32 +0000872 raise ValueError('Invalid character in subdir string')
jadmanski10646442008-08-13 14:05:21 +0000873 substr = subdir
874 else:
875 substr = '----'
876
mbligh1b3b3762008-09-25 02:46:34 +0000877 if not log.is_valid_status(status_code):
mbligh2b92b862008-11-22 13:25:32 +0000878 raise ValueError('Invalid status code supplied: %s' % status_code)
jadmanski10646442008-08-13 14:05:21 +0000879 if not operation:
880 operation = '----'
881 if re.match(r'[\n\t]', operation):
mbligh2b92b862008-11-22 13:25:32 +0000882 raise ValueError('Invalid character in operation string')
jadmanski10646442008-08-13 14:05:21 +0000883 operation = operation.rstrip()
884 status = status.rstrip()
885 status = re.sub(r"\t", " ", status)
886 # Ensure any continuation lines are marked so we can
887 # detect them in the status file to ensure it is parsable.
mbligh0d0f67d2009-11-06 03:15:03 +0000888 status = re.sub(r"\n", "\n" + self._record_prefix + " ", status)
jadmanski10646442008-08-13 14:05:21 +0000889
890 if not optional_fields:
891 optional_fields = {}
892
893 # Generate timestamps for inclusion in the logs
894 if epoch_time is None:
895 epoch_time = int(time.time())
896 local_time = time.localtime(epoch_time)
897 optional_fields["timestamp"] = str(epoch_time)
898 optional_fields["localtime"] = time.strftime("%b %d %H:%M:%S",
899 local_time)
900
901 fields = [status_code, substr, operation]
902 fields += ["%s=%s" % x for x in optional_fields.iteritems()]
903 fields.append(status)
904
905 if record_prefix is None:
mbligh0d0f67d2009-11-06 03:15:03 +0000906 record_prefix = self._record_prefix
jadmanski10646442008-08-13 14:05:21 +0000907
908 msg = '\t'.join(str(x) for x in fields)
jadmanski10646442008-08-13 14:05:21 +0000909 return record_prefix + msg + '\n'
910
911
912 def _record_prerendered(self, msg):
913 """
914 Record a pre-rendered msg into the status logs. The only
915 change this makes to the message is to add on the local
916 indentation. Should not be called outside of server_job.*
917 classes. Unlike _record, this does not write the message
918 to standard output.
919 """
920 lines = []
jadmanski779bd292009-03-19 17:33:33 +0000921 status_file = self.get_status_log_path()
jadmanski10646442008-08-13 14:05:21 +0000922 status_log = open(status_file, 'a')
923 for line in msg.splitlines():
mbligh0d0f67d2009-11-06 03:15:03 +0000924 line = self._record_prefix + line + '\n'
jadmanski10646442008-08-13 14:05:21 +0000925 lines.append(line)
926 status_log.write(line)
927 status_log.close()
928 self.__parse_status(lines)
929
930
mbligh084bc172008-10-18 14:02:45 +0000931 def _fill_server_control_namespace(self, namespace, protect=True):
mbligh2b92b862008-11-22 13:25:32 +0000932 """
933 Prepare a namespace to be used when executing server control files.
mbligh084bc172008-10-18 14:02:45 +0000934
935 This sets up the control file API by importing modules and making them
936 available under the appropriate names within namespace.
937
938 For use by _execute_code().
939
940 Args:
941 namespace: The namespace dictionary to fill in.
942 protect: Boolean. If True (the default) any operation that would
943 clobber an existing entry in namespace will cause an error.
944 Raises:
945 error.AutoservError: When a name would be clobbered by import.
946 """
947 def _import_names(module_name, names=()):
mbligh2b92b862008-11-22 13:25:32 +0000948 """
949 Import a module and assign named attributes into namespace.
mbligh084bc172008-10-18 14:02:45 +0000950
951 Args:
952 module_name: The string module name.
953 names: A limiting list of names to import from module_name. If
954 empty (the default), all names are imported from the module
955 similar to a "from foo.bar import *" statement.
956 Raises:
957 error.AutoservError: When a name being imported would clobber
958 a name already in namespace.
959 """
960 module = __import__(module_name, {}, {}, names)
961
962 # No names supplied? Import * from the lowest level module.
963 # (Ugh, why do I have to implement this part myself?)
964 if not names:
965 for submodule_name in module_name.split('.')[1:]:
966 module = getattr(module, submodule_name)
967 if hasattr(module, '__all__'):
968 names = getattr(module, '__all__')
969 else:
970 names = dir(module)
971
972 # Install each name into namespace, checking to make sure it
973 # doesn't override anything that already exists.
974 for name in names:
975 # Check for conflicts to help prevent future problems.
976 if name in namespace and protect:
977 if namespace[name] is not getattr(module, name):
978 raise error.AutoservError('importing name '
979 '%s from %s %r would override %r' %
980 (name, module_name, getattr(module, name),
981 namespace[name]))
982 else:
983 # Encourage cleanliness and the use of __all__ for a
984 # more concrete API with less surprises on '*' imports.
985 warnings.warn('%s (%r) being imported from %s for use '
986 'in server control files is not the '
987 'first occurrance of that import.' %
988 (name, namespace[name], module_name))
989
990 namespace[name] = getattr(module, name)
991
992
993 # This is the equivalent of prepending a bunch of import statements to
994 # the front of the control script.
mbligha2b07dd2009-06-22 18:26:13 +0000995 namespace.update(os=os, sys=sys, logging=logging)
mbligh084bc172008-10-18 14:02:45 +0000996 _import_names('autotest_lib.server',
997 ('hosts', 'autotest', 'kvm', 'git', 'standalone_profiler',
998 'source_kernel', 'rpm_kernel', 'deb_kernel', 'git_kernel'))
999 _import_names('autotest_lib.server.subcommand',
1000 ('parallel', 'parallel_simple', 'subcommand'))
1001 _import_names('autotest_lib.server.utils',
1002 ('run', 'get_tmp_dir', 'sh_escape', 'parse_machine'))
1003 _import_names('autotest_lib.client.common_lib.error')
1004 _import_names('autotest_lib.client.common_lib.barrier', ('barrier',))
1005
1006 # Inject ourself as the job object into other classes within the API.
1007 # (Yuck, this injection is a gross thing be part of a public API. -gps)
1008 #
1009 # XXX Base & SiteAutotest do not appear to use .job. Who does?
1010 namespace['autotest'].Autotest.job = self
1011 # server.hosts.base_classes.Host uses .job.
1012 namespace['hosts'].Host.job = self
1013
1014
1015 def _execute_code(self, code_file, namespace, protect=True):
mbligh2b92b862008-11-22 13:25:32 +00001016 """
1017 Execute code using a copy of namespace as a server control script.
mbligh084bc172008-10-18 14:02:45 +00001018
1019 Unless protect_namespace is explicitly set to False, the dict will not
1020 be modified.
1021
1022 Args:
1023 code_file: The filename of the control file to execute.
1024 namespace: A dict containing names to make available during execution.
1025 protect: Boolean. If True (the default) a copy of the namespace dict
1026 is used during execution to prevent the code from modifying its
1027 contents outside of this function. If False the raw dict is
1028 passed in and modifications will be allowed.
1029 """
1030 if protect:
1031 namespace = namespace.copy()
1032 self._fill_server_control_namespace(namespace, protect=protect)
1033 # TODO: Simplify and get rid of the special cases for only 1 machine.
showard3e66e8c2008-10-27 19:20:51 +00001034 if len(self.machines) > 1:
mbligh084bc172008-10-18 14:02:45 +00001035 machines_text = '\n'.join(self.machines) + '\n'
1036 # Only rewrite the file if it does not match our machine list.
1037 try:
1038 machines_f = open(MACHINES_FILENAME, 'r')
1039 existing_machines_text = machines_f.read()
1040 machines_f.close()
1041 except EnvironmentError:
1042 existing_machines_text = None
1043 if machines_text != existing_machines_text:
1044 utils.open_write_close(MACHINES_FILENAME, machines_text)
1045 execfile(code_file, namespace, namespace)
jadmanski10646442008-08-13 14:05:21 +00001046
1047
1048 def _record(self, status_code, subdir, operation, status='',
1049 epoch_time=None, optional_fields=None):
1050 """
1051 Actual function for recording a single line into the status
1052 logs. Should never be called directly, only by job.record as
1053 this would bypass the console monitor logging.
1054 """
1055
mbligh2b92b862008-11-22 13:25:32 +00001056 msg = self._render_record(status_code, subdir, operation, status,
1057 epoch_time, optional_fields=optional_fields)
jadmanski10646442008-08-13 14:05:21 +00001058
jadmanski779bd292009-03-19 17:33:33 +00001059 status_file = self.get_status_log_path()
jadmanski10646442008-08-13 14:05:21 +00001060 sys.stdout.write(msg)
mbligh210bae62009-04-01 18:33:13 +00001061 if status_file:
1062 open(status_file, "a").write(msg)
jadmanski10646442008-08-13 14:05:21 +00001063 if subdir:
jadmanski779bd292009-03-19 17:33:33 +00001064 sub_status_file = self.get_status_log_path(subdir)
1065 open(sub_status_file, "a").write(msg)
jadmanski10646442008-08-13 14:05:21 +00001066 self.__parse_status(msg.splitlines())
1067
1068
1069 def __parse_status(self, new_lines):
mbligh0d0f67d2009-11-06 03:15:03 +00001070 if not self._using_parser:
jadmanski10646442008-08-13 14:05:21 +00001071 return
1072 new_tests = self.parser.process_lines(new_lines)
1073 for test in new_tests:
1074 self.__insert_test(test)
1075
1076
1077 def __insert_test(self, test):
mbligh2b92b862008-11-22 13:25:32 +00001078 """
1079 An internal method to insert a new test result into the
jadmanski10646442008-08-13 14:05:21 +00001080 database. This method will not raise an exception, even if an
1081 error occurs during the insert, to avoid failing a test
1082 simply because of unexpected database issues."""
showard21baa452008-10-21 00:08:39 +00001083 self.num_tests_run += 1
1084 if status_lib.is_worse_than_or_equal_to(test.status, 'FAIL'):
1085 self.num_tests_failed += 1
jadmanski10646442008-08-13 14:05:21 +00001086 try:
1087 self.results_db.insert_test(self.job_model, test)
1088 except Exception:
1089 msg = ("WARNING: An unexpected error occured while "
1090 "inserting test results into the database. "
1091 "Ignoring error.\n" + traceback.format_exc())
1092 print >> sys.stderr, msg
1093
mblighcaa62c22008-04-07 21:51:17 +00001094
mblighfc3da5b2010-01-06 18:37:22 +00001095 def preprocess_client_state(self):
1096 """
1097 Produce a state file for initializing the state of a client job.
1098
1099 Creates a new client state file with all the current server state, as
1100 well as some pre-set client state.
1101
1102 @returns The path of the file the state was written into.
1103 """
1104 # initialize the sysinfo state
1105 self._state.set('client', 'sysinfo', self.sysinfo.serialize())
1106
1107 # dump the state out to a tempfile
1108 fd, file_path = tempfile.mkstemp(dir=self.tmpdir)
1109 os.close(fd)
mbligha2c99492010-01-27 22:59:50 +00001110
1111 # write_to_file doesn't need locking, we exclusively own file_path
mblighfc3da5b2010-01-06 18:37:22 +00001112 self._state.write_to_file(file_path)
1113 return file_path
1114
1115
    def postprocess_client_state(self, state_path):
        """
        Update the state of this job with the state from a client job.

        Updates the state of the server side of a job with the final state
        of a client job that was run. Updates the non-client-specific state,
        pulls in some specific bits from the client-specific state, and then
        discards the rest. Removes the state file afterwards.

        @param state_path A path to the state file from the client.
        """
        # update the on-disk state
        try:
            self._state.read_from_file(state_path)
            os.remove(state_path)
        except OSError, e:
            # ignore file-not-found errors: a missing state file is
            # tolerated (presumably the client never wrote one -- e.g. it
            # died early); anything else is a real failure
            if e.errno != errno.ENOENT:
                raise
            else:
                logging.debug('Client state file %s not found', state_path)

        # update the sysinfo state
        if self._state.has('client', 'sysinfo'):
            self.sysinfo.deserialize(self._state.get('client', 'sysinfo'))

        # drop all the client-specific state
        self._state.discard_namespace('client')
1144
1145
mbligh0a883702010-04-21 01:58:34 +00001146 def clear_all_known_hosts(self):
1147 """Clears known hosts files for all AbstractSSHHosts."""
1148 for host in self.hosts:
1149 if isinstance(host, abstract_ssh.AbstractSSHHost):
1150 host.clear_known_hosts()
1151
1152
mbligha7007722009-01-13 00:37:11 +00001153site_server_job = utils.import_site_class(
1154 __file__, "autotest_lib.server.site_server_job", "site_server_job",
1155 base_server_job)
jadmanski0afbb632008-06-06 21:10:57 +00001156
mbligh0a8c3322009-04-28 18:32:19 +00001157class server_job(site_server_job):
jadmanski0afbb632008-06-06 21:10:57 +00001158 pass
jadmanskif37df842009-02-11 00:03:26 +00001159
1160
class warning_manager(object):
    """Class for controlling warning logs. Manages the enabling and disabling
    of warnings.

    Each warning type maps to a list of (start, end) epoch intervals during
    which that type was disabled; a warning timestamped inside such an
    interval is considered invalid.
    """
    def __init__(self):
        # a map of warning types to a list of disabled time intervals;
        # an interval with end == None is still open
        self.disabled_warnings = {}


    def is_valid(self, timestamp, warning_type):
        """Indicates if a warning (based on the time it occured and its type)
        is a valid warning. A warning is considered "invalid" if this type of
        warning was marked as "disabled" at the time the warning occured."""
        for start, end in self.disabled_warnings.get(warning_type, []):
            if start <= timestamp and (end is None or timestamp < end):
                return False
        return True


    def disable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, disables all further warnings of this type."""
        intervals = self.disabled_warnings.setdefault(warning_type, [])
        # only open a new interval if there isn't already one open
        if not intervals or intervals[-1][1] is not None:
            intervals.append((int(current_time_func()), None))


    def enable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, enables all further warnings of this type."""
        intervals = self.disabled_warnings.get(warning_type, [])
        # close the open interval, if there is one
        if intervals and intervals[-1][1] is None:
            intervals[-1] = (intervals[-1][0], int(current_time_func()))