blob: db03a9e8b2886da4013d05f8edd33dc51b78c3aa [file] [log] [blame]
mbligh57e78662008-06-17 19:53:49 +00001"""
2The main job wrapper for the server side.
3
4This is the core infrastructure. Derived from the client side job.py
5
6Copyright Martin J. Bligh, Andy Whitcroft 2007
7"""
8
jadmanski6bb32d72009-03-19 20:25:24 +00009import getpass, os, sys, re, stat, tempfile, time, select, subprocess
mblighfc3da5b2010-01-06 18:37:22 +000010import traceback, shutil, warnings, fcntl, pickle, logging, itertools, errno
showard75cdfee2009-06-10 17:40:41 +000011from autotest_lib.client.bin import sysinfo
mbligh0d0f67d2009-11-06 03:15:03 +000012from autotest_lib.client.common_lib import base_job
mbligh09108442008-10-15 16:27:38 +000013from autotest_lib.client.common_lib import error, log, utils, packages
showard75cdfee2009-06-10 17:40:41 +000014from autotest_lib.client.common_lib import logging_manager
jadmanski043e1132008-11-19 17:10:32 +000015from autotest_lib.server import test, subcommand, profilers
jadmanski10646442008-08-13 14:05:21 +000016from autotest_lib.tko import db as tko_db, status_lib, utils as tko_utils
jadmanski10646442008-08-13 14:05:21 +000017
18
mbligh084bc172008-10-18 14:02:45 +000019def _control_segment_path(name):
20 """Get the pathname of the named control segment file."""
jadmanski10646442008-08-13 14:05:21 +000021 server_dir = os.path.dirname(os.path.abspath(__file__))
mbligh084bc172008-10-18 14:02:45 +000022 return os.path.join(server_dir, "control_segments", name)
jadmanski10646442008-08-13 14:05:21 +000023
24
# Filenames used for control files inside a job's results directory.
CLIENT_CONTROL_FILENAME = 'control'
SERVER_CONTROL_FILENAME = 'control.srv'
MACHINES_FILENAME = '.machines'

# Pre-canned control segments shipped under server/control_segments/;
# resolved to absolute paths via _control_segment_path.
CLIENT_WRAPPER_CONTROL_FILE = _control_segment_path('client_wrapper')
CRASHDUMPS_CONTROL_FILE = _control_segment_path('crashdumps')
CRASHINFO_CONTROL_FILE = _control_segment_path('crashinfo')
INSTALL_CONTROL_FILE = _control_segment_path('install')
CLEANUP_CONTROL_FILE = _control_segment_path('cleanup')

VERIFY_CONTROL_FILE = _control_segment_path('verify')
REPAIR_CONTROL_FILE = _control_segment_path('repair')
jadmanski10646442008-08-13 14:05:21 +000037
38
# by default provide a stub that generates no site data
def _get_site_job_data_dummy(job):
    """Default get_site_job_data implementation: no site-specific keyvals."""
    return {}
42
43
# load up site-specific code for generating site-specific job data;
# falls back to the dummy implementation above when no site module exists
get_site_job_data = utils.import_site_function(__file__,
    "autotest_lib.server.site_server_job", "get_site_job_data",
    _get_site_job_data_dummy)
jadmanski10646442008-08-13 14:05:21 +000048
49
class base_server_job(base_job.base_job):
    """The server-side concrete implementation of base_job.

    Optional properties provided by this implementation:
        serverdir
        conmuxdir

        num_tests_run
        num_tests_failed

        warning_manager
        warning_loggers
    """

    # Written out as the 'status_version' keyval and used to select the
    # matching TKO status log parser (see init_parser).
    _STATUS_VERSION = 1
jadmanski10646442008-08-13 14:05:21 +000065
66 def __init__(self, control, args, resultdir, label, user, machines,
67 client=False, parse_job='',
mbligh374f3412009-05-13 21:29:45 +000068 ssh_user='root', ssh_port=22, ssh_pass='',
mblighe0cbc912010-03-11 18:03:07 +000069 group_name='', tag='',
70 control_filename=SERVER_CONTROL_FILENAME):
jadmanski10646442008-08-13 14:05:21 +000071 """
mbligh374f3412009-05-13 21:29:45 +000072 Create a server side job object.
mblighb5dac432008-11-27 00:38:44 +000073
mblighe7d9c602009-07-02 19:02:33 +000074 @param control: The pathname of the control file.
75 @param args: Passed to the control file.
76 @param resultdir: Where to throw the results.
77 @param label: Description of the job.
78 @param user: Username for the job (email address).
79 @param client: True if this is a client-side control file.
80 @param parse_job: string, if supplied it is the job execution tag that
81 the results will be passed through to the TKO parser with.
82 @param ssh_user: The SSH username. [root]
83 @param ssh_port: The SSH port number. [22]
84 @param ssh_pass: The SSH passphrase, if needed.
85 @param group_name: If supplied, this will be written out as
mbligh374f3412009-05-13 21:29:45 +000086 host_group_name in the keyvals file for the parser.
mblighe7d9c602009-07-02 19:02:33 +000087 @param tag: The job execution tag from the scheduler. [optional]
mblighe0cbc912010-03-11 18:03:07 +000088 @param control_filename: The filename where the server control file
89 should be written in the results directory.
jadmanski10646442008-08-13 14:05:21 +000090 """
mbligh0d0f67d2009-11-06 03:15:03 +000091 super(base_server_job, self).__init__(resultdir=resultdir)
mbligha788dc42009-03-26 21:10:16 +000092
mbligh0d0f67d2009-11-06 03:15:03 +000093 path = os.path.dirname(__file__)
94 self.control = control
95 self._uncollected_log_file = os.path.join(self.resultdir,
96 'uncollected_logs')
97 debugdir = os.path.join(self.resultdir, 'debug')
98 if not os.path.exists(debugdir):
99 os.mkdir(debugdir)
100
101 if user:
102 self.user = user
103 else:
104 self.user = getpass.getuser()
105
106 self._args = args
jadmanski10646442008-08-13 14:05:21 +0000107 self.machines = machines
mbligh0d0f67d2009-11-06 03:15:03 +0000108 self._client = client
109 self._record_prefix = ''
jadmanski10646442008-08-13 14:05:21 +0000110 self.warning_loggers = set()
jadmanskif37df842009-02-11 00:03:26 +0000111 self.warning_manager = warning_manager()
mbligh0d0f67d2009-11-06 03:15:03 +0000112 self._ssh_user = ssh_user
113 self._ssh_port = ssh_port
114 self._ssh_pass = ssh_pass
mblighe7d9c602009-07-02 19:02:33 +0000115 self.tag = tag
mbligh09108442008-10-15 16:27:38 +0000116 self.last_boot_tag = None
jadmanski53aaf382008-11-17 16:22:31 +0000117 self.hosts = set()
mbligh0d0f67d2009-11-06 03:15:03 +0000118 self.drop_caches = False
mblighb5dac432008-11-27 00:38:44 +0000119 self.drop_caches_between_iterations = False
mblighe0cbc912010-03-11 18:03:07 +0000120 self._control_filename = control_filename
jadmanski10646442008-08-13 14:05:21 +0000121
showard75cdfee2009-06-10 17:40:41 +0000122 self.logging = logging_manager.get_logging_manager(
123 manage_stdout_and_stderr=True, redirect_fds=True)
124 subcommand.logging_manager_object = self.logging
jadmanski10646442008-08-13 14:05:21 +0000125
mbligh0d0f67d2009-11-06 03:15:03 +0000126 self.sysinfo = sysinfo.sysinfo(self.resultdir)
jadmanski043e1132008-11-19 17:10:32 +0000127 self.profilers = profilers.profilers(self)
jadmanskic09fc152008-10-15 17:56:59 +0000128
jadmanski10646442008-08-13 14:05:21 +0000129 job_data = {'label' : label, 'user' : user,
130 'hostname' : ','.join(machines),
mbligh0d0f67d2009-11-06 03:15:03 +0000131 'status_version' : str(self._STATUS_VERSION),
showard170873e2009-01-07 00:22:26 +0000132 'job_started' : str(int(time.time()))}
mbligh374f3412009-05-13 21:29:45 +0000133 if group_name:
134 job_data['host_group_name'] = group_name
jadmanski10646442008-08-13 14:05:21 +0000135
mbligh0d0f67d2009-11-06 03:15:03 +0000136 # only write these keyvals out on the first job in a resultdir
137 if 'job_started' not in utils.read_keyval(self.resultdir):
138 job_data.update(get_site_job_data(self))
139 utils.write_keyval(self.resultdir, job_data)
140
141 self._parse_job = parse_job
showardcc929362010-01-25 21:20:41 +0000142 self._using_parser = (self._parse_job and len(machines) <= 1)
mbligh0d0f67d2009-11-06 03:15:03 +0000143 self.pkgmgr = packages.PackageManager(
144 self.autodir, run_function_dargs={'timeout':600})
showard21baa452008-10-21 00:08:39 +0000145 self.num_tests_run = 0
146 self.num_tests_failed = 0
147
jadmanski550fdc22008-11-20 16:32:08 +0000148 self._register_subcommand_hooks()
149
mbligh0d0f67d2009-11-06 03:15:03 +0000150 # these components aren't usable on the server
151 self.bootloader = None
152 self.harness = None
153
154
155 @classmethod
156 def _find_base_directories(cls):
157 """
158 Determine locations of autodir, clientdir and serverdir. Assumes
159 that this file is located within serverdir and uses __file__ along
160 with relative paths to resolve the location.
161 """
162 serverdir = os.path.abspath(os.path.dirname(__file__))
163 autodir = os.path.normpath(os.path.join(serverdir, '..'))
164 clientdir = os.path.join(autodir, 'client')
165 return autodir, clientdir, serverdir
166
167
168 def _find_resultdir(self, resultdir):
169 """
170 Determine the location of resultdir. For server jobs we expect one to
171 always be explicitly passed in to __init__, so just return that.
172 """
173 if resultdir:
174 return os.path.normpath(resultdir)
175 else:
176 return None
177
jadmanski550fdc22008-11-20 16:32:08 +0000178
jadmanskie432dd22009-01-30 15:04:51 +0000179 @staticmethod
180 def _load_control_file(path):
181 f = open(path)
182 try:
183 control_file = f.read()
184 finally:
185 f.close()
186 return re.sub('\r', '', control_file)
187
188
jadmanski550fdc22008-11-20 16:32:08 +0000189 def _register_subcommand_hooks(self):
mbligh2b92b862008-11-22 13:25:32 +0000190 """
191 Register some hooks into the subcommand modules that allow us
192 to properly clean up self.hosts created in forked subprocesses.
193 """
jadmanski550fdc22008-11-20 16:32:08 +0000194 def on_fork(cmd):
195 self._existing_hosts_on_fork = set(self.hosts)
196 def on_join(cmd):
197 new_hosts = self.hosts - self._existing_hosts_on_fork
198 for host in new_hosts:
199 host.close()
200 subcommand.subcommand.register_fork_hook(on_fork)
201 subcommand.subcommand.register_join_hook(on_join)
202
jadmanski10646442008-08-13 14:05:21 +0000203
    def init_parser(self):
        """
        Start the continuous parsing of self.resultdir. This sets up
        the database connection and inserts the basic job object into
        the database if necessary.
        """
        if not self._using_parser:
            return
        # redirect parser debugging to .parse.log
        parse_log = os.path.join(self.resultdir, '.parse.log')
        parse_log = open(parse_log, 'w', 0)  # buffering=0: unbuffered writes
        tko_utils.redirect_parser_debugging(parse_log)
        # create a job model object and set up the db
        self.results_db = tko_db.db(autocommit=True)
        self.parser = status_lib.parser(self._STATUS_VERSION)
        self.job_model = self.parser.make_job(self.resultdir)
        self.parser.start(self.job_model)
        # check if a job already exists in the db and insert it if
        # it does not
        job_idx = self.results_db.find_job(self._parse_job)
        if job_idx is None:
            self.results_db.insert_job(self._parse_job, self.job_model)
        else:
            # job already present: attach its existing job/machine indices
            # to the model so new results are associated with it
            machine_idx = self.results_db.lookup_machine(self.job_model.machine)
            self.job_model.index = job_idx
            self.job_model.machine_idx = machine_idx
230
231
232 def cleanup_parser(self):
mbligh2b92b862008-11-22 13:25:32 +0000233 """
234 This should be called after the server job is finished
jadmanski10646442008-08-13 14:05:21 +0000235 to carry out any remaining cleanup (e.g. flushing any
mbligh2b92b862008-11-22 13:25:32 +0000236 remaining test results to the results db)
237 """
mbligh0d0f67d2009-11-06 03:15:03 +0000238 if not self._using_parser:
jadmanski10646442008-08-13 14:05:21 +0000239 return
240 final_tests = self.parser.end()
241 for test in final_tests:
242 self.__insert_test(test)
mbligh0d0f67d2009-11-06 03:15:03 +0000243 self._using_parser = False
jadmanski10646442008-08-13 14:05:21 +0000244
245
    def verify(self):
        """Run the 'verify' control segment against self.machines.

        Records an ABORT line in the status log and re-raises if the
        verify control segment raises any exception.
        """
        if not self.machines:
            raise error.AutoservError('No machines specified to verify')
        if self.resultdir:
            os.chdir(self.resultdir)
        try:
            namespace = {'machines' : self.machines, 'job' : self,
                         'ssh_user' : self._ssh_user,
                         'ssh_port' : self._ssh_port,
                         'ssh_pass' : self._ssh_pass}
            self._execute_code(VERIFY_CONTROL_FILE, namespace, protect=False)
        except Exception, e:
            msg = ('Verify failed\n' + str(e) + '\n' + traceback.format_exc())
            self.record('ABORT', None, None, msg)
            raise
261
262
    def repair(self, host_protection):
        """Run the 'repair' control segment against self.machines.

        @param host_protection: Passed through to the repair control segment
                as 'protection_level' in its namespace.
        """
        if not self.machines:
            raise error.AutoservError('No machines specified to repair')
        if self.resultdir:
            os.chdir(self.resultdir)
        namespace = {'machines': self.machines, 'job': self,
                     'ssh_user': self._ssh_user, 'ssh_port': self._ssh_port,
                     'ssh_pass': self._ssh_pass,
                     'protection_level': host_protection}

        self._execute_code(REPAIR_CONTROL_FILE, namespace, protect=False)
jadmanski10646442008-08-13 14:05:21 +0000274
275
    def precheck(self):
        """
        perform any additional checks in derived classes.
        """
        # intentionally a no-op; subclasses may override
        pass
281
282
    def enable_external_logging(self):
        """
        Start or restart external logging mechanism.
        """
        # no-op in the base class; subclasses may override
        pass
288
289
    def disable_external_logging(self):
        """
        Pause or stop external logging mechanism.
        """
        # no-op in the base class; subclasses may override
        pass
295
296
    def use_external_logging(self):
        """
        Return True if external logging should be used.
        """
        # base implementation never uses external logging
        return False
302
303
    def _make_parallel_wrapper(self, function, machines, log):
        """Wrap function as appropriate for calling by parallel_simple.

        Returns either function unchanged (single-machine, non-forking case)
        or a closure that sets up per-machine results/parsing state before
        calling function(machine).
        """
        # forking happens unless we run exactly the job's own single machine
        is_forking = not (len(machines) == 1 and self.machines == machines)
        if self._parse_job and is_forking and log:
            def wrapper(machine):
                # narrow the job state down to this one machine and restart
                # continuous parsing in the machine's execution context
                # (parallel_simple runs each wrapper via the subcommand
                # machinery, so these mutations stay in the subprocess)
                self._parse_job += "/" + machine
                self._using_parser = True
                self.machines = [machine]
                self.push_execution_context(machine)
                os.chdir(self.resultdir)
                utils.write_keyval(self.resultdir, {"hostname": machine})
                self.init_parser()
                result = function(machine)
                self.cleanup_parser()
                return result
        elif len(machines) > 1 and log:
            def wrapper(machine):
                # multi-machine without parsing: just set up the per-machine
                # results directory and record the basic keyvals
                self.push_execution_context(machine)
                os.chdir(self.resultdir)
                machine_data = {'hostname' : machine,
                                'status_version' : str(self._STATUS_VERSION)}
                utils.write_keyval(self.resultdir, machine_data)
                result = function(machine)
                return result
        else:
            # no per-machine setup required; use the function as-is
            wrapper = function
        return wrapper
331
332
333 def parallel_simple(self, function, machines, log=True, timeout=None,
334 return_results=False):
335 """
336 Run 'function' using parallel_simple, with an extra wrapper to handle
337 the necessary setup for continuous parsing, if possible. If continuous
338 parsing is already properly initialized then this should just work.
339
340 @param function: A callable to run in parallel given each machine.
341 @param machines: A list of machine names to be passed one per subcommand
342 invocation of function.
343 @param log: If True, output will be written to output in a subdirectory
344 named after each machine.
345 @param timeout: Seconds after which the function call should timeout.
346 @param return_results: If True instead of an AutoServError being raised
347 on any error a list of the results|exceptions from the function
348 called on each arg is returned. [default: False]
349
350 @raises error.AutotestError: If any of the functions failed.
351 """
352 wrapper = self._make_parallel_wrapper(function, machines, log)
353 return subcommand.parallel_simple(wrapper, machines,
354 log=log, timeout=timeout,
355 return_results=return_results)
356
357
358 def parallel_on_machines(self, function, machines, timeout=None):
359 """
showardcd5fac42009-07-06 20:19:43 +0000360 @param function: Called in parallel with one machine as its argument.
mbligh415dc212009-06-15 21:53:34 +0000361 @param machines: A list of machines to call function(machine) on.
362 @param timeout: Seconds after which the function call should timeout.
363
364 @returns A list of machines on which function(machine) returned
365 without raising an exception.
366 """
showardcd5fac42009-07-06 20:19:43 +0000367 results = self.parallel_simple(function, machines, timeout=timeout,
mbligh415dc212009-06-15 21:53:34 +0000368 return_results=True)
369 success_machines = []
370 for result, machine in itertools.izip(results, machines):
371 if not isinstance(result, Exception):
372 success_machines.append(machine)
373 return success_machines
jadmanski10646442008-08-13 14:05:21 +0000374
375
    # sentinel for control_file_dir meaning "use a fresh temp directory"
    _USE_TEMP_DIR = object()
    def run(self, cleanup=False, install_before=False, install_after=False,
            collect_crashdumps=True, namespace={}, control=None,
            collect_crashinfo=True, only_collect_crashinfo=False,
            control_file_dir=None):
        """Run the job's control file, plus the surrounding control segments.

        NOTE(review): signature reproduced below exactly as in the original;
        this docstring-only note documents the flow: optional install segment,
        then the (client or server) control file, then crashdump/crashinfo
        collection, cleanup and optional re-install.
        """
    def run(self, cleanup=False, install_before=False, install_after=False,
            collect_crashdumps=True, namespace={}, control=None,
            control_file_dir=None, only_collect_crashinfo=False):
        # for a normal job, make sure the uncollected logs file exists
        # for a crashinfo-only run it should already exist, bail out otherwise
        created_uncollected_logs = False
        if self.resultdir and not os.path.exists(self._uncollected_log_file):
            if only_collect_crashinfo:
                # if this is a crashinfo-only run, and there were no existing
                # uncollected logs, just bail out early
                logging.info("No existing uncollected logs, "
                             "skipping crashinfo collection")
                return
            else:
                log_file = open(self._uncollected_log_file, "w")
                pickle.dump([], log_file)
                log_file.close()
                created_uncollected_logs = True

        # use a copy so changes don't affect the original dictionary
        namespace = namespace.copy()
        machines = self.machines
        # no explicit control passed in: load the job's own control file
        if control is None:
            if self.control is None:
                control = ''
            else:
                control = self._load_control_file(self.control)
        if control_file_dir is None:
            control_file_dir = self.resultdir

        self.aborted = False
        namespace['machines'] = machines
        namespace['args'] = self._args
        namespace['job'] = self
        namespace['ssh_user'] = self._ssh_user
        namespace['ssh_port'] = self._ssh_port
        namespace['ssh_pass'] = self._ssh_pass
        test_start_time = int(time.time())

        if self.resultdir:
            os.chdir(self.resultdir)
            # touch status.log so that the parser knows a job is running here
            open(self.get_status_log_path(), 'a').close()
            self.enable_external_logging()

        collect_crashinfo = True
        temp_control_file_dir = None
        try:
            try:
                if install_before and machines:
                    self._execute_code(INSTALL_CONTROL_FILE, namespace)

                if only_collect_crashinfo:
                    return

                # determine the dir to write the control files to
                cfd_specified = (control_file_dir
                                 and control_file_dir is not self._USE_TEMP_DIR)
                if cfd_specified:
                    temp_control_file_dir = None
                else:
                    temp_control_file_dir = tempfile.mkdtemp(
                        suffix='temp_control_file_dir')
                    control_file_dir = temp_control_file_dir
                server_control_file = os.path.join(control_file_dir,
                                                   self._control_filename)
                client_control_file = os.path.join(control_file_dir,
                                                   CLIENT_CONTROL_FILENAME)
                if self._client:
                    # client-side control: write it out for the client and run
                    # the generic client_wrapper segment on the server side
                    namespace['control'] = control
                    utils.open_write_close(client_control_file, control)
                    shutil.copyfile(CLIENT_WRAPPER_CONTROL_FILE,
                                    server_control_file)
                else:
                    utils.open_write_close(server_control_file, control)
                logging.info("Processing control file")
                self._execute_code(server_control_file, namespace)
                logging.info("Finished processing control file")

                # no error occured, so we don't need to collect crashinfo
                collect_crashinfo = False
            except:
                try:
                    logging.exception(
                        'Exception escaped control file, job aborting:')
                except:
                    pass # don't let logging exceptions here interfere
                raise
        finally:
            if temp_control_file_dir:
                # Clean up temp directory used for copies of the control files
                try:
                    shutil.rmtree(temp_control_file_dir)
                except Exception, e:
                    logging.warn('Could not remove temp directory %s: %s',
                                 temp_control_file_dir, e)

            if machines and (collect_crashdumps or collect_crashinfo):
                namespace['test_start_time'] = test_start_time
                if collect_crashinfo:
                    # includes crashdumps
                    self._execute_code(CRASHINFO_CONTROL_FILE, namespace)
                else:
                    self._execute_code(CRASHDUMPS_CONTROL_FILE, namespace)
            # only delete the uncollected-logs marker we created ourselves
            if self._uncollected_log_file and created_uncollected_logs:
                os.remove(self._uncollected_log_file)
            self.disable_external_logging()
            if cleanup and machines:
                self._execute_code(CLEANUP_CONTROL_FILE, namespace)
            if install_after and machines:
                self._execute_code(INSTALL_CONTROL_FILE, namespace)
jadmanski10646442008-08-13 14:05:21 +0000488
489
    def run_test(self, url, *args, **dargs):
        """
        Summon a test object and run it.

        tag
                tag to add to testname
        url
                url of the test to run

        Returns True if the test passed, False if it raised a
        TestBaseException; any other exception is re-raised.
        """
        group, testname = self.pkgmgr.get_package_name(url, 'test')
        testname, subdir, tag = self._build_tagged_test_name(testname, dargs)
        outputdir = self._make_test_outputdir(subdir)

        def group_func():
            try:
                test.runtest(self, url, tag, args, dargs)
            except error.TestBaseException, e:
                # known test-level failure: record its own exit status
                self.record(e.exit_status, subdir, testname, str(e))
                raise
            except Exception, e:
                # unexpected failure: record as FAIL with a traceback
                info = str(e) + "\n" + traceback.format_exc()
                self.record('FAIL', subdir, testname, info)
                raise
            else:
                self.record('GOOD', subdir, testname, 'completed successfully')

        result, exc_info = self._run_group(testname, subdir, group_func)
        if exc_info and isinstance(exc_info[1], error.TestBaseException):
            return False
        elif exc_info:
            # re-raise non-test exceptions with the original traceback
            raise exc_info[0], exc_info[1], exc_info[2]
        else:
            return True
jadmanski10646442008-08-13 14:05:21 +0000523
524
    def _run_group(self, name, subdir, function, *args, **dargs):
        """\
        Underlying method for running something inside of a group.

        Returns (result, exc_info): exc_info is None on success, or the
        sys.exc_info() triple when function raised a TestBaseException.
        Any other exception is recorded as END ABORT and converted into
        a JobError.
        """
        result, exc_info = None, None
        old_record_prefix = self._record_prefix
        try:
            self.record('START', subdir, name)
            self._record_prefix += '\t'
            try:
                result = function(*args, **dargs)
            finally:
                # always restore the status-log indent level
                self._record_prefix = old_record_prefix
        except error.TestBaseException, e:
            self.record("END %s" % e.exit_status, subdir, name)
            exc_info = sys.exc_info()
        except Exception, e:
            err_msg = str(e) + '\n'
            err_msg += traceback.format_exc()
            self.record('END ABORT', subdir, name, err_msg)
            raise error.JobError(name + ' failed\n' + traceback.format_exc())
        else:
            self.record('END GOOD', subdir, name)

        return result, exc_info
jadmanski10646442008-08-13 14:05:21 +0000550
551
552 def run_group(self, function, *args, **dargs):
553 """\
554 function:
555 subroutine to run
556 *args:
557 arguments for the function
558 """
559
560 name = function.__name__
561
562 # Allow the tag for the group to be specified.
563 tag = dargs.pop('tag', None)
564 if tag:
565 name = tag
566
jadmanskide292df2008-08-26 20:51:14 +0000567 return self._run_group(name, None, function, *args, **dargs)[0]
jadmanski10646442008-08-13 14:05:21 +0000568
569
    def run_reboot(self, reboot_func, get_kernel_func):
        """\
        A specialization of run_group meant specifically for handling
        a reboot. Includes support for capturing the kernel version
        after the reboot.

        reboot_func: a function that carries out the reboot

        get_kernel_func: a function that returns a string
        representing the kernel version.
        """

        old_record_prefix = self._record_prefix
        try:
            self.record('START', None, 'reboot')
            # indent everything logged while the reboot is in progress
            self._record_prefix += '\t'
            reboot_func()
        except Exception, e:
            # restore indent before writing the END line, then re-raise
            self._record_prefix = old_record_prefix
            err_msg = str(e) + '\n' + traceback.format_exc()
            self.record('END FAIL', None, 'reboot', err_msg)
            raise
        else:
            # capture the post-reboot kernel version in the END GOOD line
            kernel = get_kernel_func()
            self._record_prefix = old_record_prefix
            self.record('END GOOD', None, 'reboot',
                        optional_fields={"kernel": kernel})
597
598
jadmanskie432dd22009-01-30 15:04:51 +0000599 def run_control(self, path):
600 """Execute a control file found at path (relative to the autotest
601 path). Intended for executing a control file within a control file,
602 not for running the top-level job control file."""
603 path = os.path.join(self.autodir, path)
604 control_file = self._load_control_file(path)
mbligh0d0f67d2009-11-06 03:15:03 +0000605 self.run(control=control_file, control_file_dir=self._USE_TEMP_DIR)
jadmanskie432dd22009-01-30 15:04:51 +0000606
607
jadmanskic09fc152008-10-15 17:56:59 +0000608 def add_sysinfo_command(self, command, logfile=None, on_every_test=False):
mbligh4395bbd2009-03-25 19:34:17 +0000609 self._add_sysinfo_loggable(sysinfo.command(command, logf=logfile),
jadmanskic09fc152008-10-15 17:56:59 +0000610 on_every_test)
611
612
613 def add_sysinfo_logfile(self, file, on_every_test=False):
614 self._add_sysinfo_loggable(sysinfo.logfile(file), on_every_test)
615
616
617 def _add_sysinfo_loggable(self, loggable, on_every_test):
618 if on_every_test:
619 self.sysinfo.test_loggables.add(loggable)
620 else:
621 self.sysinfo.boot_loggables.add(loggable)
622
623
    def record(self, status_code, subdir, operation, status='',
               optional_fields=None):
        """
        Record job-level status

        The intent is to make this file both machine parseable and
        human readable. That involves a little more complexity, but
        really isn't all that bad ;-)

        Format is <status code>\t<subdir>\t<operation>\t<status>

        status code: see common_lib.log.is_valid_status()
                     for valid status definition

        subdir: MUST be a relevant subdirectory in the results,
        or None, which will be represented as '----'

        operation: description of what you ran (e.g. "dbench", or
                                        "mkfs -t foobar /dev/sda9")

        status: error message or "completed sucessfully"

        ------------------------------------------------------------

        Initial tabs indicate indent levels for grouping, and is
        governed by self._record_prefix

        multiline messages have secondary lines prefaced by a double
        space ('  ')

        Executing this method will trigger the logging of all new
        warnings to date from the various console loggers.
        """
        # poll all our warning loggers for new warnings
        warnings = self._read_warnings()
        old_record_prefix = self._record_prefix
        try:
            if status_code.startswith("END "):
                # warnings happened inside the group being ended, so log
                # them one indent level deeper than the END line itself
                self._record_prefix += "\t"
            for timestamp, msg in warnings:
                self._record("WARN", None, None, msg, timestamp)
        finally:
            self._record_prefix = old_record_prefix

        # write out the actual status log line
        self._record(status_code, subdir, operation, status,
                     optional_fields=optional_fields)
671
672
673 def _read_warnings(self):
jadmanskif37df842009-02-11 00:03:26 +0000674 """Poll all the warning loggers and extract any new warnings that have
675 been logged. If the warnings belong to a category that is currently
676 disabled, this method will discard them and they will no longer be
677 retrievable.
678
679 Returns a list of (timestamp, message) tuples, where timestamp is an
680 integer epoch timestamp."""
jadmanski10646442008-08-13 14:05:21 +0000681 warnings = []
682 while True:
683 # pull in a line of output from every logger that has
684 # output ready to be read
mbligh2b92b862008-11-22 13:25:32 +0000685 loggers, _, _ = select.select(self.warning_loggers, [], [], 0)
jadmanski10646442008-08-13 14:05:21 +0000686 closed_loggers = set()
687 for logger in loggers:
688 line = logger.readline()
689 # record any broken pipes (aka line == empty)
690 if len(line) == 0:
691 closed_loggers.add(logger)
692 continue
jadmanskif37df842009-02-11 00:03:26 +0000693 # parse out the warning
694 timestamp, msgtype, msg = line.split('\t', 2)
695 timestamp = int(timestamp)
696 # if the warning is valid, add it to the results
697 if self.warning_manager.is_valid(timestamp, msgtype):
698 warnings.append((timestamp, msg.strip()))
jadmanski10646442008-08-13 14:05:21 +0000699
700 # stop listening to loggers that are closed
701 self.warning_loggers -= closed_loggers
702
703 # stop if none of the loggers have any output left
704 if not loggers:
705 break
706
707 # sort into timestamp order
708 warnings.sort()
709 return warnings
710
711
showardcc929362010-01-25 21:20:41 +0000712 def _unique_subdirectory(self, base_subdirectory_name):
713 """Compute a unique results subdirectory based on the given name.
714
715 Appends base_subdirectory_name with a number as necessary to find a
716 directory name that doesn't already exist.
717 """
718 subdirectory = base_subdirectory_name
719 counter = 1
720 while os.path.exists(os.path.join(self.resultdir, subdirectory)):
721 subdirectory = base_subdirectory_name + '.' + str(counter)
722 counter += 1
723 return subdirectory
724
725
726 def record_summary(self, status_code, test_name, reason='', attributes=None,
727 distinguishing_attributes=(), child_test_ids=None):
728 """Record a summary test result.
729
730 @param status_code: status code string, see
731 common_lib.log.is_valid_status()
732 @param test_name: name of the test
733 @param reason: (optional) string providing detailed reason for test
734 outcome
735 @param attributes: (optional) dict of string keyvals to associate with
736 this result
737 @param distinguishing_attributes: (optional) list of attribute names
738 that should be used to distinguish identically-named test
739 results. These attributes should be present in the attributes
740 parameter. This is used to generate user-friendly subdirectory
741 names.
742 @param child_test_ids: (optional) list of test indices for test results
743 used in generating this result.
744 """
745 subdirectory_name_parts = [test_name]
746 for attribute in distinguishing_attributes:
747 assert attributes
748 assert attribute in attributes, '%s not in %s' % (attribute,
749 attributes)
750 subdirectory_name_parts.append(attributes[attribute])
751 base_subdirectory_name = '.'.join(subdirectory_name_parts)
752
753 subdirectory = self._unique_subdirectory(base_subdirectory_name)
754 subdirectory_path = os.path.join(self.resultdir, subdirectory)
755 os.mkdir(subdirectory_path)
756
757 self.record(status_code, subdirectory, test_name,
758 status=reason, optional_fields={'is_summary': True})
759
760 if attributes:
761 utils.write_keyval(subdirectory_path, attributes)
762
763 if child_test_ids:
764 ids_string = ','.join(str(test_id) for test_id in child_test_ids)
765 summary_data = {'child_test_ids': ids_string}
766 utils.write_keyval(os.path.join(subdirectory_path, 'summary_data'),
767 summary_data)
768
769
jadmanski16a7ff72009-04-01 18:19:53 +0000770 def disable_warnings(self, warning_type):
jadmanskif37df842009-02-11 00:03:26 +0000771 self.warning_manager.disable_warnings(warning_type)
jadmanski16a7ff72009-04-01 18:19:53 +0000772 self.record("INFO", None, None,
773 "disabling %s warnings" % warning_type,
774 {"warnings.disable": warning_type})
jadmanskif37df842009-02-11 00:03:26 +0000775
776
jadmanski16a7ff72009-04-01 18:19:53 +0000777 def enable_warnings(self, warning_type):
jadmanskif37df842009-02-11 00:03:26 +0000778 self.warning_manager.enable_warnings(warning_type)
jadmanski16a7ff72009-04-01 18:19:53 +0000779 self.record("INFO", None, None,
780 "enabling %s warnings" % warning_type,
781 {"warnings.enable": warning_type})
jadmanskif37df842009-02-11 00:03:26 +0000782
783
jadmanski779bd292009-03-19 17:33:33 +0000784 def get_status_log_path(self, subdir=None):
785 """Return the path to the job status log.
786
787 @param subdir - Optional paramter indicating that you want the path
788 to a subdirectory status log.
789
790 @returns The path where the status log should be.
791 """
mbligh210bae62009-04-01 18:33:13 +0000792 if self.resultdir:
793 if subdir:
794 return os.path.join(self.resultdir, subdir, "status.log")
795 else:
796 return os.path.join(self.resultdir, "status.log")
jadmanski779bd292009-03-19 17:33:33 +0000797 else:
mbligh210bae62009-04-01 18:33:13 +0000798 return None
jadmanski779bd292009-03-19 17:33:33 +0000799
800
jadmanski6bb32d72009-03-19 20:25:24 +0000801 def _update_uncollected_logs_list(self, update_func):
802 """Updates the uncollected logs list in a multi-process safe manner.
803
804 @param update_func - a function that updates the list of uncollected
805 logs. Should take one parameter, the list to be updated.
806 """
mbligh0d0f67d2009-11-06 03:15:03 +0000807 if self._uncollected_log_file:
808 log_file = open(self._uncollected_log_file, "r+")
mbligha788dc42009-03-26 21:10:16 +0000809 fcntl.flock(log_file, fcntl.LOCK_EX)
jadmanski6bb32d72009-03-19 20:25:24 +0000810 try:
811 uncollected_logs = pickle.load(log_file)
812 update_func(uncollected_logs)
813 log_file.seek(0)
814 log_file.truncate()
815 pickle.dump(uncollected_logs, log_file)
jadmanski3bff9092009-04-22 18:09:47 +0000816 log_file.flush()
jadmanski6bb32d72009-03-19 20:25:24 +0000817 finally:
818 fcntl.flock(log_file, fcntl.LOCK_UN)
819 log_file.close()
820
821
822 def add_client_log(self, hostname, remote_path, local_path):
823 """Adds a new set of client logs to the list of uncollected logs,
824 to allow for future log recovery.
825
826 @param host - the hostname of the machine holding the logs
827 @param remote_path - the directory on the remote machine holding logs
828 @param local_path - the local directory to copy the logs into
829 """
830 def update_func(logs_list):
831 logs_list.append((hostname, remote_path, local_path))
832 self._update_uncollected_logs_list(update_func)
833
834
835 def remove_client_log(self, hostname, remote_path, local_path):
836 """Removes a set of client logs from the list of uncollected logs,
837 to allow for future log recovery.
838
839 @param host - the hostname of the machine holding the logs
840 @param remote_path - the directory on the remote machine holding logs
841 @param local_path - the local directory to copy the logs into
842 """
843 def update_func(logs_list):
844 logs_list.remove((hostname, remote_path, local_path))
845 self._update_uncollected_logs_list(update_func)
846
847
mbligh0d0f67d2009-11-06 03:15:03 +0000848 def get_client_logs(self):
849 """Retrieves the list of uncollected logs, if it exists.
850
851 @returns A list of (host, remote_path, local_path) tuples. Returns
852 an empty list if no uncollected logs file exists.
853 """
854 log_exists = (self._uncollected_log_file and
855 os.path.exists(self._uncollected_log_file))
856 if log_exists:
857 return pickle.load(open(self._uncollected_log_file))
858 else:
859 return []
860
861
jadmanski10646442008-08-13 14:05:21 +0000862 def _render_record(self, status_code, subdir, operation, status='',
863 epoch_time=None, record_prefix=None,
864 optional_fields=None):
865 """
866 Internal Function to generate a record to be written into a
867 status log. For use by server_job.* classes only.
868 """
869 if subdir:
870 if re.match(r'[\n\t]', subdir):
mbligh2b92b862008-11-22 13:25:32 +0000871 raise ValueError('Invalid character in subdir string')
jadmanski10646442008-08-13 14:05:21 +0000872 substr = subdir
873 else:
874 substr = '----'
875
mbligh1b3b3762008-09-25 02:46:34 +0000876 if not log.is_valid_status(status_code):
mbligh2b92b862008-11-22 13:25:32 +0000877 raise ValueError('Invalid status code supplied: %s' % status_code)
jadmanski10646442008-08-13 14:05:21 +0000878 if not operation:
879 operation = '----'
880 if re.match(r'[\n\t]', operation):
mbligh2b92b862008-11-22 13:25:32 +0000881 raise ValueError('Invalid character in operation string')
jadmanski10646442008-08-13 14:05:21 +0000882 operation = operation.rstrip()
883 status = status.rstrip()
884 status = re.sub(r"\t", " ", status)
885 # Ensure any continuation lines are marked so we can
886 # detect them in the status file to ensure it is parsable.
mbligh0d0f67d2009-11-06 03:15:03 +0000887 status = re.sub(r"\n", "\n" + self._record_prefix + " ", status)
jadmanski10646442008-08-13 14:05:21 +0000888
889 if not optional_fields:
890 optional_fields = {}
891
892 # Generate timestamps for inclusion in the logs
893 if epoch_time is None:
894 epoch_time = int(time.time())
895 local_time = time.localtime(epoch_time)
896 optional_fields["timestamp"] = str(epoch_time)
897 optional_fields["localtime"] = time.strftime("%b %d %H:%M:%S",
898 local_time)
899
900 fields = [status_code, substr, operation]
901 fields += ["%s=%s" % x for x in optional_fields.iteritems()]
902 fields.append(status)
903
904 if record_prefix is None:
mbligh0d0f67d2009-11-06 03:15:03 +0000905 record_prefix = self._record_prefix
jadmanski10646442008-08-13 14:05:21 +0000906
907 msg = '\t'.join(str(x) for x in fields)
jadmanski10646442008-08-13 14:05:21 +0000908 return record_prefix + msg + '\n'
909
910
911 def _record_prerendered(self, msg):
912 """
913 Record a pre-rendered msg into the status logs. The only
914 change this makes to the message is to add on the local
915 indentation. Should not be called outside of server_job.*
916 classes. Unlike _record, this does not write the message
917 to standard output.
918 """
919 lines = []
jadmanski779bd292009-03-19 17:33:33 +0000920 status_file = self.get_status_log_path()
jadmanski10646442008-08-13 14:05:21 +0000921 status_log = open(status_file, 'a')
922 for line in msg.splitlines():
mbligh0d0f67d2009-11-06 03:15:03 +0000923 line = self._record_prefix + line + '\n'
jadmanski10646442008-08-13 14:05:21 +0000924 lines.append(line)
925 status_log.write(line)
926 status_log.close()
927 self.__parse_status(lines)
928
929
mbligh084bc172008-10-18 14:02:45 +0000930 def _fill_server_control_namespace(self, namespace, protect=True):
mbligh2b92b862008-11-22 13:25:32 +0000931 """
932 Prepare a namespace to be used when executing server control files.
mbligh084bc172008-10-18 14:02:45 +0000933
934 This sets up the control file API by importing modules and making them
935 available under the appropriate names within namespace.
936
937 For use by _execute_code().
938
939 Args:
940 namespace: The namespace dictionary to fill in.
941 protect: Boolean. If True (the default) any operation that would
942 clobber an existing entry in namespace will cause an error.
943 Raises:
944 error.AutoservError: When a name would be clobbered by import.
945 """
946 def _import_names(module_name, names=()):
mbligh2b92b862008-11-22 13:25:32 +0000947 """
948 Import a module and assign named attributes into namespace.
mbligh084bc172008-10-18 14:02:45 +0000949
950 Args:
951 module_name: The string module name.
952 names: A limiting list of names to import from module_name. If
953 empty (the default), all names are imported from the module
954 similar to a "from foo.bar import *" statement.
955 Raises:
956 error.AutoservError: When a name being imported would clobber
957 a name already in namespace.
958 """
959 module = __import__(module_name, {}, {}, names)
960
961 # No names supplied? Import * from the lowest level module.
962 # (Ugh, why do I have to implement this part myself?)
963 if not names:
964 for submodule_name in module_name.split('.')[1:]:
965 module = getattr(module, submodule_name)
966 if hasattr(module, '__all__'):
967 names = getattr(module, '__all__')
968 else:
969 names = dir(module)
970
971 # Install each name into namespace, checking to make sure it
972 # doesn't override anything that already exists.
973 for name in names:
974 # Check for conflicts to help prevent future problems.
975 if name in namespace and protect:
976 if namespace[name] is not getattr(module, name):
977 raise error.AutoservError('importing name '
978 '%s from %s %r would override %r' %
979 (name, module_name, getattr(module, name),
980 namespace[name]))
981 else:
982 # Encourage cleanliness and the use of __all__ for a
983 # more concrete API with less surprises on '*' imports.
984 warnings.warn('%s (%r) being imported from %s for use '
985 'in server control files is not the '
986 'first occurrance of that import.' %
987 (name, namespace[name], module_name))
988
989 namespace[name] = getattr(module, name)
990
991
992 # This is the equivalent of prepending a bunch of import statements to
993 # the front of the control script.
mbligha2b07dd2009-06-22 18:26:13 +0000994 namespace.update(os=os, sys=sys, logging=logging)
mbligh084bc172008-10-18 14:02:45 +0000995 _import_names('autotest_lib.server',
996 ('hosts', 'autotest', 'kvm', 'git', 'standalone_profiler',
997 'source_kernel', 'rpm_kernel', 'deb_kernel', 'git_kernel'))
998 _import_names('autotest_lib.server.subcommand',
999 ('parallel', 'parallel_simple', 'subcommand'))
1000 _import_names('autotest_lib.server.utils',
1001 ('run', 'get_tmp_dir', 'sh_escape', 'parse_machine'))
1002 _import_names('autotest_lib.client.common_lib.error')
1003 _import_names('autotest_lib.client.common_lib.barrier', ('barrier',))
1004
1005 # Inject ourself as the job object into other classes within the API.
1006 # (Yuck, this injection is a gross thing be part of a public API. -gps)
1007 #
1008 # XXX Base & SiteAutotest do not appear to use .job. Who does?
1009 namespace['autotest'].Autotest.job = self
1010 # server.hosts.base_classes.Host uses .job.
1011 namespace['hosts'].Host.job = self
1012
1013
1014 def _execute_code(self, code_file, namespace, protect=True):
mbligh2b92b862008-11-22 13:25:32 +00001015 """
1016 Execute code using a copy of namespace as a server control script.
mbligh084bc172008-10-18 14:02:45 +00001017
1018 Unless protect_namespace is explicitly set to False, the dict will not
1019 be modified.
1020
1021 Args:
1022 code_file: The filename of the control file to execute.
1023 namespace: A dict containing names to make available during execution.
1024 protect: Boolean. If True (the default) a copy of the namespace dict
1025 is used during execution to prevent the code from modifying its
1026 contents outside of this function. If False the raw dict is
1027 passed in and modifications will be allowed.
1028 """
1029 if protect:
1030 namespace = namespace.copy()
1031 self._fill_server_control_namespace(namespace, protect=protect)
1032 # TODO: Simplify and get rid of the special cases for only 1 machine.
showard3e66e8c2008-10-27 19:20:51 +00001033 if len(self.machines) > 1:
mbligh084bc172008-10-18 14:02:45 +00001034 machines_text = '\n'.join(self.machines) + '\n'
1035 # Only rewrite the file if it does not match our machine list.
1036 try:
1037 machines_f = open(MACHINES_FILENAME, 'r')
1038 existing_machines_text = machines_f.read()
1039 machines_f.close()
1040 except EnvironmentError:
1041 existing_machines_text = None
1042 if machines_text != existing_machines_text:
1043 utils.open_write_close(MACHINES_FILENAME, machines_text)
1044 execfile(code_file, namespace, namespace)
jadmanski10646442008-08-13 14:05:21 +00001045
1046
1047 def _record(self, status_code, subdir, operation, status='',
1048 epoch_time=None, optional_fields=None):
1049 """
1050 Actual function for recording a single line into the status
1051 logs. Should never be called directly, only by job.record as
1052 this would bypass the console monitor logging.
1053 """
1054
mbligh2b92b862008-11-22 13:25:32 +00001055 msg = self._render_record(status_code, subdir, operation, status,
1056 epoch_time, optional_fields=optional_fields)
jadmanski10646442008-08-13 14:05:21 +00001057
jadmanski779bd292009-03-19 17:33:33 +00001058 status_file = self.get_status_log_path()
jadmanski10646442008-08-13 14:05:21 +00001059 sys.stdout.write(msg)
mbligh210bae62009-04-01 18:33:13 +00001060 if status_file:
1061 open(status_file, "a").write(msg)
jadmanski10646442008-08-13 14:05:21 +00001062 if subdir:
jadmanski779bd292009-03-19 17:33:33 +00001063 sub_status_file = self.get_status_log_path(subdir)
1064 open(sub_status_file, "a").write(msg)
jadmanski10646442008-08-13 14:05:21 +00001065 self.__parse_status(msg.splitlines())
1066
1067
1068 def __parse_status(self, new_lines):
mbligh0d0f67d2009-11-06 03:15:03 +00001069 if not self._using_parser:
jadmanski10646442008-08-13 14:05:21 +00001070 return
1071 new_tests = self.parser.process_lines(new_lines)
1072 for test in new_tests:
1073 self.__insert_test(test)
1074
1075
1076 def __insert_test(self, test):
mbligh2b92b862008-11-22 13:25:32 +00001077 """
1078 An internal method to insert a new test result into the
jadmanski10646442008-08-13 14:05:21 +00001079 database. This method will not raise an exception, even if an
1080 error occurs during the insert, to avoid failing a test
1081 simply because of unexpected database issues."""
showard21baa452008-10-21 00:08:39 +00001082 self.num_tests_run += 1
1083 if status_lib.is_worse_than_or_equal_to(test.status, 'FAIL'):
1084 self.num_tests_failed += 1
jadmanski10646442008-08-13 14:05:21 +00001085 try:
1086 self.results_db.insert_test(self.job_model, test)
1087 except Exception:
1088 msg = ("WARNING: An unexpected error occured while "
1089 "inserting test results into the database. "
1090 "Ignoring error.\n" + traceback.format_exc())
1091 print >> sys.stderr, msg
1092
mblighcaa62c22008-04-07 21:51:17 +00001093
mblighfc3da5b2010-01-06 18:37:22 +00001094 def preprocess_client_state(self):
1095 """
1096 Produce a state file for initializing the state of a client job.
1097
1098 Creates a new client state file with all the current server state, as
1099 well as some pre-set client state.
1100
1101 @returns The path of the file the state was written into.
1102 """
1103 # initialize the sysinfo state
1104 self._state.set('client', 'sysinfo', self.sysinfo.serialize())
1105
1106 # dump the state out to a tempfile
1107 fd, file_path = tempfile.mkstemp(dir=self.tmpdir)
1108 os.close(fd)
mbligha2c99492010-01-27 22:59:50 +00001109
1110 # write_to_file doesn't need locking, we exclusively own file_path
mblighfc3da5b2010-01-06 18:37:22 +00001111 self._state.write_to_file(file_path)
1112 return file_path
1113
1114
1115 def postprocess_client_state(self, state_path):
1116 """
1117 Update the state of this job with the state from a client job.
1118
1119 Updates the state of the server side of a job with the final state
1120 of a client job that was run. Updates the non-client-specific state,
1121 pulls in some specific bits from the client-specific state, and then
1122 discards the rest. Removes the state file afterwards
1123
1124 @param state_file A path to the state file from the client.
1125 """
1126 # update the on-disk state
1127 self._state.read_from_file(state_path)
1128 try:
1129 os.remove(state_path)
mbligha2c99492010-01-27 22:59:50 +00001130 except OSError, e:
mblighfc3da5b2010-01-06 18:37:22 +00001131 # ignore file-not-found errors
1132 if e.errno != errno.ENOENT:
1133 raise
1134
1135 # update the sysinfo state
1136 if self._state.has('client', 'sysinfo'):
1137 self.sysinfo.deserialize(self._state.get('client', 'sysinfo'))
1138
1139 # drop all the client-specific state
1140 self._state.discard_namespace('client')
1141
1142
mbligha7007722009-01-13 00:37:11 +00001143site_server_job = utils.import_site_class(
1144 __file__, "autotest_lib.server.site_server_job", "site_server_job",
1145 base_server_job)
jadmanski0afbb632008-06-06 21:10:57 +00001146
mbligh0a8c3322009-04-28 18:32:19 +00001147class server_job(site_server_job):
jadmanski0afbb632008-06-06 21:10:57 +00001148 pass
jadmanskif37df842009-02-11 00:03:26 +00001149
1150
class warning_manager(object):
    """Class for controlling warning logs. Manages the enabling and disabling
    of warnings."""
    def __init__(self):
        # map of warning type -> list of (start, end) epoch intervals during
        # which that type was disabled; end of None means "still disabled"
        self.disabled_warnings = {}


    def is_valid(self, timestamp, warning_type):
        """Indicates if a warning (based on the time it occured and its type)
        is a valid warning. A warning is considered "invalid" if this type of
        warning was marked as "disabled" at the time the warning occured."""
        for start, end in self.disabled_warnings.get(warning_type, []):
            interval_over = end is not None and timestamp >= end
            if timestamp >= start and not interval_over:
                return False
        return True


    def disable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, disables all further warnings of this type."""
        intervals = self.disabled_warnings.setdefault(warning_type, [])
        currently_disabled = intervals and intervals[-1][1] is None
        if not currently_disabled:
            # open a new interval starting now
            intervals.append((int(current_time_func()), None))


    def enable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, enables all further warnings of this type."""
        intervals = self.disabled_warnings.get(warning_type, [])
        if intervals and intervals[-1][1] is None:
            # close the currently open interval at the present time
            intervals[-1] = (intervals[-1][0], int(current_time_func()))