blob: d4c01e7f2abc1688ebb42ce0f1d9a469b25a59c6 [file] [log] [blame]
mbligh57e78662008-06-17 19:53:49 +00001"""
2The main job wrapper for the server side.
3
4This is the core infrastructure. Derived from the client side job.py
5
6Copyright Martin J. Bligh, Andy Whitcroft 2007
7"""
8
jadmanski6bb32d72009-03-19 20:25:24 +00009import getpass, os, sys, re, stat, tempfile, time, select, subprocess
mblighfc3da5b2010-01-06 18:37:22 +000010import traceback, shutil, warnings, fcntl, pickle, logging, itertools, errno
showard75cdfee2009-06-10 17:40:41 +000011from autotest_lib.client.bin import sysinfo
mbligh0d0f67d2009-11-06 03:15:03 +000012from autotest_lib.client.common_lib import base_job
mbligh09108442008-10-15 16:27:38 +000013from autotest_lib.client.common_lib import error, log, utils, packages
showard75cdfee2009-06-10 17:40:41 +000014from autotest_lib.client.common_lib import logging_manager
jadmanski043e1132008-11-19 17:10:32 +000015from autotest_lib.server import test, subcommand, profilers
jadmanski10646442008-08-13 14:05:21 +000016from autotest_lib.tko import db as tko_db, status_lib, utils as tko_utils
jadmanski10646442008-08-13 14:05:21 +000017
18
mbligh084bc172008-10-18 14:02:45 +000019def _control_segment_path(name):
20 """Get the pathname of the named control segment file."""
jadmanski10646442008-08-13 14:05:21 +000021 server_dir = os.path.dirname(os.path.abspath(__file__))
mbligh084bc172008-10-18 14:02:45 +000022 return os.path.join(server_dir, "control_segments", name)
jadmanski10646442008-08-13 14:05:21 +000023
24
# Filenames used for control files and machine lists inside a job's
# results directory.
CLIENT_CONTROL_FILENAME = 'control'
SERVER_CONTROL_FILENAME = 'control.srv'
MACHINES_FILENAME = '.machines'

# Prebuilt control segments implementing the standard job phases.
CLIENT_WRAPPER_CONTROL_FILE = _control_segment_path('client_wrapper')
CRASHDUMPS_CONTROL_FILE = _control_segment_path('crashdumps')
CRASHINFO_CONTROL_FILE = _control_segment_path('crashinfo')
INSTALL_CONTROL_FILE = _control_segment_path('install')
CLEANUP_CONTROL_FILE = _control_segment_path('cleanup')

VERIFY_CONTROL_FILE = _control_segment_path('verify')
REPAIR_CONTROL_FILE = _control_segment_path('repair')
jadmanski10646442008-08-13 14:05:21 +000037
38
mbligh062ed152009-01-13 00:57:14 +000039# by default provide a stub that generates no site data
40def _get_site_job_data_dummy(job):
41 return {}
42
43
# load up site-specific code for generating site-specific job data;
# falls back to the dummy stub above when no site implementation exists
get_site_job_data = utils.import_site_function(__file__,
    "autotest_lib.server.site_server_job", "get_site_job_data",
    _get_site_job_data_dummy)
jadmanski10646442008-08-13 14:05:21 +000048
49
class base_server_job(base_job.base_job):
    """The server-side concrete implementation of base_job.

    Optional properties provided by this implementation:
        serverdir
        conmuxdir

        num_tests_run
        num_tests_failed

        warning_manager
        warning_loggers
    """

    # version of the status log format this job writes; consumed by the
    # TKO status parser (see init_parser)
    _STATUS_VERSION = 1
jadmanski10646442008-08-13 14:05:21 +000065
66 def __init__(self, control, args, resultdir, label, user, machines,
67 client=False, parse_job='',
mbligh374f3412009-05-13 21:29:45 +000068 ssh_user='root', ssh_port=22, ssh_pass='',
mblighe0cbc912010-03-11 18:03:07 +000069 group_name='', tag='',
70 control_filename=SERVER_CONTROL_FILENAME):
jadmanski10646442008-08-13 14:05:21 +000071 """
mbligh374f3412009-05-13 21:29:45 +000072 Create a server side job object.
mblighb5dac432008-11-27 00:38:44 +000073
mblighe7d9c602009-07-02 19:02:33 +000074 @param control: The pathname of the control file.
75 @param args: Passed to the control file.
76 @param resultdir: Where to throw the results.
77 @param label: Description of the job.
78 @param user: Username for the job (email address).
79 @param client: True if this is a client-side control file.
80 @param parse_job: string, if supplied it is the job execution tag that
81 the results will be passed through to the TKO parser with.
82 @param ssh_user: The SSH username. [root]
83 @param ssh_port: The SSH port number. [22]
84 @param ssh_pass: The SSH passphrase, if needed.
85 @param group_name: If supplied, this will be written out as
mbligh374f3412009-05-13 21:29:45 +000086 host_group_name in the keyvals file for the parser.
mblighe7d9c602009-07-02 19:02:33 +000087 @param tag: The job execution tag from the scheduler. [optional]
mblighe0cbc912010-03-11 18:03:07 +000088 @param control_filename: The filename where the server control file
89 should be written in the results directory.
jadmanski10646442008-08-13 14:05:21 +000090 """
mbligh0d0f67d2009-11-06 03:15:03 +000091 super(base_server_job, self).__init__(resultdir=resultdir)
mbligha788dc42009-03-26 21:10:16 +000092
mbligh0d0f67d2009-11-06 03:15:03 +000093 path = os.path.dirname(__file__)
94 self.control = control
95 self._uncollected_log_file = os.path.join(self.resultdir,
96 'uncollected_logs')
97 debugdir = os.path.join(self.resultdir, 'debug')
98 if not os.path.exists(debugdir):
99 os.mkdir(debugdir)
100
101 if user:
102 self.user = user
103 else:
104 self.user = getpass.getuser()
105
106 self._args = args
jadmanski10646442008-08-13 14:05:21 +0000107 self.machines = machines
mbligh0d0f67d2009-11-06 03:15:03 +0000108 self._client = client
109 self._record_prefix = ''
jadmanski10646442008-08-13 14:05:21 +0000110 self.warning_loggers = set()
jadmanskif37df842009-02-11 00:03:26 +0000111 self.warning_manager = warning_manager()
mbligh0d0f67d2009-11-06 03:15:03 +0000112 self._ssh_user = ssh_user
113 self._ssh_port = ssh_port
114 self._ssh_pass = ssh_pass
mblighe7d9c602009-07-02 19:02:33 +0000115 self.tag = tag
mbligh09108442008-10-15 16:27:38 +0000116 self.last_boot_tag = None
jadmanski53aaf382008-11-17 16:22:31 +0000117 self.hosts = set()
mbligh0d0f67d2009-11-06 03:15:03 +0000118 self.drop_caches = False
mblighb5dac432008-11-27 00:38:44 +0000119 self.drop_caches_between_iterations = False
mblighe0cbc912010-03-11 18:03:07 +0000120 self._control_filename = control_filename
jadmanski10646442008-08-13 14:05:21 +0000121
showard75cdfee2009-06-10 17:40:41 +0000122 self.logging = logging_manager.get_logging_manager(
123 manage_stdout_and_stderr=True, redirect_fds=True)
124 subcommand.logging_manager_object = self.logging
jadmanski10646442008-08-13 14:05:21 +0000125
mbligh0d0f67d2009-11-06 03:15:03 +0000126 self.sysinfo = sysinfo.sysinfo(self.resultdir)
jadmanski043e1132008-11-19 17:10:32 +0000127 self.profilers = profilers.profilers(self)
jadmanskic09fc152008-10-15 17:56:59 +0000128
jadmanski10646442008-08-13 14:05:21 +0000129 job_data = {'label' : label, 'user' : user,
130 'hostname' : ','.join(machines),
mbligh0d0f67d2009-11-06 03:15:03 +0000131 'status_version' : str(self._STATUS_VERSION),
showard170873e2009-01-07 00:22:26 +0000132 'job_started' : str(int(time.time()))}
mbligh374f3412009-05-13 21:29:45 +0000133 if group_name:
134 job_data['host_group_name'] = group_name
jadmanski10646442008-08-13 14:05:21 +0000135
mbligh0d0f67d2009-11-06 03:15:03 +0000136 # only write these keyvals out on the first job in a resultdir
137 if 'job_started' not in utils.read_keyval(self.resultdir):
138 job_data.update(get_site_job_data(self))
139 utils.write_keyval(self.resultdir, job_data)
140
141 self._parse_job = parse_job
showardcc929362010-01-25 21:20:41 +0000142 self._using_parser = (self._parse_job and len(machines) <= 1)
mbligh0d0f67d2009-11-06 03:15:03 +0000143 self.pkgmgr = packages.PackageManager(
144 self.autodir, run_function_dargs={'timeout':600})
showard21baa452008-10-21 00:08:39 +0000145 self.num_tests_run = 0
146 self.num_tests_failed = 0
147
jadmanski550fdc22008-11-20 16:32:08 +0000148 self._register_subcommand_hooks()
149
mbligh0d0f67d2009-11-06 03:15:03 +0000150 # these components aren't usable on the server
151 self.bootloader = None
152 self.harness = None
153
154
155 @classmethod
156 def _find_base_directories(cls):
157 """
158 Determine locations of autodir, clientdir and serverdir. Assumes
159 that this file is located within serverdir and uses __file__ along
160 with relative paths to resolve the location.
161 """
162 serverdir = os.path.abspath(os.path.dirname(__file__))
163 autodir = os.path.normpath(os.path.join(serverdir, '..'))
164 clientdir = os.path.join(autodir, 'client')
165 return autodir, clientdir, serverdir
166
167
168 def _find_resultdir(self, resultdir):
169 """
170 Determine the location of resultdir. For server jobs we expect one to
171 always be explicitly passed in to __init__, so just return that.
172 """
173 if resultdir:
174 return os.path.normpath(resultdir)
175 else:
176 return None
177
jadmanski550fdc22008-11-20 16:32:08 +0000178
jadmanskie432dd22009-01-30 15:04:51 +0000179 @staticmethod
180 def _load_control_file(path):
181 f = open(path)
182 try:
183 control_file = f.read()
184 finally:
185 f.close()
186 return re.sub('\r', '', control_file)
187
188
jadmanski550fdc22008-11-20 16:32:08 +0000189 def _register_subcommand_hooks(self):
mbligh2b92b862008-11-22 13:25:32 +0000190 """
191 Register some hooks into the subcommand modules that allow us
192 to properly clean up self.hosts created in forked subprocesses.
193 """
jadmanski550fdc22008-11-20 16:32:08 +0000194 def on_fork(cmd):
195 self._existing_hosts_on_fork = set(self.hosts)
196 def on_join(cmd):
197 new_hosts = self.hosts - self._existing_hosts_on_fork
198 for host in new_hosts:
199 host.close()
200 subcommand.subcommand.register_fork_hook(on_fork)
201 subcommand.subcommand.register_join_hook(on_join)
202
jadmanski10646442008-08-13 14:05:21 +0000203
    def init_parser(self):
        """
        Start the continuous parsing of self.resultdir. This sets up
        the database connection and inserts the basic job object into
        the database if necessary.

        No-op unless continuous parsing was enabled in __init__
        (self._using_parser).
        """
        if not self._using_parser:
            return
        # redirect parser debugging to .parse.log
        parse_log = os.path.join(self.resultdir, '.parse.log')
        parse_log = open(parse_log, 'w', 0)  # buffering=0: unbuffered writes
        tko_utils.redirect_parser_debugging(parse_log)
        # create a job model object and set up the db
        self.results_db = tko_db.db(autocommit=True)
        self.parser = status_lib.parser(self._STATUS_VERSION)
        self.job_model = self.parser.make_job(self.resultdir)
        self.parser.start(self.job_model)
        # check if a job already exists in the db and insert it if
        # it does not
        job_idx = self.results_db.find_job(self._parse_job)
        if job_idx is None:
            self.results_db.insert_job(self._parse_job, self.job_model)
        else:
            # job already present: attach its db indexes to the model
            machine_idx = self.results_db.lookup_machine(self.job_model.machine)
            self.job_model.index = job_idx
            self.job_model.machine_idx = machine_idx
230
231
    def cleanup_parser(self):
        """
        This should be called after the server job is finished
        to carry out any remaining cleanup (e.g. flushing any
        remaining test results to the results db)
        """
        if not self._using_parser:
            return
        # flush any tests still buffered in the parser out to the db
        final_tests = self.parser.end()
        for test in final_tests:
            self.__insert_test(test)
        # mark parsing finished so repeated calls become no-ops
        self._using_parser = False
jadmanski10646442008-08-13 14:05:21 +0000244
245
246 def verify(self):
247 if not self.machines:
mbligh084bc172008-10-18 14:02:45 +0000248 raise error.AutoservError('No machines specified to verify')
mbligh0fce4112008-11-27 00:37:17 +0000249 if self.resultdir:
250 os.chdir(self.resultdir)
jadmanski10646442008-08-13 14:05:21 +0000251 try:
jadmanskicdd0c402008-09-19 21:21:31 +0000252 namespace = {'machines' : self.machines, 'job' : self,
mbligh0d0f67d2009-11-06 03:15:03 +0000253 'ssh_user' : self._ssh_user,
254 'ssh_port' : self._ssh_port,
255 'ssh_pass' : self._ssh_pass}
mbligh084bc172008-10-18 14:02:45 +0000256 self._execute_code(VERIFY_CONTROL_FILE, namespace, protect=False)
jadmanski10646442008-08-13 14:05:21 +0000257 except Exception, e:
mbligh2b92b862008-11-22 13:25:32 +0000258 msg = ('Verify failed\n' + str(e) + '\n' + traceback.format_exc())
jadmanski10646442008-08-13 14:05:21 +0000259 self.record('ABORT', None, None, msg)
260 raise
261
262
263 def repair(self, host_protection):
264 if not self.machines:
265 raise error.AutoservError('No machines specified to repair')
mbligh0fce4112008-11-27 00:37:17 +0000266 if self.resultdir:
267 os.chdir(self.resultdir)
jadmanski10646442008-08-13 14:05:21 +0000268 namespace = {'machines': self.machines, 'job': self,
mbligh0d0f67d2009-11-06 03:15:03 +0000269 'ssh_user': self._ssh_user, 'ssh_port': self._ssh_port,
270 'ssh_pass': self._ssh_pass,
jadmanski10646442008-08-13 14:05:21 +0000271 'protection_level': host_protection}
mbligh25c0b8c2009-01-24 01:44:17 +0000272
mbligh0931b0a2009-04-08 17:44:48 +0000273 self._execute_code(REPAIR_CONTROL_FILE, namespace, protect=False)
jadmanski10646442008-08-13 14:05:21 +0000274
275
    def precheck(self):
        """
        perform any additional checks in derived classes.

        The base implementation does nothing.
        """
        pass
281
282
    def enable_external_logging(self):
        """
        Start or restart external logging mechanism.

        Hook for derived classes; the base implementation does nothing.
        """
        pass
288
289
    def disable_external_logging(self):
        """
        Pause or stop external logging mechanism.

        Hook for derived classes; the base implementation does nothing.
        """
        pass
295
296
    def use_external_logging(self):
        """
        Return True if external logging should be used.

        The base implementation always returns False.
        """
        return False
302
303
    def _make_parallel_wrapper(self, function, machines, log):
        """Wrap function as appropriate for calling by parallel_simple.

        Each wrapper body runs inside a forked subcommand process, so the
        mutations of self below are per-child and do not affect the
        parent job object.
        """
        # forking happens unless this is a single-machine call for the
        # job's own (single) machine
        is_forking = not (len(machines) == 1 and self.machines == machines)
        if self._parse_job and is_forking and log:
            # forked + parsing: set up a per-machine execution context,
            # keyvals and a continuous parser in each child
            def wrapper(machine):
                self._parse_job += "/" + machine
                self._using_parser = True
                self.machines = [machine]
                self.push_execution_context(machine)
                os.chdir(self.resultdir)
                utils.write_keyval(self.resultdir, {"hostname": machine})
                self.init_parser()
                result = function(machine)
                self.cleanup_parser()
                return result
        elif len(machines) > 1 and log:
            # forked without parsing: still give each child its own
            # execution context and per-machine keyvals
            def wrapper(machine):
                self.push_execution_context(machine)
                os.chdir(self.resultdir)
                machine_data = {'hostname' : machine,
                                'status_version' : str(self._STATUS_VERSION)}
                utils.write_keyval(self.resultdir, machine_data)
                result = function(machine)
                return result
        else:
            # no forking needed; call the function directly
            wrapper = function
        return wrapper
331
332
333 def parallel_simple(self, function, machines, log=True, timeout=None,
334 return_results=False):
335 """
336 Run 'function' using parallel_simple, with an extra wrapper to handle
337 the necessary setup for continuous parsing, if possible. If continuous
338 parsing is already properly initialized then this should just work.
339
340 @param function: A callable to run in parallel given each machine.
341 @param machines: A list of machine names to be passed one per subcommand
342 invocation of function.
343 @param log: If True, output will be written to output in a subdirectory
344 named after each machine.
345 @param timeout: Seconds after which the function call should timeout.
346 @param return_results: If True instead of an AutoServError being raised
347 on any error a list of the results|exceptions from the function
348 called on each arg is returned. [default: False]
349
350 @raises error.AutotestError: If any of the functions failed.
351 """
352 wrapper = self._make_parallel_wrapper(function, machines, log)
353 return subcommand.parallel_simple(wrapper, machines,
354 log=log, timeout=timeout,
355 return_results=return_results)
356
357
358 def parallel_on_machines(self, function, machines, timeout=None):
359 """
showardcd5fac42009-07-06 20:19:43 +0000360 @param function: Called in parallel with one machine as its argument.
mbligh415dc212009-06-15 21:53:34 +0000361 @param machines: A list of machines to call function(machine) on.
362 @param timeout: Seconds after which the function call should timeout.
363
364 @returns A list of machines on which function(machine) returned
365 without raising an exception.
366 """
showardcd5fac42009-07-06 20:19:43 +0000367 results = self.parallel_simple(function, machines, timeout=timeout,
mbligh415dc212009-06-15 21:53:34 +0000368 return_results=True)
369 success_machines = []
370 for result, machine in itertools.izip(results, machines):
371 if not isinstance(result, Exception):
372 success_machines.append(machine)
373 return success_machines
jadmanski10646442008-08-13 14:05:21 +0000374
375
    # sentinel for control_file_dir: write control files to a throwaway
    # temp directory instead of the resultdir
    _USE_TEMP_DIR = object()
    def run(self, cleanup=False, install_before=False, install_after=False,
            collect_crashdumps=True, namespace={}, control=None,
            control_file_dir=None, only_collect_crashinfo=False):
        """
        Execute the job: run the control file plus the surrounding
        install / crashdump-collection / cleanup phases.

        @param cleanup: If True, run the cleanup control segment at the end.
        @param install_before: If True, run the install segment before the
                control file (only when machines are present).
        @param install_after: If True, run the install segment afterwards.
        @param collect_crashdumps: If True, collect crashdumps afterwards.
        @param namespace: Extra names for the control file's namespace.
                (The mutable default is safe: it is copied, never mutated.)
        @param control: Control file text; defaults to loading self.control.
        @param control_file_dir: Where to write the control file copies, or
                _USE_TEMP_DIR for a throwaway temporary directory.
        @param only_collect_crashinfo: If True, skip running the job and
                only collect crash info for a previous run.
        """
        # for a normal job, make sure the uncollected logs file exists
        # for a crashinfo-only run it should already exist, bail out otherwise
        if self.resultdir and not os.path.exists(self._uncollected_log_file):
            if only_collect_crashinfo:
                # if this is a crashinfo-only run, and there were no existing
                # uncollected logs, just bail out early
                logging.info("No existing uncollected logs, "
                             "skipping crashinfo collection")
                return
            else:
                # seed the uncollected-logs file with an empty pickled list
                log_file = open(self._uncollected_log_file, "w")
                pickle.dump([], log_file)
                log_file.close()

        # use a copy so changes don't affect the original dictionary
        namespace = namespace.copy()
        machines = self.machines
        if control is None:
            if self.control is None:
                control = ''
            else:
                control = self._load_control_file(self.control)
        if control_file_dir is None:
            control_file_dir = self.resultdir

        self.aborted = False
        namespace['machines'] = machines
        namespace['args'] = self._args
        namespace['job'] = self
        namespace['ssh_user'] = self._ssh_user
        namespace['ssh_port'] = self._ssh_port
        namespace['ssh_pass'] = self._ssh_pass
        test_start_time = int(time.time())

        if self.resultdir:
            os.chdir(self.resultdir)
            # touch status.log so that the parser knows a job is running here
            open(self.get_status_log_path(), 'a').close()
            self.enable_external_logging()

        # assume crashinfo collection is needed until the control file
        # finishes without an exception
        collect_crashinfo = True
        temp_control_file_dir = None
        try:
            try:
                if install_before and machines:
                    self._execute_code(INSTALL_CONTROL_FILE, namespace)

                if only_collect_crashinfo:
                    return

                # determine the dir to write the control files to
                cfd_specified = (control_file_dir
                                 and control_file_dir is not self._USE_TEMP_DIR)
                if cfd_specified:
                    temp_control_file_dir = None
                else:
                    temp_control_file_dir = tempfile.mkdtemp(
                        suffix='temp_control_file_dir')
                    control_file_dir = temp_control_file_dir
                server_control_file = os.path.join(control_file_dir,
                                                   self._control_filename)
                client_control_file = os.path.join(control_file_dir,
                                                   CLIENT_CONTROL_FILENAME)
                if self._client:
                    # client-side job: the real control file runs on the
                    # client, the server runs the client wrapper segment
                    namespace['control'] = control
                    utils.open_write_close(client_control_file, control)
                    shutil.copyfile(CLIENT_WRAPPER_CONTROL_FILE,
                                    server_control_file)
                else:
                    utils.open_write_close(server_control_file, control)
                logging.info("Processing control file")
                self._execute_code(server_control_file, namespace)
                logging.info("Finished processing control file")

                # no error occured, so we don't need to collect crashinfo
                collect_crashinfo = False
            except:
                try:
                    logging.exception(
                        'Exception escaped control file, job aborting:')
                except:
                    pass # don't let logging exceptions here interfere
                raise
        finally:
            if temp_control_file_dir:
                # Clean up temp directory used for copies of the control files
                try:
                    shutil.rmtree(temp_control_file_dir)
                except Exception, e:
                    logging.warn('Could not remove temp directory %s: %s',
                                 temp_control_file_dir, e)

            if machines and (collect_crashdumps or collect_crashinfo):
                namespace['test_start_time'] = test_start_time
                if collect_crashinfo:
                    # includes crashdumps
                    self._execute_code(CRASHINFO_CONTROL_FILE, namespace)
                else:
                    self._execute_code(CRASHDUMPS_CONTROL_FILE, namespace)
            if self._uncollected_log_file:
                os.remove(self._uncollected_log_file)
            self.disable_external_logging()
            if cleanup and machines:
                self._execute_code(CLEANUP_CONTROL_FILE, namespace)
            if install_after and machines:
                self._execute_code(INSTALL_CONTROL_FILE, namespace)
jadmanski10646442008-08-13 14:05:21 +0000486
487
    def run_test(self, url, *args, **dargs):
        """
        Summon a test object and run it.

        tag
                tag to add to testname
        url
                url of the test to run

        @returns True if the test completed, False if it raised a
                TestBaseException (which has already been recorded);
                any other exception is re-raised.
        """
        group, testname = self.pkgmgr.get_package_name(url, 'test')
        testname, subdir, tag = self._build_tagged_test_name(testname, dargs)
        outputdir = self._make_test_outputdir(subdir)

        def group_func():
            # run the test and record its status; exceptions are recorded
            # here and then propagated to _run_group
            try:
                test.runtest(self, url, tag, args, dargs)
            except error.TestBaseException, e:
                self.record(e.exit_status, subdir, testname, str(e))
                raise
            except Exception, e:
                info = str(e) + "\n" + traceback.format_exc()
                self.record('FAIL', subdir, testname, info)
                raise
            else:
                self.record('GOOD', subdir, testname, 'completed successfully')

        result, exc_info = self._run_group(testname, subdir, group_func)
        if exc_info and isinstance(exc_info[1], error.TestBaseException):
            # test failures are reported via the return value, not raised
            return False
        elif exc_info:
            # re-raise non-test exceptions with the original traceback
            raise exc_info[0], exc_info[1], exc_info[2]
        else:
            return True
jadmanski10646442008-08-13 14:05:21 +0000521
522
    def _run_group(self, name, subdir, function, *args, **dargs):
        """\
        Underlying method for running something inside of a group.

        Records START/END status lines around the call and manages the
        record indentation prefix. Returns (result, exc_info) where
        exc_info is None unless a TestBaseException was raised; other
        exceptions are converted into a JobError.
        """
        result, exc_info = None, None
        old_record_prefix = self._record_prefix
        try:
            self.record('START', subdir, name)
            # indent everything recorded inside the group
            self._record_prefix += '\t'
            try:
                result = function(*args, **dargs)
            finally:
                # restore indentation before the END line is written
                self._record_prefix = old_record_prefix
        except error.TestBaseException, e:
            # test-level failure: record it and hand exc_info back to the
            # caller instead of raising
            self.record("END %s" % e.exit_status, subdir, name)
            exc_info = sys.exc_info()
        except Exception, e:
            # anything else aborts the group and becomes a JobError
            err_msg = str(e) + '\n'
            err_msg += traceback.format_exc()
            self.record('END ABORT', subdir, name, err_msg)
            raise error.JobError(name + ' failed\n' + traceback.format_exc())
        else:
            self.record('END GOOD', subdir, name)

        return result, exc_info
jadmanski10646442008-08-13 14:05:21 +0000548
549
550 def run_group(self, function, *args, **dargs):
551 """\
552 function:
553 subroutine to run
554 *args:
555 arguments for the function
556 """
557
558 name = function.__name__
559
560 # Allow the tag for the group to be specified.
561 tag = dargs.pop('tag', None)
562 if tag:
563 name = tag
564
jadmanskide292df2008-08-26 20:51:14 +0000565 return self._run_group(name, None, function, *args, **dargs)[0]
jadmanski10646442008-08-13 14:05:21 +0000566
567
    def run_reboot(self, reboot_func, get_kernel_func):
        """\
        A specialization of run_group meant specifically for handling
        a reboot. Includes support for capturing the kernel version
        after the reboot.

        reboot_func: a function that carries out the reboot

        get_kernel_func: a function that returns a string
        representing the kernel version.
        """

        old_record_prefix = self._record_prefix
        try:
            self.record('START', None, 'reboot')
            # indent everything recorded during the reboot
            self._record_prefix += '\t'
            reboot_func()
        except Exception, e:
            # restore indentation before recording the failure, then
            # propagate the exception
            self._record_prefix = old_record_prefix
            err_msg = str(e) + '\n' + traceback.format_exc()
            self.record('END FAIL', None, 'reboot', err_msg)
            raise
        else:
            # capture the post-reboot kernel version in the END record
            kernel = get_kernel_func()
            self._record_prefix = old_record_prefix
            self.record('END GOOD', None, 'reboot',
                        optional_fields={"kernel": kernel})
595
596
jadmanskie432dd22009-01-30 15:04:51 +0000597 def run_control(self, path):
598 """Execute a control file found at path (relative to the autotest
599 path). Intended for executing a control file within a control file,
600 not for running the top-level job control file."""
601 path = os.path.join(self.autodir, path)
602 control_file = self._load_control_file(path)
mbligh0d0f67d2009-11-06 03:15:03 +0000603 self.run(control=control_file, control_file_dir=self._USE_TEMP_DIR)
jadmanskie432dd22009-01-30 15:04:51 +0000604
605
jadmanskic09fc152008-10-15 17:56:59 +0000606 def add_sysinfo_command(self, command, logfile=None, on_every_test=False):
mbligh4395bbd2009-03-25 19:34:17 +0000607 self._add_sysinfo_loggable(sysinfo.command(command, logf=logfile),
jadmanskic09fc152008-10-15 17:56:59 +0000608 on_every_test)
609
610
611 def add_sysinfo_logfile(self, file, on_every_test=False):
612 self._add_sysinfo_loggable(sysinfo.logfile(file), on_every_test)
613
614
615 def _add_sysinfo_loggable(self, loggable, on_every_test):
616 if on_every_test:
617 self.sysinfo.test_loggables.add(loggable)
618 else:
619 self.sysinfo.boot_loggables.add(loggable)
620
621
    def record(self, status_code, subdir, operation, status='',
               optional_fields=None):
        """
        Record job-level status

        The intent is to make this file both machine parseable and
        human readable. That involves a little more complexity, but
        really isn't all that bad ;-)

        Format is <status code>\t<subdir>\t<operation>\t<status>

        status code: see common_lib.log.is_valid_status()
                     for valid status definition

        subdir: MUST be a relevant subdirectory in the results,
        or None, which will be represented as '----'

        operation: description of what you ran (e.g. "dbench", or
                                        "mkfs -t foobar /dev/sda9")

        status: error message or "completed successfully"

        ------------------------------------------------------------

        Initial tabs indicate indent levels for grouping, and is
        governed by self._record_prefix

        multiline messages have secondary lines prefaced by a double
        space ('  ')

        Executing this method will trigger the logging of all new
        warnings to date from the various console loggers.
        """
        # poll all our warning loggers for new warnings
        warnings = self._read_warnings()
        old_record_prefix = self._record_prefix
        try:
            if status_code.startswith("END "):
                # indent warnings under the group that is ending
                self._record_prefix += "\t"
            for timestamp, msg in warnings:
                self._record("WARN", None, None, msg, timestamp)
        finally:
            self._record_prefix = old_record_prefix

        # write out the actual status log line
        self._record(status_code, subdir, operation, status,
                     optional_fields=optional_fields)
669
670
671 def _read_warnings(self):
jadmanskif37df842009-02-11 00:03:26 +0000672 """Poll all the warning loggers and extract any new warnings that have
673 been logged. If the warnings belong to a category that is currently
674 disabled, this method will discard them and they will no longer be
675 retrievable.
676
677 Returns a list of (timestamp, message) tuples, where timestamp is an
678 integer epoch timestamp."""
jadmanski10646442008-08-13 14:05:21 +0000679 warnings = []
680 while True:
681 # pull in a line of output from every logger that has
682 # output ready to be read
mbligh2b92b862008-11-22 13:25:32 +0000683 loggers, _, _ = select.select(self.warning_loggers, [], [], 0)
jadmanski10646442008-08-13 14:05:21 +0000684 closed_loggers = set()
685 for logger in loggers:
686 line = logger.readline()
687 # record any broken pipes (aka line == empty)
688 if len(line) == 0:
689 closed_loggers.add(logger)
690 continue
jadmanskif37df842009-02-11 00:03:26 +0000691 # parse out the warning
692 timestamp, msgtype, msg = line.split('\t', 2)
693 timestamp = int(timestamp)
694 # if the warning is valid, add it to the results
695 if self.warning_manager.is_valid(timestamp, msgtype):
696 warnings.append((timestamp, msg.strip()))
jadmanski10646442008-08-13 14:05:21 +0000697
698 # stop listening to loggers that are closed
699 self.warning_loggers -= closed_loggers
700
701 # stop if none of the loggers have any output left
702 if not loggers:
703 break
704
705 # sort into timestamp order
706 warnings.sort()
707 return warnings
708
709
showardcc929362010-01-25 21:20:41 +0000710 def _unique_subdirectory(self, base_subdirectory_name):
711 """Compute a unique results subdirectory based on the given name.
712
713 Appends base_subdirectory_name with a number as necessary to find a
714 directory name that doesn't already exist.
715 """
716 subdirectory = base_subdirectory_name
717 counter = 1
718 while os.path.exists(os.path.join(self.resultdir, subdirectory)):
719 subdirectory = base_subdirectory_name + '.' + str(counter)
720 counter += 1
721 return subdirectory
722
723
724 def record_summary(self, status_code, test_name, reason='', attributes=None,
725 distinguishing_attributes=(), child_test_ids=None):
726 """Record a summary test result.
727
728 @param status_code: status code string, see
729 common_lib.log.is_valid_status()
730 @param test_name: name of the test
731 @param reason: (optional) string providing detailed reason for test
732 outcome
733 @param attributes: (optional) dict of string keyvals to associate with
734 this result
735 @param distinguishing_attributes: (optional) list of attribute names
736 that should be used to distinguish identically-named test
737 results. These attributes should be present in the attributes
738 parameter. This is used to generate user-friendly subdirectory
739 names.
740 @param child_test_ids: (optional) list of test indices for test results
741 used in generating this result.
742 """
743 subdirectory_name_parts = [test_name]
744 for attribute in distinguishing_attributes:
745 assert attributes
746 assert attribute in attributes, '%s not in %s' % (attribute,
747 attributes)
748 subdirectory_name_parts.append(attributes[attribute])
749 base_subdirectory_name = '.'.join(subdirectory_name_parts)
750
751 subdirectory = self._unique_subdirectory(base_subdirectory_name)
752 subdirectory_path = os.path.join(self.resultdir, subdirectory)
753 os.mkdir(subdirectory_path)
754
755 self.record(status_code, subdirectory, test_name,
756 status=reason, optional_fields={'is_summary': True})
757
758 if attributes:
759 utils.write_keyval(subdirectory_path, attributes)
760
761 if child_test_ids:
762 ids_string = ','.join(str(test_id) for test_id in child_test_ids)
763 summary_data = {'child_test_ids': ids_string}
764 utils.write_keyval(os.path.join(subdirectory_path, 'summary_data'),
765 summary_data)
766
767
jadmanski16a7ff72009-04-01 18:19:53 +0000768 def disable_warnings(self, warning_type):
jadmanskif37df842009-02-11 00:03:26 +0000769 self.warning_manager.disable_warnings(warning_type)
jadmanski16a7ff72009-04-01 18:19:53 +0000770 self.record("INFO", None, None,
771 "disabling %s warnings" % warning_type,
772 {"warnings.disable": warning_type})
jadmanskif37df842009-02-11 00:03:26 +0000773
774
jadmanski16a7ff72009-04-01 18:19:53 +0000775 def enable_warnings(self, warning_type):
jadmanskif37df842009-02-11 00:03:26 +0000776 self.warning_manager.enable_warnings(warning_type)
jadmanski16a7ff72009-04-01 18:19:53 +0000777 self.record("INFO", None, None,
778 "enabling %s warnings" % warning_type,
779 {"warnings.enable": warning_type})
jadmanskif37df842009-02-11 00:03:26 +0000780
781
jadmanski779bd292009-03-19 17:33:33 +0000782 def get_status_log_path(self, subdir=None):
783 """Return the path to the job status log.
784
785 @param subdir - Optional paramter indicating that you want the path
786 to a subdirectory status log.
787
788 @returns The path where the status log should be.
789 """
mbligh210bae62009-04-01 18:33:13 +0000790 if self.resultdir:
791 if subdir:
792 return os.path.join(self.resultdir, subdir, "status.log")
793 else:
794 return os.path.join(self.resultdir, "status.log")
jadmanski779bd292009-03-19 17:33:33 +0000795 else:
mbligh210bae62009-04-01 18:33:13 +0000796 return None
jadmanski779bd292009-03-19 17:33:33 +0000797
798
jadmanski6bb32d72009-03-19 20:25:24 +0000799 def _update_uncollected_logs_list(self, update_func):
800 """Updates the uncollected logs list in a multi-process safe manner.
801
802 @param update_func - a function that updates the list of uncollected
803 logs. Should take one parameter, the list to be updated.
804 """
mbligh0d0f67d2009-11-06 03:15:03 +0000805 if self._uncollected_log_file:
806 log_file = open(self._uncollected_log_file, "r+")
mbligha788dc42009-03-26 21:10:16 +0000807 fcntl.flock(log_file, fcntl.LOCK_EX)
jadmanski6bb32d72009-03-19 20:25:24 +0000808 try:
809 uncollected_logs = pickle.load(log_file)
810 update_func(uncollected_logs)
811 log_file.seek(0)
812 log_file.truncate()
813 pickle.dump(uncollected_logs, log_file)
jadmanski3bff9092009-04-22 18:09:47 +0000814 log_file.flush()
jadmanski6bb32d72009-03-19 20:25:24 +0000815 finally:
816 fcntl.flock(log_file, fcntl.LOCK_UN)
817 log_file.close()
818
819
820 def add_client_log(self, hostname, remote_path, local_path):
821 """Adds a new set of client logs to the list of uncollected logs,
822 to allow for future log recovery.
823
824 @param host - the hostname of the machine holding the logs
825 @param remote_path - the directory on the remote machine holding logs
826 @param local_path - the local directory to copy the logs into
827 """
828 def update_func(logs_list):
829 logs_list.append((hostname, remote_path, local_path))
830 self._update_uncollected_logs_list(update_func)
831
832
833 def remove_client_log(self, hostname, remote_path, local_path):
834 """Removes a set of client logs from the list of uncollected logs,
835 to allow for future log recovery.
836
837 @param host - the hostname of the machine holding the logs
838 @param remote_path - the directory on the remote machine holding logs
839 @param local_path - the local directory to copy the logs into
840 """
841 def update_func(logs_list):
842 logs_list.remove((hostname, remote_path, local_path))
843 self._update_uncollected_logs_list(update_func)
844
845
mbligh0d0f67d2009-11-06 03:15:03 +0000846 def get_client_logs(self):
847 """Retrieves the list of uncollected logs, if it exists.
848
849 @returns A list of (host, remote_path, local_path) tuples. Returns
850 an empty list if no uncollected logs file exists.
851 """
852 log_exists = (self._uncollected_log_file and
853 os.path.exists(self._uncollected_log_file))
854 if log_exists:
855 return pickle.load(open(self._uncollected_log_file))
856 else:
857 return []
858
859
jadmanski10646442008-08-13 14:05:21 +0000860 def _render_record(self, status_code, subdir, operation, status='',
861 epoch_time=None, record_prefix=None,
862 optional_fields=None):
863 """
864 Internal Function to generate a record to be written into a
865 status log. For use by server_job.* classes only.
866 """
867 if subdir:
868 if re.match(r'[\n\t]', subdir):
mbligh2b92b862008-11-22 13:25:32 +0000869 raise ValueError('Invalid character in subdir string')
jadmanski10646442008-08-13 14:05:21 +0000870 substr = subdir
871 else:
872 substr = '----'
873
mbligh1b3b3762008-09-25 02:46:34 +0000874 if not log.is_valid_status(status_code):
mbligh2b92b862008-11-22 13:25:32 +0000875 raise ValueError('Invalid status code supplied: %s' % status_code)
jadmanski10646442008-08-13 14:05:21 +0000876 if not operation:
877 operation = '----'
878 if re.match(r'[\n\t]', operation):
mbligh2b92b862008-11-22 13:25:32 +0000879 raise ValueError('Invalid character in operation string')
jadmanski10646442008-08-13 14:05:21 +0000880 operation = operation.rstrip()
881 status = status.rstrip()
882 status = re.sub(r"\t", " ", status)
883 # Ensure any continuation lines are marked so we can
884 # detect them in the status file to ensure it is parsable.
mbligh0d0f67d2009-11-06 03:15:03 +0000885 status = re.sub(r"\n", "\n" + self._record_prefix + " ", status)
jadmanski10646442008-08-13 14:05:21 +0000886
887 if not optional_fields:
888 optional_fields = {}
889
890 # Generate timestamps for inclusion in the logs
891 if epoch_time is None:
892 epoch_time = int(time.time())
893 local_time = time.localtime(epoch_time)
894 optional_fields["timestamp"] = str(epoch_time)
895 optional_fields["localtime"] = time.strftime("%b %d %H:%M:%S",
896 local_time)
897
898 fields = [status_code, substr, operation]
899 fields += ["%s=%s" % x for x in optional_fields.iteritems()]
900 fields.append(status)
901
902 if record_prefix is None:
mbligh0d0f67d2009-11-06 03:15:03 +0000903 record_prefix = self._record_prefix
jadmanski10646442008-08-13 14:05:21 +0000904
905 msg = '\t'.join(str(x) for x in fields)
jadmanski10646442008-08-13 14:05:21 +0000906 return record_prefix + msg + '\n'
907
908
909 def _record_prerendered(self, msg):
910 """
911 Record a pre-rendered msg into the status logs. The only
912 change this makes to the message is to add on the local
913 indentation. Should not be called outside of server_job.*
914 classes. Unlike _record, this does not write the message
915 to standard output.
916 """
917 lines = []
jadmanski779bd292009-03-19 17:33:33 +0000918 status_file = self.get_status_log_path()
jadmanski10646442008-08-13 14:05:21 +0000919 status_log = open(status_file, 'a')
920 for line in msg.splitlines():
mbligh0d0f67d2009-11-06 03:15:03 +0000921 line = self._record_prefix + line + '\n'
jadmanski10646442008-08-13 14:05:21 +0000922 lines.append(line)
923 status_log.write(line)
924 status_log.close()
925 self.__parse_status(lines)
926
927
    def _fill_server_control_namespace(self, namespace, protect=True):
        """
        Prepare a namespace to be used when executing server control files.

        This sets up the control file API by importing modules and making them
        available under the appropriate names within namespace.

        For use by _execute_code().

        Args:
          namespace: The namespace dictionary to fill in.
          protect: Boolean.  If True (the default) any operation that would
              clobber an existing entry in namespace will cause an error.
        Raises:
          error.AutoservError: When a name would be clobbered by import.
        """
        def _import_names(module_name, names=()):
            """
            Import a module and assign named attributes into namespace.

            Args:
                module_name: The string module name.
                names: A limiting list of names to import from module_name.  If
                    empty (the default), all names are imported from the module
                    similar to a "from foo.bar import *" statement.
            Raises:
                error.AutoservError: When a name being imported would clobber
                    a name already in namespace.
            """
            # passing names as the fromlist makes __import__ return the leaf
            # module instead of the top-level package
            module = __import__(module_name, {}, {}, names)

            # No names supplied? Import * from the lowest level module.
            # (Ugh, why do I have to implement this part myself?)
            if not names:
                # with an empty fromlist __import__ returned the root
                # package, so walk down to the leaf submodule by hand
                for submodule_name in module_name.split('.')[1:]:
                    module = getattr(module, submodule_name)
                # honor __all__ when defined, mirroring "import *" semantics
                if hasattr(module, '__all__'):
                    names = getattr(module, '__all__')
                else:
                    names = dir(module)

            # Install each name into namespace, checking to make sure it
            # doesn't override anything that already exists.
            for name in names:
                # Check for conflicts to help prevent future problems.
                if name in namespace and protect:
                    if namespace[name] is not getattr(module, name):
                        raise error.AutoservError('importing name '
                                '%s from %s %r would override %r' %
                                (name, module_name, getattr(module, name),
                                 namespace[name]))
                    else:
                        # Encourage cleanliness and the use of __all__ for a
                        # more concrete API with less surprises on '*' imports.
                        warnings.warn('%s (%r) being imported from %s for use '
                                      'in server control files is not the '
                                      'first occurrance of that import.' %
                                      (name, namespace[name], module_name))

                namespace[name] = getattr(module, name)


        # This is the equivalent of prepending a bunch of import statements to
        # the front of the control script.
        namespace.update(os=os, sys=sys, logging=logging)
        _import_names('autotest_lib.server',
                ('hosts', 'autotest', 'kvm', 'git', 'standalone_profiler',
                 'source_kernel', 'rpm_kernel', 'deb_kernel', 'git_kernel'))
        _import_names('autotest_lib.server.subcommand',
                      ('parallel', 'parallel_simple', 'subcommand'))
        _import_names('autotest_lib.server.utils',
                      ('run', 'get_tmp_dir', 'sh_escape', 'parse_machine'))
        _import_names('autotest_lib.client.common_lib.error')
        _import_names('autotest_lib.client.common_lib.barrier', ('barrier',))

        # Inject ourself as the job object into other classes within the API.
        # (Yuck, this injection is a gross thing be part of a public API. -gps)
        #
        # XXX Base & SiteAutotest do not appear to use .job.  Who does?
        namespace['autotest'].Autotest.job = self
        # server.hosts.base_classes.Host uses .job.
        namespace['hosts'].Host.job = self
1010
1011
1012 def _execute_code(self, code_file, namespace, protect=True):
mbligh2b92b862008-11-22 13:25:32 +00001013 """
1014 Execute code using a copy of namespace as a server control script.
mbligh084bc172008-10-18 14:02:45 +00001015
1016 Unless protect_namespace is explicitly set to False, the dict will not
1017 be modified.
1018
1019 Args:
1020 code_file: The filename of the control file to execute.
1021 namespace: A dict containing names to make available during execution.
1022 protect: Boolean. If True (the default) a copy of the namespace dict
1023 is used during execution to prevent the code from modifying its
1024 contents outside of this function. If False the raw dict is
1025 passed in and modifications will be allowed.
1026 """
1027 if protect:
1028 namespace = namespace.copy()
1029 self._fill_server_control_namespace(namespace, protect=protect)
1030 # TODO: Simplify and get rid of the special cases for only 1 machine.
showard3e66e8c2008-10-27 19:20:51 +00001031 if len(self.machines) > 1:
mbligh084bc172008-10-18 14:02:45 +00001032 machines_text = '\n'.join(self.machines) + '\n'
1033 # Only rewrite the file if it does not match our machine list.
1034 try:
1035 machines_f = open(MACHINES_FILENAME, 'r')
1036 existing_machines_text = machines_f.read()
1037 machines_f.close()
1038 except EnvironmentError:
1039 existing_machines_text = None
1040 if machines_text != existing_machines_text:
1041 utils.open_write_close(MACHINES_FILENAME, machines_text)
1042 execfile(code_file, namespace, namespace)
jadmanski10646442008-08-13 14:05:21 +00001043
1044
1045 def _record(self, status_code, subdir, operation, status='',
1046 epoch_time=None, optional_fields=None):
1047 """
1048 Actual function for recording a single line into the status
1049 logs. Should never be called directly, only by job.record as
1050 this would bypass the console monitor logging.
1051 """
1052
mbligh2b92b862008-11-22 13:25:32 +00001053 msg = self._render_record(status_code, subdir, operation, status,
1054 epoch_time, optional_fields=optional_fields)
jadmanski10646442008-08-13 14:05:21 +00001055
jadmanski779bd292009-03-19 17:33:33 +00001056 status_file = self.get_status_log_path()
jadmanski10646442008-08-13 14:05:21 +00001057 sys.stdout.write(msg)
mbligh210bae62009-04-01 18:33:13 +00001058 if status_file:
1059 open(status_file, "a").write(msg)
jadmanski10646442008-08-13 14:05:21 +00001060 if subdir:
jadmanski779bd292009-03-19 17:33:33 +00001061 sub_status_file = self.get_status_log_path(subdir)
1062 open(sub_status_file, "a").write(msg)
jadmanski10646442008-08-13 14:05:21 +00001063 self.__parse_status(msg.splitlines())
1064
1065
1066 def __parse_status(self, new_lines):
mbligh0d0f67d2009-11-06 03:15:03 +00001067 if not self._using_parser:
jadmanski10646442008-08-13 14:05:21 +00001068 return
1069 new_tests = self.parser.process_lines(new_lines)
1070 for test in new_tests:
1071 self.__insert_test(test)
1072
1073
1074 def __insert_test(self, test):
mbligh2b92b862008-11-22 13:25:32 +00001075 """
1076 An internal method to insert a new test result into the
jadmanski10646442008-08-13 14:05:21 +00001077 database. This method will not raise an exception, even if an
1078 error occurs during the insert, to avoid failing a test
1079 simply because of unexpected database issues."""
showard21baa452008-10-21 00:08:39 +00001080 self.num_tests_run += 1
1081 if status_lib.is_worse_than_or_equal_to(test.status, 'FAIL'):
1082 self.num_tests_failed += 1
jadmanski10646442008-08-13 14:05:21 +00001083 try:
1084 self.results_db.insert_test(self.job_model, test)
1085 except Exception:
1086 msg = ("WARNING: An unexpected error occured while "
1087 "inserting test results into the database. "
1088 "Ignoring error.\n" + traceback.format_exc())
1089 print >> sys.stderr, msg
1090
mblighcaa62c22008-04-07 21:51:17 +00001091
mblighfc3da5b2010-01-06 18:37:22 +00001092 def preprocess_client_state(self):
1093 """
1094 Produce a state file for initializing the state of a client job.
1095
1096 Creates a new client state file with all the current server state, as
1097 well as some pre-set client state.
1098
1099 @returns The path of the file the state was written into.
1100 """
1101 # initialize the sysinfo state
1102 self._state.set('client', 'sysinfo', self.sysinfo.serialize())
1103
1104 # dump the state out to a tempfile
1105 fd, file_path = tempfile.mkstemp(dir=self.tmpdir)
1106 os.close(fd)
mbligha2c99492010-01-27 22:59:50 +00001107
1108 # write_to_file doesn't need locking, we exclusively own file_path
mblighfc3da5b2010-01-06 18:37:22 +00001109 self._state.write_to_file(file_path)
1110 return file_path
1111
1112
    def postprocess_client_state(self, state_path):
        """
        Update the state of this job with the state from a client job.

        Updates the state of the server side of a job with the final state
        of a client job that was run. Updates the non-client-specific state,
        pulls in some specific bits from the client-specific state, and then
        discards the rest. Removes the state file afterwards

        @param state_path A path to the state file from the client.
        """
        # update the on-disk state: merge everything the client recorded
        self._state.read_from_file(state_path)
        try:
            os.remove(state_path)
        except OSError, e:
            # ignore file-not-found errors; anything else is unexpected
            if e.errno != errno.ENOENT:
                raise

        # update the sysinfo state (preprocess_client_state stored the
        # serialized sysinfo under the 'client' namespace)
        if self._state.has('client', 'sysinfo'):
            self.sysinfo.deserialize(self._state.get('client', 'sysinfo'))

        # drop all the client-specific state
        self._state.discard_namespace('client')
1139
1140
# Look up a site-specific subclass to extend the base job implementation;
# presumably falls back to the base_server_job default passed in when no
# site_server_job module is deployed -- confirm against utils.import_site_class.
site_server_job = utils.import_site_class(
    __file__, "autotest_lib.server.site_server_job", "site_server_job",
    base_server_job)


class server_job(site_server_job):
    """The job class used by autoserv (site-extended when available)."""
    pass
jadmanskif37df842009-02-11 00:03:26 +00001147
1148
class warning_manager(object):
    """Class for controlling warning logs. Manages the enabling and disabling
    of warnings."""
    def __init__(self):
        # maps warning type -> list of (start, end) disabled intervals;
        # an end of None means "disabled from start onwards"
        self.disabled_warnings = {}


    def is_valid(self, timestamp, warning_type):
        """Indicates if a warning (based on the time it occured and its type)
        is a valid warning. A warning is considered "invalid" if this type of
        warning was marked as "disabled" at the time the warning occured."""
        intervals = self.disabled_warnings.get(warning_type, [])
        return not any(start <= timestamp and (end is None or timestamp < end)
                       for start, end in intervals)


    def disable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, disables all further warnings of this type."""
        intervals = self.disabled_warnings.setdefault(warning_type, [])
        # only open a new interval if the previous one has been closed
        if not intervals or intervals[-1][1] is not None:
            intervals.append((int(current_time_func()), None))


    def enable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, enables all further warnings of this type."""
        intervals = self.disabled_warnings.get(warning_type, [])
        # close the currently-open interval, if there is one
        if intervals and intervals[-1][1] is None:
            intervals[-1] = (intervals[-1][0], int(current_time_func()))