blob: a4da92272fedfe5eef2005a236fe9b08e2da6b34 [file] [log] [blame]
Ahmad Sharif0dcbc4b2012-02-02 16:37:18 -08001#!/usr/bin/python
2
Yunlian Jiang00cc30e2013-03-28 13:23:57 -07003# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
Ahmad Sharif0dcbc4b2012-02-02 16:37:18 -08006
Ahmad Sharif4467f002012-12-20 12:09:49 -08007"""The experiment setting module."""
8
Ahmad Sharif0dcbc4b2012-02-02 16:37:18 -08009import os
10import time
Ahmad Sharif4467f002012-12-20 12:09:49 -080011
cmticee5bc63b2015-05-27 16:59:37 -070012import afe_lock_machine
Han Shenba649282015-08-05 17:19:55 -070013from threading import Lock
cmticee5bc63b2015-05-27 16:59:37 -070014
Yunlian Jiang0d1a9f32015-12-09 10:47:11 -080015from cros_utils import logger
16from cros_utils import misc
Ahmad Sharif4467f002012-12-20 12:09:49 -080017
Han Shene0662972015-09-18 16:53:34 -070018import benchmark_run
Han Shen738e6de2015-12-07 13:22:25 -080019from machine_manager import BadChecksum
Ahmad Sharif0dcbc4b2012-02-02 16:37:18 -080020from machine_manager import MachineManager
Ahmad Sharif4467f002012-12-20 12:09:49 -080021from machine_manager import MockMachineManager
Ahmad Sharif4467f002012-12-20 12:09:49 -080022import test_flag
Ahmad Sharif0dcbc4b2012-02-02 16:37:18 -080023
class Experiment(object):
    """Class representing an Experiment to be run.

    An experiment is the cross product of labels, benchmarks and benchmark
    iterations; one BenchmarkRun is created for each combination.  The runs
    are executed either by an externally supplied scheduler (see
    set_schedv2) or, when no scheduler is set, by starting every
    BenchmarkRun directly as a daemon thread.
    """

    def __init__(self, name, remote, working_directory,
                 chromeos_root, cache_conditions, labels, benchmarks,
                 experiment_file, email_to, acquire_timeout, log_dir,
                 log_level, share_cache, results_directory, locks_directory):
        """Set up machines, checksums and benchmark runs for the experiment.

        Args:
          name: Experiment name; also used to build the default results
            directory name ("<name>_results").
          remote: Machines to use.  Each entry is handed to
            machine_manager.AddMachine; afterwards self.remote is reduced to
            the names of the machines the manager actually kept.
          working_directory: Base directory for the default results directory.
          chromeos_root: ChromeOS root for the machine manager.  If empty, the
            first non-empty label.chromeos_root is used instead.
          cache_conditions: Forwarded to every BenchmarkRun.
          labels: Labels to run; must not be empty.
          benchmarks: Benchmarks to run; must not be empty.
          experiment_file: Stored as self.experiment_file.
          email_to: Stored as self.email_to.
          acquire_timeout: Forwarded to the machine manager.
          log_dir: Directory handed to the loggers created here.
          log_level: Forwarded to the machine manager and every BenchmarkRun.
          share_cache: Forwarded to every BenchmarkRun.
          results_directory: Results location; if empty a default below
            working_directory is used.
          locks_directory: If not blank the file locking mechanism is used;
            if blank the AFE server locking mechanism is used.

        Raises:
          RuntimeError: If no remotes, benchmarks or labels were given, if no
            chromeos_root can be determined, or if no machine is available.
        """
        self.name = name
        self.working_directory = working_directory
        self.remote = remote
        # NOTE: this keeps the value passed by the caller, even when a
        # fallback root is picked from the labels further down.
        self.chromeos_root = chromeos_root
        self.cache_conditions = cache_conditions
        self.experiment_file = experiment_file
        self.email_to = email_to
        if not results_directory:
            self.results_directory = os.path.join(self.working_directory,
                                                  self.name + "_results")
        else:
            self.results_directory = misc.CanonicalizePath(results_directory)
        self.log_dir = log_dir
        self.log_level = log_level
        self.labels = labels
        self.benchmarks = benchmarks
        self.num_complete = 0
        self.num_run_complete = 0
        self.share_cache = share_cache
        # If locks_directory (self.locks_dir) not blank, we will use the file
        # locking mechanism; if it is blank then we will use the AFE server
        # locking mechanism.
        self.locks_dir = locks_directory
        self.locked_machines = []
        # Runs started directly by Run() that have not been reaped yet.
        # Created here so IsComplete() is safe to call before Run().
        self.active_threads = []

        if not remote:
            raise RuntimeError("No remote hosts specified")
        if not self.benchmarks:
            raise RuntimeError("No benchmarks specified")
        if not self.labels:
            raise RuntimeError("No labels specified")

        # We need one chromeos_root to run the benchmarks in, but it doesn't
        # matter where it is, unless the ABIs are different.
        if not chromeos_root:
            for label in self.labels:
                if label.chromeos_root:
                    chromeos_root = label.chromeos_root
                    break
        if not chromeos_root:
            raise RuntimeError("No chromeos_root given and could not determine "
                               "one from the image path.")

        if test_flag.GetTestMode():
            self.machine_manager = MockMachineManager(chromeos_root,
                                                      acquire_timeout,
                                                      log_level,
                                                      locks_directory)
        else:
            self.machine_manager = MachineManager(chromeos_root,
                                                  acquire_timeout,
                                                  log_level,
                                                  locks_directory)
        self.l = logger.GetLogger(log_dir)

        for machine in self.remote:
            # machine_manager.AddMachine only adds reachable machines.
            self.machine_manager.AddMachine(machine)
        # Now machine_manager._all_machines contains a list of reachable
        # machines. This is a subset of self.remote. We make both lists the
        # same.
        self.remote = [m.name for m in self.machine_manager._all_machines]
        if not self.remote:
            raise RuntimeError("No machine available for running experiment.")

        for label in labels:
            # We filter out label remotes that are not reachable (not in
            # self.remote). So each label.remote is a sublist of
            # experiment.remote.  A list comprehension (rather than filter())
            # guarantees a real, re-iterable list on every Python version.
            label.remote = [r for r in label.remote if r in self.remote]
            try:
                self.machine_manager.ComputeCommonCheckSum(label)
            except BadChecksum:
                # Force same image on all machines, then we do checksum again.
                # No bailout if checksums still do not match.
                self.machine_manager.ForceSameImageToAllMachines(label)
                self.machine_manager.ComputeCommonCheckSum(label)

            self.machine_manager.ComputeCommonCheckSumString(label)

        self.start_time = None
        self.benchmark_runs = self._GenerateBenchmarkRuns()

        self._schedv2 = None
        self._internal_counter_lock = Lock()

    def set_schedv2(self, schedv2):
        """Install the scheduler object that Run/Terminate/IsComplete use."""
        self._schedv2 = schedv2

    def schedv2(self):
        """Return the installed scheduler, or None if none was set."""
        return self._schedv2

    def _GenerateBenchmarkRuns(self):
        """Generate benchmark runs from labels and benchmark definitions.

        Returns:
          A list with one BenchmarkRun per (label, benchmark, iteration),
          iterations being numbered from 1 to benchmark.iterations.
        """
        benchmark_runs = []
        for label in self.labels:
            for benchmark in self.benchmarks:
                for iteration in range(1, benchmark.iterations + 1):
                    benchmark_run_name = "%s: %s (%s)" % (label.name,
                                                          benchmark.name,
                                                          iteration)
                    full_name = "%s_%s_%s" % (label.name, benchmark.name,
                                              iteration)
                    # Each run gets its own logger named after the run.
                    logger_to_use = logger.Logger(self.log_dir,
                                                  "run.%s" % (full_name),
                                                  True)
                    benchmark_runs.append(benchmark_run.BenchmarkRun(
                        benchmark_run_name,
                        benchmark,
                        label,
                        iteration,
                        self.cache_conditions,
                        self.machine_manager,
                        logger_to_use,
                        self.log_level,
                        self.share_cache))

        return benchmark_runs

    def Build(self):
        """Placeholder; intentionally does nothing."""
        pass

    def Terminate(self):
        """Stop the experiment.

        Delegates to the scheduler when one is set; otherwise terminates
        every benchmark run that is still alive.
        """
        if self._schedv2 is not None:
            self._schedv2.terminate()
            return
        for run in self.benchmark_runs:
            # is_alive() exists on threading.Thread since Python 2.6, whereas
            # isAlive() was removed in Python 3.9.  Presumably every run is a
            # threading.Thread subclass -- confirm against BenchmarkRun.
            if run.is_alive():
                self.l.LogError("Terminating run: '%s'." % run.name)
                run.Terminate()

    def IsComplete(self):
        """Poll for completion and reap finished runs.

        With a scheduler set, its is_complete() result is returned.
        Otherwise every run in self.active_threads that has finished is
        counted in num_complete (and in num_run_complete unless it was a
        cache hit) and dropped from the list.

        Returns:
          True if there was nothing left to wait for when called.  False if
          any run was still pending at the start of this call, even when
          this call reaped the last one; the following call then returns
          True.
        """
        if self._schedv2 is not None:
            return self._schedv2.is_complete()
        if not self.active_threads:
            return True

        # Build the survivor list separately: removing entries from the list
        # being iterated would skip the element following each removal.
        still_running = []
        for run in self.active_threads:
            if run.is_alive():
                run.join(0)
            if run.is_alive():
                still_running.append(run)
                continue
            self.num_complete += 1
            if not run.cache_hit:
                self.num_run_complete += 1
        # Slice assignment keeps the identity of the existing list object.
        self.active_threads[:] = still_running
        return False

    def BenchmarkRunFinished(self, br):
        """Update internal counters after br finishes.

        Note this is only used by schedv2 and is called by multiple threads.
        Never throw any exception here.

        Args:
          br: The finished benchmark run; only its cache_hit attribute is
            read.
        """
        assert self._schedv2 is not None
        with self._internal_counter_lock:
            self.num_complete += 1
            if not br.cache_hit:
                self.num_run_complete += 1

    def Run(self):
        """Record the start time and start the experiment.

        With a scheduler set, its run_sched() is called.  Otherwise every
        benchmark run is started directly and tracked in
        self.active_threads.
        """
        self.start_time = time.time()
        if self._schedv2 is not None:
            self._schedv2.run_sched()
            return
        self.active_threads = []
        for run in self.benchmark_runs:
            # Set threads to daemon so program exits when ctrl-c is pressed.
            run.daemon = True
            run.start()
            self.active_threads.append(run)

    def SetCacheConditions(self, cache_conditions):
        """Forward cache_conditions to every benchmark run."""
        for run in self.benchmark_runs:
            run.SetCacheConditions(cache_conditions)

    def Cleanup(self):
        """Make sure all machines are unlocked."""
        if self.locks_dir:
            # We are using the file locks mechanism, so call
            # machine_manager.Cleanup to unlock everything.
            self.machine_manager.Cleanup()
            return

        all_machines = self.locked_machines
        if not all_machines:
            return

        # If we locked any machines earlier, make sure we unlock them now.
        lock_mgr = afe_lock_machine.AFELockManager(all_machines, "",
                                                   self.labels[0].chromeos_root,
                                                   None)
        machine_states = lock_mgr.GetMachineStates("unlock")
        # items() works on Python 2 and 3; iteritems() is Python 2 only.
        for machine, state in machine_states.items():
            if state["locked"]:
                lock_mgr.UpdateLockInAFE(False, machine)
217 lock_mgr.UpdateLockInAFE(False, k)