blob: b0243e3b278c2efed2376550c147a75e82dd9139 [file] [log] [blame]
Ahmad Sharif0dcbc4b2012-02-02 16:37:18 -08001#!/usr/bin/python
2
Yunlian Jiang00cc30e2013-03-28 13:23:57 -07003# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
Ahmad Sharif0dcbc4b2012-02-02 16:37:18 -08006
Ahmad Sharif4467f002012-12-20 12:09:49 -08007"""The experiment setting module."""
8
Ahmad Sharif0dcbc4b2012-02-02 16:37:18 -08009import os
10import time
Ahmad Sharif4467f002012-12-20 12:09:49 -080011
cmticee5bc63b2015-05-27 16:59:37 -070012import afe_lock_machine
Han Shenba649282015-08-05 17:19:55 -070013from threading import Lock
cmticee5bc63b2015-05-27 16:59:37 -070014
Ahmad Sharif4467f002012-12-20 12:09:49 -080015from utils import logger
Yunlian Jiang00cc30e2013-03-28 13:23:57 -070016from utils import misc
Ahmad Sharif4467f002012-12-20 12:09:49 -080017
Han Shene0662972015-09-18 16:53:34 -070018import benchmark_run
Han Shen738e6de2015-12-07 13:22:25 -080019from machine_manager import BadChecksum
Ahmad Sharif0dcbc4b2012-02-02 16:37:18 -080020from machine_manager import MachineManager
Ahmad Sharif4467f002012-12-20 12:09:49 -080021from machine_manager import MockMachineManager
Ahmad Sharif4467f002012-12-20 12:09:49 -080022import test_flag
Ahmad Sharif0dcbc4b2012-02-02 16:37:18 -080023
24
class Experiment(object):
  """Class representing an Experiment to be run.

  An experiment is the cross product of labels, benchmarks and iterations;
  each combination becomes one benchmark run (see _GenerateBenchmarkRuns).
  Runs are executed either by an externally supplied scheduler (set through
  set_schedv2) or, when no scheduler is set, by starting every benchmark run
  directly and polling it through IsComplete.
  """

  def __init__(self, name, remote, working_directory,
               chromeos_root, cache_conditions, labels, benchmarks,
               experiment_file, email_to, acquire_timeout, log_dir,
               log_level, share_cache, results_directory, locks_directory):
    """Validate the settings, register machines and create benchmark runs.

    Args:
      name: Experiment name; also used to build the default results
        directory name.
      remote: Iterable of machine names to run on. Replaced after
        construction by the names the machine manager actually accepted.
      working_directory: Base directory for the default results directory.
      chromeos_root: ChromeOS root handed to the machine manager. If blank,
        the first non-blank label.chromeos_root is used instead.
      cache_conditions: Passed through to every benchmark run.
      labels: Labels to run; each label's remote list is narrowed to the
        reachable machines.
      benchmarks: Benchmarks to run; each must expose name and iterations.
      experiment_file: Stored as-is on the instance.
      email_to: Stored as-is on the instance.
      acquire_timeout: Passed through to the machine manager.
      log_dir: Directory handed to the loggers created here.
      log_level: Passed to the machine manager and to every benchmark run.
      share_cache: Passed through to every benchmark run.
      results_directory: Where results go. If blank, defaults to
        <working_directory>/<name>_results.
      locks_directory: If not blank, file locking is used; otherwise the AFE
        server locking mechanism is used (see Cleanup).

    Raises:
      RuntimeError: If remote, benchmarks or labels is empty, or if no
        chromeos_root was given and none could be taken from the labels.
    """
    self.name = name
    self.working_directory = working_directory
    self.remote = remote
    # NOTE: this keeps the caller-supplied value. The fallback root derived
    # from the labels below is only handed to the machine manager.
    self.chromeos_root = chromeos_root
    self.cache_conditions = cache_conditions
    self.experiment_file = experiment_file
    self.email_to = email_to
    if not results_directory:
      self.results_directory = os.path.join(self.working_directory,
                                            self.name + "_results")
    else:
      self.results_directory = misc.CanonicalizePath(results_directory)
    self.log_dir = log_dir
    self.log_level = log_level
    self.labels = labels
    self.benchmarks = benchmarks
    self.num_complete = 0
    self.num_run_complete = 0
    self.share_cache = share_cache
    # If locks_directory (self.locks_dir) is not blank, we will use the file
    # locking mechanism; if it is blank then we will use the AFE server
    # locking mechanism.
    self.locks_dir = locks_directory
    self.locked_machines = []
    # Populated by Run() when no scheduler is set. Created here so that
    # IsComplete() can be called safely before Run().
    self.active_threads = []

    if not remote:
      raise RuntimeError("No remote hosts specified")
    if not self.benchmarks:
      raise RuntimeError("No benchmarks specified")
    if not self.labels:
      raise RuntimeError("No labels specified")

    # We need one chromeos_root to run the benchmarks in, but it doesn't
    # matter where it is, unless the ABIs are different.
    if not chromeos_root:
      for label in self.labels:
        if label.chromeos_root:
          chromeos_root = label.chromeos_root
          break
    if not chromeos_root:
      raise RuntimeError("No chromeos_root given and could not determine "
                         "one from the image path.")

    if test_flag.GetTestMode():
      self.machine_manager = MockMachineManager(chromeos_root, acquire_timeout,
                                                log_level, locks_directory)
    else:
      self.machine_manager = MachineManager(chromeos_root, acquire_timeout,
                                            log_level, locks_directory)
    self.l = logger.GetLogger(log_dir)

    for machine in self.remote:
      # machine_manager.AddMachine only adds reachable machines.
      self.machine_manager.AddMachine(machine)
    # Now machine_manager._all_machines contains a list of reachable
    # machines. This is a subset of self.remote. We make both lists the same.
    self.remote = [m.name for m in self.machine_manager._all_machines]

    for label in labels:
      # We filter out label remotes that are not reachable (not in
      # self.remote). So each label.remote is a sublist of experiment.remote.
      label.remote = [m for m in label.remote if m in self.remote]
      try:
        self.machine_manager.ComputeCommonCheckSum(label)
      except BadChecksum:
        # Force same image on all machines, then we do checksum again. No
        # bailout if checksums still do not match.
        self.machine_manager.ForceSameImageToAllMachines(label)
        self.machine_manager.ComputeCommonCheckSum(label)

      self.machine_manager.ComputeCommonCheckSumString(label)

    self.start_time = None
    self.benchmark_runs = self._GenerateBenchmarkRuns()

    self._schedv2 = None
    self._internal_counter_lock = Lock()

  def set_schedv2(self, schedv2):
    """Install the scheduler object that will drive this experiment."""
    self._schedv2 = schedv2

  def schedv2(self):
    """Return the installed scheduler, or None if none was set."""
    return self._schedv2

  def _GenerateBenchmarkRuns(self):
    """Generate benchmark runs from labels and benchmark definitions.

    One run is created per (label, benchmark, iteration) combination, with
    iterations numbered from 1 to benchmark.iterations inclusive.

    Returns:
      A list of benchmark_run.BenchmarkRun objects, ordered by label, then
      benchmark, then iteration.
    """
    benchmark_runs = []
    for label in self.labels:
      for benchmark in self.benchmarks:
        for iteration in range(1, benchmark.iterations + 1):
          benchmark_run_name = "%s: %s (%s)" % (label.name, benchmark.name,
                                                iteration)
          full_name = "%s_%s_%s" % (label.name, benchmark.name, iteration)
          # Each run gets its own logger named after the run.
          # NOTE(review): meaning of the third argument (True) is defined by
          # utils.logger.Logger -- confirm there.
          logger_to_use = logger.Logger(self.log_dir,
                                        "run.%s" % (full_name),
                                        True)
          benchmark_runs.append(benchmark_run.BenchmarkRun(
              benchmark_run_name,
              benchmark,
              label,
              iteration,
              self.cache_conditions,
              self.machine_manager,
              logger_to_use,
              self.log_level,
              self.share_cache))

    return benchmark_runs

  def Build(self):
    """Do nothing; placeholder kept for interface compatibility."""
    pass

  def Terminate(self):
    """Stop the experiment.

    Delegates to the scheduler when one is set; otherwise terminates every
    benchmark run that is still alive, logging each one as an error.
    """
    if self._schedv2 is not None:
      self._schedv2.terminate()
    else:
      for t in self.benchmark_runs:
        if t.isAlive():
          self.l.LogError("Terminating run: '%s'." % t.name)
          t.Terminate()

  def IsComplete(self):
    """Report whether every benchmark run has finished.

    With a scheduler set, the answer comes from the scheduler. Otherwise the
    active runs are polled: finished ones are counted in num_complete (and in
    num_run_complete unless they were cache hits) and dropped from
    active_threads.

    Returns:
      The scheduler's answer when a scheduler is set. Otherwise False if any
      run was still being tracked when this call started (even if all of
      them were reaped during the call), and True once nothing is tracked.
    """
    if self._schedv2 is not None:
      return self._schedv2.is_complete()
    if self.active_threads:
      # Iterate over a snapshot: removing from the list being iterated would
      # make the loop skip the element following each removed one.
      for t in list(self.active_threads):
        if t.isAlive():
          # Non-blocking join to give the thread a chance to be reaped.
          t.join(0)
        if not t.isAlive():
          self.num_complete += 1
          if not t.cache_hit:
            self.num_run_complete += 1
          self.active_threads.remove(t)
      return False
    return True

  def BenchmarkRunFinished(self, br):
    """Update internal counters after br finishes.

    Note this is only used by schedv2 and is called by multiple threads.
    Never throw any exception here.

    Args:
      br: The finished benchmark run; only its cache_hit attribute is read.
    """

    assert self._schedv2 is not None
    # The counters are shared between threads, so update them under the lock.
    with self._internal_counter_lock:
      self.num_complete += 1
      if not br.cache_hit:
        self.num_run_complete += 1

  def Run(self):
    """Record the start time and start executing the benchmark runs.

    With a scheduler set, the scheduler runs the experiment. Otherwise every
    benchmark run is started directly and tracked in active_threads.
    """
    self.start_time = time.time()
    if self._schedv2 is not None:
      self._schedv2.run_sched()
    else:
      self.active_threads = []
      for benchmark_run in self.benchmark_runs:
        # Set threads to daemon so program exits when ctrl-c is pressed.
        benchmark_run.daemon = True
        benchmark_run.start()
        self.active_threads.append(benchmark_run)

  def SetCacheConditions(self, cache_conditions):
    """Apply cache_conditions to every benchmark run of this experiment."""
    for benchmark_run in self.benchmark_runs:
      benchmark_run.SetCacheConditions(cache_conditions)

  def Cleanup(self):
    """Make sure all machines are unlocked."""
    if self.locks_dir:
      # We are using the file locks mechanism, so call machine_manager.Cleanup
      # to unlock everything.
      self.machine_manager.Cleanup()
    else:
      all_machines = self.locked_machines
      if not all_machines:
        return

      # If we locked any machines earlier, make sure we unlock them now.
      lock_mgr = afe_lock_machine.AFELockManager(all_machines, "",
                                                 self.labels[0].chromeos_root,
                                                 None)
      machine_states = lock_mgr.GetMachineStates("unlock")
      # .items() works on both Python 2 and Python 3 dictionaries.
      for k, state in machine_states.items():
        if state["locked"]:
          lock_mgr.UpdateLockInAFE(False, k)