blob: d156afef8ed2f619e4c939943cc2254fd1efdddc [file] [log] [blame]
Ahmad Sharif0dcbc4b2012-02-02 16:37:18 -08001#!/usr/bin/python
2
Yunlian Jiang00cc30e2013-03-28 13:23:57 -07003# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
Ahmad Sharif0dcbc4b2012-02-02 16:37:18 -08006
Ahmad Sharif4467f002012-12-20 12:09:49 -08007"""The experiment setting module."""
8
Ahmad Sharif0dcbc4b2012-02-02 16:37:18 -08009import os
10import time
Ahmad Sharif4467f002012-12-20 12:09:49 -080011
cmticee5bc63b2015-05-27 16:59:37 -070012import afe_lock_machine
Han Shenba649282015-08-05 17:19:55 -070013from threading import Lock
cmticee5bc63b2015-05-27 16:59:37 -070014
Ahmad Sharif4467f002012-12-20 12:09:49 -080015from utils import logger
Yunlian Jiang00cc30e2013-03-28 13:23:57 -070016from utils import misc
Ahmad Sharif4467f002012-12-20 12:09:49 -080017
Han Shene0662972015-09-18 16:53:34 -070018import benchmark_run
Han Shen738e6de2015-12-07 13:22:25 -080019from machine_manager import BadChecksum
Ahmad Sharif0dcbc4b2012-02-02 16:37:18 -080020from machine_manager import MachineManager
Ahmad Sharif4467f002012-12-20 12:09:49 -080021from machine_manager import MockMachineManager
Ahmad Sharif4467f002012-12-20 12:09:49 -080022import test_flag
Ahmad Sharif0dcbc4b2012-02-02 16:37:18 -080023
24
class Experiment(object):
  """Class representing an Experiment to be run.

  An experiment owns a machine manager, one benchmark run per
  (label, benchmark, iteration) combination, and the counters that track how
  many of those runs have finished.
  """

  def __init__(self, name, remote, working_directory,
               chromeos_root, cache_conditions, labels, benchmarks,
               experiment_file, email_to, acquire_timeout, log_dir,
               log_level, share_cache, results_directory, locks_directory):
    """Set up the machine manager and generate all benchmark runs.

    Args:
      name: Experiment name; also used to build the default results directory.
      remote: Iterable of machines handed to the machine manager. After
        construction self.remote holds only the names of the machines the
        machine manager actually kept.
      working_directory: Base directory for the default results directory.
      chromeos_root: ChromeOS root passed to the machine manager. If empty,
        one is taken from the labels.
      cache_conditions: Forwarded to every benchmark run.
      labels: Labels to run; each label's `remote` is narrowed to the
        machines present in self.remote.
      benchmarks: Benchmarks to run; each provides `name` and `iterations`.
      experiment_file: Stored as self.experiment_file.
      email_to: Stored as self.email_to.
      acquire_timeout: Passed to the machine manager.
      log_dir: Directory passed to the loggers created here.
      log_level: Passed to the machine manager and every benchmark run.
      share_cache: Forwarded to every benchmark run.
      results_directory: Where results go. If empty, defaults to
        "<working_directory>/<name>_results".
      locks_directory: If not blank, file locking is used; otherwise the AFE
        server locking mechanism is used (see Cleanup).

    Raises:
      Exception: If no chromeos_root was given and none of the labels has one.
    """
    self.name = name
    self.working_directory = working_directory
    self.remote = remote
    # NOTE(review): this keeps the caller-supplied value; the fallback below
    # only updates the local that is passed to the machine manager.
    self.chromeos_root = chromeos_root
    self.cache_conditions = cache_conditions
    self.experiment_file = experiment_file
    self.email_to = email_to
    if not results_directory:
      self.results_directory = os.path.join(self.working_directory,
                                            self.name + "_results")
    else:
      self.results_directory = misc.CanonicalizePath(results_directory)
    self.log_dir = log_dir
    self.log_level = log_level
    self.labels = labels
    self.benchmarks = benchmarks
    self.num_complete = 0
    self.num_run_complete = 0
    self.share_cache = share_cache
    # If locks_directory (self.locks_dir) is not blank, we will use the file
    # locking mechanism; if it is blank then we will use the AFE server
    # locking mechanism.
    self.locks_dir = locks_directory
    self.locked_machines = []

    # We need one chromeos_root to run the benchmarks in, but it doesn't
    # matter where it is, unless the ABIs are different.
    if not chromeos_root:
      for label in self.labels:
        if label.chromeos_root:
          chromeos_root = label.chromeos_root
    if not chromeos_root:
      raise Exception("No chromeos_root given and could not determine one from "
                      "the image path.")

    if test_flag.GetTestMode():
      self.machine_manager = MockMachineManager(chromeos_root, acquire_timeout,
                                                log_level, locks_directory)
    else:
      self.machine_manager = MachineManager(chromeos_root, acquire_timeout,
                                            log_level, locks_directory)
    self.l = logger.GetLogger(log_dir)

    for machine in self.remote:
      # machine_manager.AddMachine only adds reachable machines.
      self.machine_manager.AddMachine(machine)
    # Now machine_manager._all_machines contains a list of reachable
    # machines. This is a subset of self.remote. We make both lists the same.
    self.remote = [m.name for m in self.machine_manager._all_machines]

    for label in labels:
      # We filter out label remotes that are not reachable (not in
      # self.remote). So each label.remote is a sublist of experiment.remote.
      # A list comprehension (rather than filter()) guarantees a real list on
      # both Python 2 and Python 3.
      label.remote = [r for r in label.remote if r in self.remote]
      try:
        self.machine_manager.ComputeCommonCheckSum(label)
      except BadChecksum:
        # Force same image on all machines, then we do checksum again. No
        # bailout if checksums still do not match.
        self.machine_manager.ForceSameImageToAllMachines(label)
        self.machine_manager.ComputeCommonCheckSum(label)

      self.machine_manager.ComputeCommonCheckSumString(label)

    self.start_time = None
    self.benchmark_runs = self._GenerateBenchmarkRuns()

    self._schedv2 = None
    self._internal_counter_lock = Lock()

  def set_schedv2(self, schedv2):
    """Attach the schedv2 scheduler that will drive this experiment."""
    self._schedv2 = schedv2

  def schedv2(self):
    """Return the attached schedv2 scheduler, or None if there is none."""
    return self._schedv2

  def _GenerateBenchmarkRuns(self):
    """Generate benchmark runs from labels and benchmark definitions.

    Returns:
      A list with one BenchmarkRun per (label, benchmark, iteration), where
      iterations are numbered from 1 to benchmark.iterations inclusive.
    """
    benchmark_runs = []
    for label in self.labels:
      for benchmark in self.benchmarks:
        for iteration in range(1, benchmark.iterations + 1):

          benchmark_run_name = "%s: %s (%s)" % (label.name, benchmark.name,
                                                iteration)
          full_name = "%s_%s_%s" % (label.name, benchmark.name, iteration)
          # Each run gets its own logger, named after the run.
          logger_to_use = logger.Logger(self.log_dir,
                                        "run.%s" % (full_name),
                                        True)
          benchmark_runs.append(benchmark_run.BenchmarkRun(
              benchmark_run_name,
              benchmark,
              label,
              iteration,
              self.cache_conditions,
              self.machine_manager,
              logger_to_use,
              self.log_level,
              self.share_cache))

    return benchmark_runs

  def Build(self):
    """Do nothing; placeholder build step."""
    pass

  def Terminate(self):
    """Stop the experiment.

    Delegates to the schedv2 scheduler when one is attached; otherwise
    terminates every benchmark run that is still alive.
    """
    if self._schedv2 is not None:
      self._schedv2.terminate()
    else:
      for t in self.benchmark_runs:
        if t.isAlive():
          self.l.LogError("Terminating run: '%s'." % t.name)
          t.Terminate()

  def IsComplete(self):
    """Report whether the experiment has finished, reaping finished runs.

    With a schedv2 scheduler attached, its is_complete() answer is returned.
    Otherwise every finished run in self.active_threads is counted and removed
    from the list.

    Returns:
      True if there was nothing left to wait for when called. False if any
      run was still being tracked on entry, even if this call reaped the last
      one; the following call then returns True.
    """
    if self._schedv2:
      return self._schedv2.is_complete()
    if not self.active_threads:
      return True

    # Collect the survivors in a separate list: removing entries from
    # self.active_threads while iterating over it skips the element that
    # follows each removal, which delays the completion counters.
    still_running = []
    for t in self.active_threads:
      if t.isAlive():
        t.join(0)
      if t.isAlive():
        still_running.append(t)
        continue
      self.num_complete += 1
      if not t.cache_hit:
        self.num_run_complete += 1
    # Update in place so the list object itself stays the same.
    self.active_threads[:] = still_running
    return False

  def BenchmarkRunFinished(self, br):
    """Update internal counters after br finishes.

    Note this is only used by schedv2 and is called by multiple threads.
    Never throw any exception here.

    Args:
      br: The finished benchmark run; its cache_hit attribute decides whether
        num_run_complete is incremented.
    """

    assert self._schedv2 is not None
    with self._internal_counter_lock:
      self.num_complete += 1
      if not br.cache_hit:
        self.num_run_complete += 1

  def Run(self):
    """Record the start time and start the experiment.

    Uses the schedv2 scheduler when one is attached; otherwise starts every
    benchmark run as a daemon thread and tracks it in self.active_threads.
    """
    self.start_time = time.time()
    if self._schedv2 is not None:
      self._schedv2.run_sched()
    else:
      self.active_threads = []
      for run in self.benchmark_runs:
        # Set threads to daemon so program exits when ctrl-c is pressed.
        run.daemon = True
        run.start()
        self.active_threads.append(run)

  def SetCacheConditions(self, cache_conditions):
    """Forward cache_conditions to every benchmark run."""
    for run in self.benchmark_runs:
      run.SetCacheConditions(cache_conditions)

  def Cleanup(self):
    """Make sure all machines are unlocked."""
    if self.locks_dir:
      # We are using the file locks mechanism, so call machine_manager.Cleanup
      # to unlock everything.
      self.machine_manager.Cleanup()
    else:
      all_machines = self.locked_machines
      if not all_machines:
        return

      # If we locked any machines earlier, make sure we unlock them now.
      lock_mgr = afe_lock_machine.AFELockManager(all_machines, "",
                                                 self.labels[0].chromeos_root,
                                                 None)
      machine_states = lock_mgr.GetMachineStates("unlock")
      # items() works on both Python 2 and Python 3.
      for k, state in machine_states.items():
        if state["locked"]:
          lock_mgr.UpdateLockInAFE(False, k)
208 lock_mgr.UpdateLockInAFE(False, k)