blob: 854d7f77b4a6589587db6fdc9dc47bfe5120a67a [file] [log] [blame]
Tiancong Wang3edb0f52019-01-07 11:43:07 -08001# -*- coding: utf-8 -*-
Yunlian Jiang00cc30e2013-03-28 13:23:57 -07002# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
Zhizhou Yang7aa250e2019-07-16 16:23:02 -07005
Ahmad Sharif4467f002012-12-20 12:09:49 -08006"""The experiment setting module."""
7
Yunlian Jiang742ed2c2015-12-10 10:05:59 -08008from __future__ import print_function
9
Ahmad Sharif0dcbc4b2012-02-02 16:37:18 -080010import os
11import time
Ahmad Sharif4467f002012-12-20 12:09:49 -080012
Han Shenba649282015-08-05 17:19:55 -070013from threading import Lock
cmticee5bc63b2015-05-27 16:59:37 -070014
Yunlian Jiang0d1a9f32015-12-09 10:47:11 -080015from cros_utils import logger
16from cros_utils import misc
Ahmad Sharif4467f002012-12-20 12:09:49 -080017
Han Shene0662972015-09-18 16:53:34 -070018import benchmark_run
Han Shen738e6de2015-12-07 13:22:25 -080019from machine_manager import BadChecksum
Ahmad Sharif0dcbc4b2012-02-02 16:37:18 -080020from machine_manager import MachineManager
Ahmad Sharif4467f002012-12-20 12:09:49 -080021from machine_manager import MockMachineManager
Ahmad Sharif4467f002012-12-20 12:09:49 -080022import test_flag
Ahmad Sharif0dcbc4b2012-02-02 16:37:18 -080023
Luis Lozanof2a3ef42015-12-15 13:49:30 -080024
class Experiment(object):
    """Class representing an Experiment to be run.

    An experiment owns the list of benchmark runs (one per label, benchmark
    and iteration), the machine manager used to run them, and the counters
    that track how many runs have finished.
    """

    def __init__(self, name, remote, working_directory, chromeos_root,
                 cache_conditions, labels, benchmarks, experiment_file,
                 email_to, acquire_timeout, log_dir, log_level, share_cache,
                 results_directory, compress_results, locks_directory,
                 cwp_dso, ignore_min_max, crosfleet, dut_config):
        """Set up the experiment and generate its benchmark runs.

        Args:
            name: Experiment name; also used to derive the default results
                directory.
            remote: Iterable of machines to run on. After construction
                self.remote holds only the names of the machines the machine
                manager reports.
            working_directory: Base directory for the default results
                directory.
            chromeos_root: ChromeOS root handed to the machine manager. If
                empty, the first non-empty label.chromeos_root is used.
            cache_conditions: Passed to every BenchmarkRun.
            labels: Non-empty sequence of labels to benchmark.
            benchmarks: Non-empty sequence of benchmarks to run.
            experiment_file: Stored as-is on the instance.
            email_to: Stored as-is on the instance.
            acquire_timeout: Passed to the machine manager.
            log_dir: Directory used for the experiment and per-run loggers.
            log_level: Passed to the machine manager and every BenchmarkRun.
            share_cache: Passed to every BenchmarkRun.
            results_directory: Where results go; when empty it defaults to
                '<working_directory>/<name>_results'.
            compress_results: Stored as-is on the instance.
            locks_directory: Passed to the machine manager; when non-empty,
                Cleanup() also asks the machine manager to clean up.
            cwp_dso: Stored as-is on the instance.
            ignore_min_max: Stored as-is on the instance.
            crosfleet: When truthy, an empty `remote` is not rejected by the
                up-front argument check.
            dut_config: Passed to every BenchmarkRun.

        Raises:
            RuntimeError: If no benchmarks, labels, remote hosts (without
                crosfleet), chromeos_root or usable machines are available.
        """
        self.name = name
        self.working_directory = working_directory
        self.remote = remote
        # Note: this keeps the value passed in, not the fallback that may be
        # derived from the labels further down.
        self.chromeos_root = chromeos_root
        self.cache_conditions = cache_conditions
        self.experiment_file = experiment_file
        self.email_to = email_to
        if not results_directory:
            self.results_directory = os.path.join(self.working_directory,
                                                  self.name + '_results')
        else:
            self.results_directory = misc.CanonicalizePath(results_directory)
        self.compress_results = compress_results
        self.log_dir = log_dir
        self.log_level = log_level
        self.labels = labels
        self.benchmarks = benchmarks
        self.num_complete = 0
        self.num_run_complete = 0
        self.share_cache = share_cache
        self.active_threads = []
        self.locks_dir = locks_directory
        self.locked_machines = []
        self.lock_mgr = None
        self.cwp_dso = cwp_dso
        self.ignore_min_max = ignore_min_max
        self.crosfleet = crosfleet
        self.l = logger.GetLogger(log_dir)

        if not self.benchmarks:
            raise RuntimeError('No benchmarks specified')
        if not self.labels:
            raise RuntimeError('No labels specified')
        if not remote and not self.crosfleet:
            raise RuntimeError('No remote hosts specified')

        # We need one chromeos_root to run the benchmarks in, but it doesn't
        # matter where it is, unless the ABIs are different.
        if not chromeos_root:
            for label in self.labels:
                if label.chromeos_root:
                    chromeos_root = label.chromeos_root
                    break
        if not chromeos_root:
            raise RuntimeError('No chromeos_root given and could not determine '
                               'one from the image path.')

        machine_manager_fn = MachineManager
        if test_flag.GetTestMode():
            machine_manager_fn = MockMachineManager
        self.machine_manager = machine_manager_fn(chromeos_root,
                                                  acquire_timeout, log_level,
                                                  locks_directory)

        for machine in self.remote:
            # machine_manager.AddMachine only adds reachable machines.
            self.machine_manager.AddMachine(machine)
        # Now the machine manager holds the list of reachable machines. This
        # is a subset of self.remote. We make both lists the same.
        self.remote = [m.name for m in self.machine_manager.GetAllMachines()]
        if not self.remote:
            raise RuntimeError('No machine available for running experiment.')

        # Initialize checksums for all machines, ignore errors at this time.
        # The checksum will be double checked, and image will be flashed
        # after duts are locked/leased.
        self.SetCheckSums()

        self.start_time = None
        self.benchmark_runs = self._GenerateBenchmarkRuns(dut_config)

        self._schedv2 = None
        self._internal_counter_lock = Lock()

    def set_schedv2(self, schedv2):
        """Install the scheduler that Run/Terminate/IsComplete delegate to."""
        self._schedv2 = schedv2

    def schedv2(self):
        """Return the installed scheduler, or None if none was set."""
        return self._schedv2

    def _GenerateBenchmarkRuns(self, dut_config):
        """Generate benchmark runs from labels and benchmark definitions.

        One BenchmarkRun is created per (label, benchmark, iteration), with
        iterations numbered from 1 to benchmark.iterations inclusive. Each
        run gets its own logger named 'run.<label>_<benchmark>_<iteration>'.

        Args:
            dut_config: Passed through to every BenchmarkRun.

        Returns:
            The list of created BenchmarkRun objects.
        """
        benchmark_runs = []
        for label in self.labels:
            for benchmark in self.benchmarks:
                for iteration in range(1, benchmark.iterations + 1):
                    benchmark_run_name = '%s: %s (%s)' % (
                        label.name, benchmark.name, iteration)
                    full_name = '%s_%s_%s' % (label.name, benchmark.name,
                                              iteration)
                    logger_to_use = logger.Logger(self.log_dir,
                                                  'run.%s' % (full_name), True)
                    benchmark_runs.append(
                        benchmark_run.BenchmarkRun(
                            benchmark_run_name, benchmark, label, iteration,
                            self.cache_conditions, self.machine_manager,
                            logger_to_use, self.log_level, self.share_cache,
                            dut_config))

        return benchmark_runs

    def SetCheckSums(self, forceSameImage=False):
        """Compute the common machine checksum for every label.

        Args:
            forceSameImage: If True, a BadChecksum from the first attempt
                triggers ForceSameImageToAllMachines followed by a second
                checksum computation. If False, BadChecksum is ignored.
        """
        for label in self.labels:
            # We filter out label remotes that are not reachable (not in
            # self.remote). So each label.remote is a sublist of
            # experiment.remote.
            label.remote = [r for r in label.remote if r in self.remote]
            try:
                self.machine_manager.ComputeCommonCheckSum(label)
            except BadChecksum:
                # Force same image on all machines, then we do checksum
                # again. No bailout if checksums still do not match.
                # TODO (zhizhouy): Need to figure out how flashing image will
                # influence the new checksum.
                if forceSameImage:
                    self.machine_manager.ForceSameImageToAllMachines(label)
                    self.machine_manager.ComputeCommonCheckSum(label)

            self.machine_manager.ComputeCommonCheckSumString(label)

    def Build(self):
        """Do nothing; placeholder build step."""
        pass

    def Terminate(self):
        """Stop the experiment.

        Delegates to the scheduler when one is installed; otherwise logs and
        terminates every benchmark run that is still alive.
        """
        if self._schedv2 is not None:
            self._schedv2.terminate()
        else:
            for t in self.benchmark_runs:
                # Thread.isAlive() was removed in Python 3.9; is_alive() is
                # the spelling available on every supported version.
                if t.is_alive():
                    self.l.LogError("Terminating run: '%s'." % t.name)
                    t.Terminate()

    def IsComplete(self):
        """Report whether the experiment has finished.

        With a scheduler installed the answer comes from the scheduler.
        Otherwise every finished thread in self.active_threads is counted and
        removed from the list.

        Returns:
            The scheduler's is_complete() result when a scheduler is set.
            Otherwise True only if there were no active threads when the call
            started; a call that reaps the last thread still returns False
            and the following call returns True.
        """
        if self._schedv2 is not None:
            return self._schedv2.is_complete()
        if not self.active_threads:
            return True
        # Walk a snapshot: removing from the list being iterated would skip
        # the element right after each removed one.
        for t in list(self.active_threads):
            if t.is_alive():
                t.join(0)
            if not t.is_alive():
                self.num_complete += 1
                if not t.cache_hit:
                    self.num_run_complete += 1
                self.active_threads.remove(t)
        return False

    def BenchmarkRunFinished(self, br):
        """Update internal counters after br finishes.

        Note this is only used by schedv2 and is called by multiple threads.
        Never throw any exception here.

        Args:
            br: The finished benchmark run; only its cache_hit attribute is
                read.
        """
        assert self._schedv2 is not None
        with self._internal_counter_lock:
            self.num_complete += 1
            if not br.cache_hit:
                self.num_run_complete += 1

    def Run(self):
        """Record the start time and start the experiment.

        Delegates to the scheduler when one is installed; otherwise starts
        every benchmark run as a daemon thread and tracks it in
        self.active_threads.
        """
        self.start_time = time.time()
        if self._schedv2 is not None:
            self._schedv2.run_sched()
        else:
            self.active_threads = []
            for run in self.benchmark_runs:
                # Set threads to daemon so program exits when ctrl-c is
                # pressed.
                run.daemon = True
                run.start()
                self.active_threads.append(run)

    def SetCacheConditions(self, cache_conditions):
        """Forward cache_conditions to every benchmark run."""
        for run in self.benchmark_runs:
            run.SetCacheConditions(cache_conditions)

    def Cleanup(self):
        """Make sure all machines are unlocked.

        Raises:
            RuntimeError: If some machine in self.locked_machines is not
                reported as unlocked by the lock manager.
        """
        if self.locks_dir:
            # We are using the file locks mechanism, so call
            # machine_manager.Cleanup to unlock everything.
            self.machine_manager.Cleanup()

        if test_flag.GetTestMode() or not self.locked_machines:
            return

        # If we locked any machines earlier, make sure we unlock them now.
        if self.lock_mgr:
            machine_states = self.lock_mgr.GetMachineStates('unlock')
            self.lock_mgr.CheckMachineLocks(machine_states, 'unlock')
            unlocked_machines = self.lock_mgr.UpdateMachines(False)
            failed_machines = [
                m for m in self.locked_machines if m not in unlocked_machines
            ]
            if failed_machines:
                raise RuntimeError(
                    'These machines are not unlocked correctly: %s' %
                    failed_machines)
            self.lock_mgr = None
Zhizhou Yangcdd9e342019-09-19 20:56:32 -0700229 self.lock_mgr = None