Yuheng Long | 057ad5c | 2013-08-07 08:48:05 -0700 | [diff] [blame^] | 1 | # Copyright (c) 2013 The Chromium OS Authors. All rights reserved. |
| 2 | # Use of this source code is governed by a BSD-style license that can be |
| 3 | # found in the LICENSE file. |
| 4 | |
| 5 | """The hill genetic algorithm. |
| 6 | |
| 7 | Part of the Chrome build flags optimization. |
| 8 | """ |
| 9 | |
| 10 | __author__ = 'yuhenglong@google.com (Yuheng Long)' |
| 11 | |
| 12 | import random |
| 13 | |
| 14 | import flags |
| 15 | from flags import Flag |
| 16 | from flags import FlagSet |
| 17 | from generation import Generation |
| 18 | from task import Task |
| 19 | |
| 20 | |
| 21 | def CrossoverWith(first_flag, second_flag): |
| 22 | """Get a crossed over gene. |
| 23 | |
| 24 | At present, this just picks either/or of these values. However, it could be |
| 25 | implemented as an integer maskover effort, if required. |
| 26 | |
| 27 | Args: |
| 28 | first_flag: The first gene (Flag) to cross over with. |
| 29 | second_flag: The second gene (Flag) to cross over with. |
| 30 | |
| 31 | Returns: |
| 32 | A Flag that can be considered appropriately randomly blended between the |
| 33 | first and second input flag. |
| 34 | """ |
| 35 | |
| 36 | return first_flag if random.randint(0, 1) else second_flag |
| 37 | |
| 38 | |
| 39 | def RandomMutate(specs, flag_set, mutation_rate): |
| 40 | """Randomly mutate the content of a task. |
| 41 | |
| 42 | Args: |
| 43 | specs: A list of spec from which the flag set is created. |
| 44 | flag_set: The current flag set being mutated |
| 45 | mutation_rate: What fraction of genes to mutate. |
| 46 | |
| 47 | Returns: |
| 48 | A Genetic Task constructed by randomly mutating the input flag set. |
| 49 | """ |
| 50 | |
| 51 | results_flags = [] |
| 52 | |
| 53 | for spec in specs: |
| 54 | # Randomly choose whether this flag should be mutated. |
| 55 | if random.randint(0, int(1 / mutation_rate)): |
| 56 | continue |
| 57 | |
| 58 | # If the flag is not already in the flag set, it is added. |
| 59 | if spec not in flag_set: |
| 60 | results_flags.append(Flag(spec)) |
| 61 | continue |
| 62 | |
| 63 | # If the flag is already in the flag set, it is mutated. |
| 64 | result = flags.Search(spec) |
| 65 | |
| 66 | # The value of a numeric flag will be changed, and a boolean flag will be |
| 67 | # dropped. |
| 68 | if not result: |
| 69 | continue |
| 70 | |
| 71 | value = flag_set[spec].GetValue() |
| 72 | |
| 73 | # Randomly select a nearby value of the current value of the flag. |
| 74 | rand_arr = [value] |
| 75 | if value + 1 < int(result.group('end')): |
| 76 | rand_arr.append(value + 1) |
| 77 | |
| 78 | rand_arr.append(value - 1) |
| 79 | value = random.sample(rand_arr, 1)[0] |
| 80 | |
| 81 | # If the value is smaller than the start of the spec, this flag will be |
| 82 | # dropped. |
| 83 | if value != int(result.group('start')) - 1: |
| 84 | results_flags.append(Flag(spec, value)) |
| 85 | |
| 86 | return GATask(FlagSet(results_flags)) |
| 87 | |
| 88 | |
| 89 | class GATask(Task): |
| 90 | def __init__(self, flag_set): |
| 91 | Task.__init__(self, flag_set) |
| 92 | |
| 93 | def ReproduceWith(self, other, specs, mutation_rate): |
| 94 | """Reproduce with other FlagSet. |
| 95 | |
| 96 | Args: |
| 97 | other: A FlagSet to reproduce with. |
| 98 | specs: A list of spec from which the flag set is created. |
| 99 | mutation_rate: one in mutation_rate flags will be mutated (replaced by a |
| 100 | random version of the same flag, instead of one from either of the |
| 101 | parents). Set to 0 to disable mutation. |
| 102 | |
| 103 | Returns: |
| 104 | A GA task made by mixing self with other. |
| 105 | """ |
| 106 | |
| 107 | # Get the flag dictionary. |
| 108 | father_flags = self.GetFlags().GetFlags() |
| 109 | mother_flags = other.GetFlags().GetFlags() |
| 110 | |
| 111 | # Flags that are common in both parents and flags that belong to only one |
| 112 | # parent. |
| 113 | self_flags = [] |
| 114 | other_flags = [] |
| 115 | common_flags = [] |
| 116 | |
| 117 | # Find out flags that are common to both parent and flags that belong soly |
| 118 | # to one parent. |
| 119 | for self_flag in father_flags: |
| 120 | if self_flag in mother_flags: |
| 121 | common_flags.append(self_flag) |
| 122 | else: |
| 123 | self_flags.append(self_flag) |
| 124 | |
| 125 | for other_flag in mother_flags: |
| 126 | if other_flag not in father_flags: |
| 127 | other_flags.append(other_flag) |
| 128 | |
| 129 | # Randomly select flags that belong to only one parent. |
| 130 | output_flags = [father_flags[f] for f in self_flags if random.randint(0, 1)] |
| 131 | others = [mother_flags[f] for f in other_flags if random.randint(0, 1)] |
| 132 | output_flags.extend(others) |
| 133 | # Turn on flags that belong to both parent. Randomly choose the value of the |
| 134 | # flag from either parent. |
| 135 | for flag in common_flags: |
| 136 | output_flags.append(CrossoverWith(father_flags[flag], mother_flags[flag])) |
| 137 | |
| 138 | # Mutate flags |
| 139 | if mutation_rate: |
| 140 | return RandomMutate(specs, FlagSet(output_flags), mutation_rate) |
| 141 | |
| 142 | return GATask(FlagSet(output_flags)) |
| 143 | |
| 144 | |
| 145 | class GAGeneration(Generation): |
| 146 | """The Genetic Algorithm.""" |
| 147 | |
| 148 | # The value checks whether the algorithm has converged and arrives at a fixed |
| 149 | # point. If STOP_THRESHOLD of generations have not seen any performance |
| 150 | # improvement, the Genetic Algorithm stops. |
| 151 | STOP_THRESHOLD = None |
| 152 | |
| 153 | # Number of tasks in each generation. |
| 154 | NUM_CHROMOSOMES = None |
| 155 | |
| 156 | # The value checks whether the algorithm has converged and arrives at a fixed |
| 157 | # point. If NUM_TRIALS of trials have been attempted to generate a new task |
| 158 | # without a success, the Genetic Algorithm stops. |
| 159 | NUM_TRIALS = None |
| 160 | |
| 161 | # The flags that can be used to generate new tasks. |
| 162 | SPECS = None |
| 163 | |
| 164 | # What fraction of genes to mutate. |
| 165 | MUTATION_RATE = 0 |
| 166 | |
| 167 | @staticmethod |
| 168 | def InitMetaData(stop_threshold, num_chromosomes, num_trials, specs, |
| 169 | mutation_rate): |
| 170 | """Set up the meta data for the Genetic Algorithm. |
| 171 | |
| 172 | Args: |
| 173 | stop_threshold: The number of generations, upon which no performance has |
| 174 | seen, the Genetic Algorithm stops. |
| 175 | num_chromosomes: Number of tasks in each generation. |
| 176 | num_trials: The number of trials, upon which new task has been tried to |
| 177 | generated without success, the Genetic Algorithm stops. |
| 178 | specs: The flags that can be used to generate new tasks. |
| 179 | mutation_rate: What fraction of genes to mutate. |
| 180 | """ |
| 181 | |
| 182 | GAGeneration.STOP_THRESHOLD = stop_threshold |
| 183 | GAGeneration.NUM_CHROMOSOMES = num_chromosomes |
| 184 | GAGeneration.NUM_TRIALS = num_trials |
| 185 | GAGeneration.SPECS = specs |
| 186 | GAGeneration.MUTATION_RATE = mutation_rate |
| 187 | |
| 188 | def __init__(self, tasks, parents, total_stucks): |
| 189 | """Set up the meta data for the Genetic Algorithm. |
| 190 | |
| 191 | Args: |
| 192 | tasks: A set of tasks to be run. |
| 193 | parents: A set of tasks from which this new generation is produced. This |
| 194 | set also contains the best tasks generated so far. |
| 195 | total_stucks: The number of generations that have not seen improvement. |
| 196 | The Genetic Algorithm will stop once the total_stucks equals to |
| 197 | NUM_TRIALS defined in the GAGeneration class. |
| 198 | """ |
| 199 | |
| 200 | Generation.__init__(self, tasks, parents) |
| 201 | self._total_stucks = total_stucks |
| 202 | |
| 203 | def Improve(self): |
| 204 | """True if this generation has improvement upon its parent generation.""" |
| 205 | |
| 206 | tasks = self.Pool() |
| 207 | parents = self.CandidatePool() |
| 208 | |
| 209 | # The first generate does not have parents. |
| 210 | if not parents: |
| 211 | return True |
| 212 | |
| 213 | # Found out whether a task has improvement upon the best task in the |
| 214 | # parent generation. |
| 215 | best_parent = sorted(parents, key=lambda task: task.GetTestResult())[0] |
| 216 | best_current = sorted(tasks, key=lambda task: task.GetTestResult())[0] |
| 217 | |
| 218 | # At least one task has improvement. |
| 219 | if best_current.Improve(best_parent): |
| 220 | self._total_stucks = 0 |
| 221 | return True |
| 222 | |
| 223 | # If STOP_THRESHOLD of generations have no improvement, the algorithm stops. |
| 224 | if self._total_stucks >= GAGeneration.STOP_THRESHOLD: |
| 225 | return False |
| 226 | |
| 227 | self._total_stucks += 1 |
| 228 | return True |
| 229 | |
| 230 | def Next(self, cache): |
| 231 | """Calculate the next generation. |
| 232 | |
| 233 | Generate a new generation from the a set of tasks. This set contains the |
| 234 | best set seen so far and the tasks executed in the parent generation. |
| 235 | |
| 236 | Args: |
| 237 | cache: A set of tasks that have been generated before. |
| 238 | |
| 239 | Returns: |
| 240 | A set of new generations. |
| 241 | """ |
| 242 | |
| 243 | target_len = GAGeneration.NUM_CHROMOSOMES |
| 244 | specs = GAGeneration.SPECS |
| 245 | mutation_rate = GAGeneration.MUTATION_RATE |
| 246 | |
| 247 | # Collect a set of size target_len of tasks. This set will be used to |
| 248 | # produce a new generation of tasks. |
| 249 | gen_tasks = [task for task in self.Pool()] |
| 250 | |
| 251 | parents = self.CandidatePool() |
| 252 | if parents: |
| 253 | gen_tasks.extend(parents) |
| 254 | |
| 255 | # A set of tasks that are the best. This set will be used as the parent |
| 256 | # generation to produce the next generation. |
| 257 | sort_func = lambda task: task.GetTestResult() |
| 258 | retained_tasks = sorted(gen_tasks, key=sort_func)[:target_len] |
| 259 | |
| 260 | child_pool = set() |
| 261 | for father in retained_tasks: |
| 262 | num_trials = 0 |
| 263 | # Try num_trials times to produce a new child. |
| 264 | while num_trials < GAGeneration.NUM_TRIALS: |
| 265 | # Randomly select another parent. |
| 266 | mother = random.choice(retained_tasks) |
| 267 | # Cross over. |
| 268 | child = mother.ReproduceWith(father, specs, mutation_rate) |
| 269 | if child not in child_pool and child not in cache: |
| 270 | child_pool.add(child) |
| 271 | break |
| 272 | else: |
| 273 | num_trials += 1 |
| 274 | |
| 275 | num_trials = 0 |
| 276 | |
| 277 | while len(child_pool) < target_len and num_trials < GAGeneration.NUM_TRIALS: |
| 278 | for keep_task in retained_tasks: |
| 279 | # Mutation. |
| 280 | child = RandomMutate(specs, keep_task.GetFlags(), mutation_rate) |
| 281 | if child not in child_pool and child not in cache: |
| 282 | child_pool.add(child) |
| 283 | if len(child_pool) >= target_len: |
| 284 | break |
| 285 | else: |
| 286 | num_trials += 1 |
| 287 | |
| 288 | # If NUM_TRIALS of tries have been attempted without generating a set of new |
| 289 | # tasks, the algorithm stops. |
| 290 | if num_trials >= GAGeneration.NUM_TRIALS: |
| 291 | return [] |
| 292 | |
| 293 | assert len(child_pool) == target_len |
| 294 | |
| 295 | return [GAGeneration(child_pool, set(retained_tasks), self._total_stucks)] |