blob: 56b1dcc134ee6eda903d87c7832a2d490cedbaa4 [file] [log] [blame]
Yuheng Long057ad5c2013-08-07 08:48:05 -07001# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""The hill genetic algorithm.
6
7Part of the Chrome build flags optimization.
8"""
9
10__author__ = 'yuhenglong@google.com (Yuheng Long)'
11
12import random
13
14import flags
15from flags import Flag
16from flags import FlagSet
17from generation import Generation
18from task import Task
19
20
21def CrossoverWith(first_flag, second_flag):
22 """Get a crossed over gene.
23
24 At present, this just picks either/or of these values. However, it could be
25 implemented as an integer maskover effort, if required.
26
27 Args:
28 first_flag: The first gene (Flag) to cross over with.
29 second_flag: The second gene (Flag) to cross over with.
30
31 Returns:
32 A Flag that can be considered appropriately randomly blended between the
33 first and second input flag.
34 """
35
36 return first_flag if random.randint(0, 1) else second_flag
37
38
39def RandomMutate(specs, flag_set, mutation_rate):
40 """Randomly mutate the content of a task.
41
42 Args:
43 specs: A list of spec from which the flag set is created.
44 flag_set: The current flag set being mutated
45 mutation_rate: What fraction of genes to mutate.
46
47 Returns:
48 A Genetic Task constructed by randomly mutating the input flag set.
49 """
50
51 results_flags = []
52
53 for spec in specs:
54 # Randomly choose whether this flag should be mutated.
55 if random.randint(0, int(1 / mutation_rate)):
56 continue
57
58 # If the flag is not already in the flag set, it is added.
59 if spec not in flag_set:
60 results_flags.append(Flag(spec))
61 continue
62
63 # If the flag is already in the flag set, it is mutated.
64 result = flags.Search(spec)
65
66 # The value of a numeric flag will be changed, and a boolean flag will be
67 # dropped.
68 if not result:
69 continue
70
71 value = flag_set[spec].GetValue()
72
73 # Randomly select a nearby value of the current value of the flag.
74 rand_arr = [value]
75 if value + 1 < int(result.group('end')):
76 rand_arr.append(value + 1)
77
78 rand_arr.append(value - 1)
79 value = random.sample(rand_arr, 1)[0]
80
81 # If the value is smaller than the start of the spec, this flag will be
82 # dropped.
83 if value != int(result.group('start')) - 1:
84 results_flags.append(Flag(spec, value))
85
86 return GATask(FlagSet(results_flags))
87
88
89class GATask(Task):
90 def __init__(self, flag_set):
91 Task.__init__(self, flag_set)
92
93 def ReproduceWith(self, other, specs, mutation_rate):
94 """Reproduce with other FlagSet.
95
96 Args:
97 other: A FlagSet to reproduce with.
98 specs: A list of spec from which the flag set is created.
99 mutation_rate: one in mutation_rate flags will be mutated (replaced by a
100 random version of the same flag, instead of one from either of the
101 parents). Set to 0 to disable mutation.
102
103 Returns:
104 A GA task made by mixing self with other.
105 """
106
107 # Get the flag dictionary.
108 father_flags = self.GetFlags().GetFlags()
109 mother_flags = other.GetFlags().GetFlags()
110
111 # Flags that are common in both parents and flags that belong to only one
112 # parent.
113 self_flags = []
114 other_flags = []
115 common_flags = []
116
117 # Find out flags that are common to both parent and flags that belong soly
118 # to one parent.
119 for self_flag in father_flags:
120 if self_flag in mother_flags:
121 common_flags.append(self_flag)
122 else:
123 self_flags.append(self_flag)
124
125 for other_flag in mother_flags:
126 if other_flag not in father_flags:
127 other_flags.append(other_flag)
128
129 # Randomly select flags that belong to only one parent.
130 output_flags = [father_flags[f] for f in self_flags if random.randint(0, 1)]
131 others = [mother_flags[f] for f in other_flags if random.randint(0, 1)]
132 output_flags.extend(others)
133 # Turn on flags that belong to both parent. Randomly choose the value of the
134 # flag from either parent.
135 for flag in common_flags:
136 output_flags.append(CrossoverWith(father_flags[flag], mother_flags[flag]))
137
138 # Mutate flags
139 if mutation_rate:
140 return RandomMutate(specs, FlagSet(output_flags), mutation_rate)
141
142 return GATask(FlagSet(output_flags))
143
144
145class GAGeneration(Generation):
146 """The Genetic Algorithm."""
147
148 # The value checks whether the algorithm has converged and arrives at a fixed
149 # point. If STOP_THRESHOLD of generations have not seen any performance
150 # improvement, the Genetic Algorithm stops.
151 STOP_THRESHOLD = None
152
153 # Number of tasks in each generation.
154 NUM_CHROMOSOMES = None
155
156 # The value checks whether the algorithm has converged and arrives at a fixed
157 # point. If NUM_TRIALS of trials have been attempted to generate a new task
158 # without a success, the Genetic Algorithm stops.
159 NUM_TRIALS = None
160
161 # The flags that can be used to generate new tasks.
162 SPECS = None
163
164 # What fraction of genes to mutate.
165 MUTATION_RATE = 0
166
167 @staticmethod
168 def InitMetaData(stop_threshold, num_chromosomes, num_trials, specs,
169 mutation_rate):
170 """Set up the meta data for the Genetic Algorithm.
171
172 Args:
173 stop_threshold: The number of generations, upon which no performance has
174 seen, the Genetic Algorithm stops.
175 num_chromosomes: Number of tasks in each generation.
176 num_trials: The number of trials, upon which new task has been tried to
177 generated without success, the Genetic Algorithm stops.
178 specs: The flags that can be used to generate new tasks.
179 mutation_rate: What fraction of genes to mutate.
180 """
181
182 GAGeneration.STOP_THRESHOLD = stop_threshold
183 GAGeneration.NUM_CHROMOSOMES = num_chromosomes
184 GAGeneration.NUM_TRIALS = num_trials
185 GAGeneration.SPECS = specs
186 GAGeneration.MUTATION_RATE = mutation_rate
187
188 def __init__(self, tasks, parents, total_stucks):
189 """Set up the meta data for the Genetic Algorithm.
190
191 Args:
192 tasks: A set of tasks to be run.
193 parents: A set of tasks from which this new generation is produced. This
194 set also contains the best tasks generated so far.
195 total_stucks: The number of generations that have not seen improvement.
196 The Genetic Algorithm will stop once the total_stucks equals to
197 NUM_TRIALS defined in the GAGeneration class.
198 """
199
200 Generation.__init__(self, tasks, parents)
201 self._total_stucks = total_stucks
202
203 def Improve(self):
204 """True if this generation has improvement upon its parent generation."""
205
206 tasks = self.Pool()
207 parents = self.CandidatePool()
208
209 # The first generate does not have parents.
210 if not parents:
211 return True
212
213 # Found out whether a task has improvement upon the best task in the
214 # parent generation.
215 best_parent = sorted(parents, key=lambda task: task.GetTestResult())[0]
216 best_current = sorted(tasks, key=lambda task: task.GetTestResult())[0]
217
218 # At least one task has improvement.
219 if best_current.Improve(best_parent):
220 self._total_stucks = 0
221 return True
222
223 # If STOP_THRESHOLD of generations have no improvement, the algorithm stops.
224 if self._total_stucks >= GAGeneration.STOP_THRESHOLD:
225 return False
226
227 self._total_stucks += 1
228 return True
229
230 def Next(self, cache):
231 """Calculate the next generation.
232
233 Generate a new generation from the a set of tasks. This set contains the
234 best set seen so far and the tasks executed in the parent generation.
235
236 Args:
237 cache: A set of tasks that have been generated before.
238
239 Returns:
240 A set of new generations.
241 """
242
243 target_len = GAGeneration.NUM_CHROMOSOMES
244 specs = GAGeneration.SPECS
245 mutation_rate = GAGeneration.MUTATION_RATE
246
247 # Collect a set of size target_len of tasks. This set will be used to
248 # produce a new generation of tasks.
249 gen_tasks = [task for task in self.Pool()]
250
251 parents = self.CandidatePool()
252 if parents:
253 gen_tasks.extend(parents)
254
255 # A set of tasks that are the best. This set will be used as the parent
256 # generation to produce the next generation.
257 sort_func = lambda task: task.GetTestResult()
258 retained_tasks = sorted(gen_tasks, key=sort_func)[:target_len]
259
260 child_pool = set()
261 for father in retained_tasks:
262 num_trials = 0
263 # Try num_trials times to produce a new child.
264 while num_trials < GAGeneration.NUM_TRIALS:
265 # Randomly select another parent.
266 mother = random.choice(retained_tasks)
267 # Cross over.
268 child = mother.ReproduceWith(father, specs, mutation_rate)
269 if child not in child_pool and child not in cache:
270 child_pool.add(child)
271 break
272 else:
273 num_trials += 1
274
275 num_trials = 0
276
277 while len(child_pool) < target_len and num_trials < GAGeneration.NUM_TRIALS:
278 for keep_task in retained_tasks:
279 # Mutation.
280 child = RandomMutate(specs, keep_task.GetFlags(), mutation_rate)
281 if child not in child_pool and child not in cache:
282 child_pool.add(child)
283 if len(child_pool) >= target_len:
284 break
285 else:
286 num_trials += 1
287
288 # If NUM_TRIALS of tries have been attempted without generating a set of new
289 # tasks, the algorithm stops.
290 if num_trials >= GAGeneration.NUM_TRIALS:
291 return []
292
293 assert len(child_pool) == target_len
294
295 return [GAGeneration(child_pool, set(retained_tasks), self._total_stucks)]