Dennis Kempin | 19e972b | 2013-06-20 13:21:38 -0700 | [diff] [blame] | 1 | #! /usr/bin/env python |
| 2 | # Copyright (c) 2013 The Chromium OS Authors. All rights reserved. |
| 3 | # Use of this source code is governed by a BSD-style license that can be |
| 4 | # found in the LICENSE file. |
Dennis Kempin | d5b5902 | 2013-07-17 14:12:55 -0700 | [diff] [blame] | 5 | from collections import Counter, defaultdict, namedtuple, OrderedDict |
| 6 | from itertools import chain |
Dennis Kempin | 19e972b | 2013-06-20 13:21:38 -0700 | [diff] [blame] | 7 | from mtreplay import MTReplay |
Dennis Kempin | d5b5902 | 2013-07-17 14:12:55 -0700 | [diff] [blame] | 8 | from queryengine import Query, QueryEngine, QueryMatch, QueryResult |
Dennis Kempin | 19e972b | 2013-06-20 13:21:38 -0700 | [diff] [blame] | 9 | import re |
Dennis Kempin | 19e972b | 2013-06-20 13:21:38 -0700 | [diff] [blame] | 10 | |
| 11 | |
class MTStatQuery(Query):
  """ Searches for MTStat lines.

  An MTStat line looks like 'MTStat:124.321:Key=Value' and is used
  for generating statistics and searching of numeric values.

  Lines using the ':' operator are samples; every sample is reported.
  Lines using the '=' operator are updates; only the last update seen
  for each key is reported.
  """
  line_regex = re.compile(r'MTStat:([0-9]+\.[0-9]+):(\w+)([=:])(\w+)')
  line_match = namedtuple("LineMatch", "timestamp key operator value")

  # Pattern of a search query, example: Key >= 32
  # NOTE: the pattern is not anchored at the end, so any text following the
  # value is ignored.
  query_regex = re.compile(r'\s*(\w+)\s*([<>=!]+)\s*([0-9a-zA-Z]+)\s*')

  @classmethod
  def MatchLine(cls, line):
    """ Parse a log line that starts with an MTStat entry.

    Returns a LineMatch namedtuple (timestamp, key, operator, value), all
    fields being strings, or None if the line does not start with an
    MTStat entry.
    """
    match = cls.line_regex.match(line)
    if match:
      return cls.line_match(
        timestamp=match.group(1),
        key=match.group(2),
        operator=match.group(3),
        value=match.group(4)
      )
    return None

  # supported operators for queries. Ordering operators compare numerically,
  # equality operators compare the textual representation.
  search_operators = {
    ">": lambda a, b: float(a) > float(b),
    ">=": lambda a, b: float(a) >= float(b),
    "<": lambda a, b: float(a) < float(b),
    "<=": lambda a, b: float(a) <= float(b),
    "=": lambda a, b: str(a) == str(b),
    "==": lambda a, b: str(a) == str(b),
    "!=": lambda a, b: str(a) != str(b)
  }

  def __init__(self, query=None, capture_logs=False):
    """ Create a query, optionally restricted by a search expression.

    query: optional "key operator value" string, e.g. "Key >= 32". Without
           a query all samples and last updates are returned.
    capture_logs: stored on the instance; presumably read by the query
                  engine to decide whether logs are kept - TODO confirm.

    Raises ValueError if the query cannot be parsed or uses an operator
    that is not listed in search_operators.
    """
    self.query = query
    self.capture_logs = capture_logs
    # parse search query
    if self.query:
      match = self.query_regex.match(query)
      if not match:
        raise ValueError("'%s' is not a valid query" % (query,))
      key, op, value = match.groups()
      # Reject unknown operators (e.g. "=<") right away instead of failing
      # with a KeyError once the first line is matched.
      if op not in self.search_operators:
        raise ValueError("'%s' is not a supported operator" % (op,))
      self.key = key
      self.op = op
      self.value = value

  def FindMatches(self, replay_results, filename):
    """ Return a list of MTStatMatch objects found in the gestures log.

    replay_results: object providing the log text as 'gestures_log'.
    filename: passed through to each created MTStatMatch.
    """
    # find MTStat lines in log. Real lists are built (instead of lazy
    # filter/map results) because the matches are traversed more than once.
    lines = replay_results.gestures_log.splitlines()
    all_matches = [MTStatMatch(line, filename) for line in lines
                   if MTStatQuery.MatchLine(line)]

    # samples are denoted by the : operator, all of them are returned.
    samples = [match for match in all_matches if match.operator == ":"]

    # updates are denoted by the = operator and force only the last
    # update to be returned: later entries overwrite earlier ones.
    last_updates = {}
    for match in all_matches:
      if match.operator == "=":
        last_updates[match.key] = match

    matches = samples + list(last_updates.values())

    # filter by search query if requested
    if self.query:
      matches = [match for match in matches if self.Match(match)]
    return matches

  def Match(self, match):
    """ Check a match against the search query.

    Returns True if the key of 'match' equals the queried key and the
    queried operator holds between the match value and the queried value.
    Only valid on instances that were created with a query.
    """
    op = MTStatQuery.search_operators[self.op]
    return (match.key == self.key and
            op(match.value, self.value))
| 83 | |
| 84 | |
class MTStatMatch(QueryMatch):
  """ A single MTStat log line, parsed into key, operator and value. """
  def __init__(self, line, file):
    """ Parse 'line' and initialize the match.

    line: log line starting with an MTStat entry.
    file: passed to QueryMatch as its first argument.

    Raises ValueError if 'line' is not an MTStat line.
    """
    match = MTStatQuery.MatchLine(line)
    if match is None:
      # Fail with a clear message instead of an AttributeError on None.
      raise ValueError("not an MTStat line: %r" % (line,))
    # The timestamp is parsed as text and handed on as a float.
    QueryMatch.__init__(self, file, float(match.timestamp), line)
    self.key = match.key
    self.operator = match.operator
    self.value = match.value
| 92 | |
| 93 | |
class RegexQuery(Query):
  """ Searches the raw gestures log with a regular expression """
  def __init__(self, query):
    """ Compile 'query' as the regular expression to look for.

    Raises re.error if 'query' is not a valid regular expression.
    """
    self.regex = re.compile(query)
    self.capture_logs = True

  def FindMatches(self, replay_results, filename):
    """ Return a list with one QueryMatch for each matching log line.

    The expression is applied with re.match, i.e. it has to match at the
    beginning of a line. The matches are created without a timestamp.
    """
    # Return a real list rather than a lazy map object so the result can be
    # measured and traversed more than once.
    return [QueryMatch(filename, None, line)
            for line in replay_results.gestures_log.splitlines()
            if self.regex.match(line)]
| 105 | |
| 106 | |
class GesturesQuery(Query):
  """ Searches for gestures with matching type """
  def __init__(self, type, capture_logs):
    """ Create a query for gestures of 'type'.

    type: gesture type to look for; a false value selects all gestures.
    capture_logs: stored on the instance; presumably read by the query
                  engine to decide whether logs are kept - TODO confirm.
    """
    self.type = type
    self.capture_logs = capture_logs

  def FindMatches(self, replay_results, filename):
    """ Return a list with one GesturesMatch for each selected gesture.

    The gestures are read from 'replay_results.gestures.gestures'.
    """
    gestures = replay_results.gestures.gestures
    if self.type:
      gestures = [g for g in gestures if g.type == self.type]
    # Build a real list: GesturesDiffQuery takes len() of and indexes into
    # this result, which a lazy map object does not support.
    return [GesturesMatch(filename, g) for g in gestures]
| 118 | |
| 119 | |
class GesturesMatch(QueryMatch):
  """ Match wrapping a single gesture.

  The gesture's 'start' attribute and its string representation are handed
  to QueryMatch; the gesture itself is kept in 'gesture'.
  """
  def __init__(self, filename, gesture):
    start = gesture.start
    description = str(gesture)
    QueryMatch.__init__(self, filename, start, description)
    self.gesture = gesture
| 124 | |
| 125 | |
class GesturesDiffQuery(GesturesQuery):
  """ Compare gestures to 'original' QueryResult and search for changes """
  def __init__(self, type, original):
    """ Create a diff query.

    type: gesture type to compare; a false value compares all gestures.
    original: result of an earlier query; its 'matches' are the reference
              the new gestures are compared against.
    """
    self.type = type
    self.original = original
    self.capture_logs = True

  def FindMatches(self, replay_results, filename):
    """ Return a list of GesturesDiffMatch for all gestures that changed.

    New and original gestures are paired by position and compared by their
    string representation. If the two lists differ in length, the surplus
    entries of the longer one are not compared.
    """
    self_matches = GesturesQuery.FindMatches(self, replay_results, filename)
    original_matches = self.original.matches

    # zip stops at the shorter sequence, which pairs up exactly the first
    # min(len(new), len(original)) entries without requiring len() support.
    matches = []
    for new_match, original_match in zip(self_matches, original_matches):
      if str(new_match.gesture) != str(original_match.gesture):
        matches.append(GesturesDiffMatch(filename, new_match.gesture,
                                         original_match.gesture))
    return matches
| 145 | |
| 146 | |
class GesturesDiffMatch(GesturesMatch):
  """ Match describing a gesture that differs from its original version. """
  def __init__(self, filename, new, original):
    # 'new' becomes the regular gesture of this match, the gesture it is
    # compared against is kept separately.
    GesturesMatch.__init__(self, filename, new)
    self.original = original

  def __str__(self):
    # 'timestamp' is not assigned in this class; presumably QueryMatch
    # provides it - TODO confirm.
    fields = (self.timestamp, str(self.gesture), str(self.original))
    return "%f: %s != %s" % fields
| 155 | |
| 156 | |
class MTStat(object):
  """ Entry points for searching and gathering stats on feedback reports. """

  def SearchChanges(self, type=None, number=None, platform=None, parallel=True):
    """ Search for changed gestures.

    This command will compare the gestures output of the HEAD version of
    the gestures library with the local version.
    Optionally the type of gestures to look at can be specified by 'type'

    number, platform: passed to QueryEngine.SelectFiles to pick the files.
    parallel: passed on to QueryEngine.Execute.

    returns the result of executing one GesturesDiffQuery per file.
    """
    engine = QueryEngine()
    files = engine.SelectFiles(number, platform)

    # first pass: gestures as produced by the HEAD version
    MTReplay().Recompile(head=True)
    gestures_query = GesturesQuery(type, False)
    original_results = engine.Execute(files, gestures_query, parallel=parallel)

    # second pass: local version, compared file by file to the first pass.
    # Files without a result in the first pass get no diff query.
    MTReplay().Recompile()
    diff_queries = {}
    for name in files:
      if name in original_results:
        diff_queries[name] = GesturesDiffQuery(type, original_results[name])
    return engine.Execute(files, diff_queries, parallel=parallel)

  def Search(self, search=None, gestures=None, regex=None,
             number=None, platform=None, parallel=True):
    """ Search for occurrences of a specific tag, regex or gesture type.

    Specify one of 'search', 'regex' or 'gestures'; if several are given
    they take precedence in this order. Search queries are formatted
    in a simple "key operator value" format. For example:
    "MyKey > 5" will return all matches where MyKey has a value
    of greater than 5.
    Supported operators are: >, <, >=, <=, =, !=

    number: optional number of random reports to use
    platform: passed to QueryEngine.SelectFiles together with 'number'
    parallel: use parallel processing

    returns a dictionary of lists containing the matches
    for each file, or None if no query was specified.
    """
    engine = QueryEngine()
    files = engine.SelectFiles(number, platform)

    if search:
      query = MTStatQuery(search, True)
    elif regex:
      query = RegexQuery(regex)
    elif gestures:
      query = GesturesQuery(gestures, True)
    else:
      return None

    return engine.Execute(files, query, parallel=parallel)

  def GatherStats(self, number=None, platform=None, parallel=True, num_bins=10):
    """ Gathers stats on feedback reports.

    number, platform: passed to QueryEngine.SelectFiles to pick the files.
    parallel: passed on to QueryEngine.Execute.
    num_bins: number of bins used when a histogram has to be condensed.

    Returns a dictionary with a histogram for each recorded key, ordered
    by key.
    """
    engine = QueryEngine()
    files = engine.SelectFiles(number, platform)
    results = engine.Execute(files, MTStatQuery(), parallel=parallel)

    # gather values for each key in a list
    all_matches = chain.from_iterable(
        result.matches for result in results.values())
    value_collection = defaultdict(list)
    for match in all_matches:
      value_collection[match.key].append(match.value)

    # build histograms
    histograms = {}
    for key, values in value_collection.items():
      histograms[key] = self._Histogram(values, num_bins)
    return OrderedDict(sorted(histograms.items(), key=lambda t: t[0]))

  def Download(self, num, offset, parallel=True):
    """ Forwards the arguments unchanged to QueryEngine.Download. """
    QueryEngine().Download(num, offset, parallel)

  def _Histogram(self, values, num_bins):
    """ Build a histogram of 'values'.

    If any value cannot be converted to an integer, or if there are no more
    than 'num_bins' distinct integers, each distinct value is counted on
    its own. Otherwise the integers are condensed into 'num_bins' bins of
    equal width, labeled "<N" with N being the exclusive upper bound.

    Returns an OrderedDict mapping value (or bin label) to its count.
    """
    def RawHistogram(values):
      return OrderedDict(Counter(values))

    # convert all items to integers.
    integers = []
    for value in values:
      try:
        integers.append(int(value))
      except (TypeError, ValueError):
        # not an integer.
        return RawHistogram(values)

    # don't condense lists that are already small enough
    if len(set(integers)) <= num_bins:
      return RawHistogram(integers)

    # all integer values, use bins for histogram
    histogram = OrderedDict()
    remaining = sorted(integers)

    # calculate bin size (append one at the end to include last value)
    begin = remaining[0]
    end = remaining[-1] + 1
    bin_size = float(end - begin) / float(num_bins)

    # remove each bins integers from the list and count them.
    for i in range(num_bins):
      # Round half up explicitly: the builtin round() rounds halves to even
      # on Python 3, which would shift bin edges compared to Python 2.
      high_v = int((i + 1) * bin_size + 0.5) + begin

      above = [v for v in remaining if v >= high_v]
      histogram["<%d" % high_v] = len(remaining) - len(above)
      remaining = above
    return histogram