Dennis Kempin | 19e972b | 2013-06-20 13:21:38 -0700 | [diff] [blame] | 1 | #! /usr/bin/env python |
| 2 | # Copyright (c) 2013 The Chromium OS Authors. All rights reserved. |
| 3 | # Use of this source code is governed by a BSD-style license that can be |
| 4 | # found in the LICENSE file. |
Harry Cutts | 0edf157 | 2020-01-21 15:42:10 -0800 | [diff] [blame^] | 5 | |
| 6 | from __future__ import print_function |
| 7 | |
Dennis Kempin | d5b5902 | 2013-07-17 14:12:55 -0700 | [diff] [blame] | 8 | from collections import Counter, defaultdict, namedtuple, OrderedDict |
| 9 | from itertools import chain |
Dennis Kempin | 19e972b | 2013-06-20 13:21:38 -0700 | [diff] [blame] | 10 | from mtreplay import MTReplay |
Dennis Kempin | d5b5902 | 2013-07-17 14:12:55 -0700 | [diff] [blame] | 11 | from queryengine import Query, QueryEngine, QueryMatch, QueryResult |
Dennis Kempin | 19e972b | 2013-06-20 13:21:38 -0700 | [diff] [blame] | 12 | import re |
Dennis Kempin | 19e972b | 2013-06-20 13:21:38 -0700 | [diff] [blame] | 13 | |
| 14 | |
class MTStatQuery(Query):
    """ Searches for MTStat lines.

    An MTStat line looks like 'MTStat:124.321:Key=Value' and is used
    for generating statistics and searching of numeric values.

    The character between key and value is the line's operator:
      ':' marks a sample; every sample is reported.
      '=' marks an update; only the last update of each key is reported.
    """
    # Raw strings keep the regex escapes (\. \w) from being interpreted as
    # (invalid) string escapes.
    line_regex = re.compile(r'MTStat:([0-9]+\.[0-9]+):(\w+)([=:])(\w+)')
    line_match = namedtuple("LineMatch", "timestamp key operator value")

    # Search query syntax, example: Key >= 32
    _query_regex = re.compile(r'\s*(\w+)\s*([<>=!]+)\s*([0-9a-zA-Z]+)\s*')

    @classmethod
    def MatchLine(cls, line):
        """ Parses a single log line.

        Returns a LineMatch namedtuple (all fields are strings) when the
        line starts with an MTStat entry, None otherwise.
        """
        match = cls.line_regex.match(line)
        if match:
            return cls.line_match(
                timestamp=match.group(1),
                key=match.group(2),
                operator=match.group(3),
                value=match.group(4)
            )
        return None

    # supported operators for queries
    # Ordering operators compare numerically, equality operators compare
    # the textual representation.
    search_operators = {
        ">": lambda a, b: float(a) > float(b),
        ">=": lambda a, b: float(a) >= float(b),
        "<": lambda a, b: float(a) < float(b),
        "<=": lambda a, b: float(a) <= float(b),
        "=": lambda a, b: str(a) == str(b),
        "==": lambda a, b: str(a) == str(b),
        "!=": lambda a, b: str(a) != str(b)
    }

    def __init__(self, query=None, capture_logs=False):
        """ Creates a query, optionally restricted by a search expression.

        query: optional "key operator value" string, e.g. "Key >= 32".
               Without it every MTStat entry matches.
        capture_logs: stored on the instance as 'capture_logs'.

        Raises ValueError if 'query' cannot be parsed or uses an operator
        that is not listed in 'search_operators'.
        """
        self.query = query
        self.capture_logs = capture_logs
        # parse search query
        if self.query:
            match = self._query_regex.match(query)
            if not match:
                raise ValueError("%r is not a valid query" % (query,))
            key, op, value = match.groups()
            # The regex accepts any run of <>=! characters (e.g. "=>");
            # reject unknown operators here instead of failing with a
            # KeyError once the query is executed.
            if op not in self.search_operators:
                raise ValueError("%r is not a valid query: unsupported "
                                 "operator %r" % (query, op))
            self.key = key
            self.op = op
            self.value = value

    def FindMatches(self, replay_results, filename):
        """ Returns the list of MTStatMatch objects found in a replay.

        replay_results: object whose 'gestures_log' attribute holds the
                        log text to scan.
        filename: passed on to every created MTStatMatch.
        """
        # find MTStat lines in log
        # Real lists are built (not lazy filter/map objects) because the
        # matches are traversed twice below and concatenated afterwards.
        all_matches = [MTStatMatch(line, filename)
                       for line in replay_results.gestures_log.splitlines()
                       if MTStatQuery.MatchLine(line)]

        # samples are denoted by the : operator, all of them are returned.
        samples = [m for m in all_matches if m.operator == ":"]

        # updates are denoted by the = operator and force only the last
        # update to be returned (later entries overwrite earlier ones).
        last_updates = {}
        for m in all_matches:
            if m.operator == "=":
                last_updates[m.key] = m

        matches = samples + list(last_updates.values())

        # filter by search query if requested
        if self.query:
            matches = [m for m in matches if self.Match(m)]
        return matches

    def Match(self, match):
        """ Returns True if 'match' has the queried key and its value
        satisfies the queried comparison.

        Only valid on instances created with a search query.
        """
        op = MTStatQuery.search_operators[self.op]
        return (match.key == self.key and
                op(match.value, self.value))
| 86 | |
| 87 | |
class MTStatMatch(QueryMatch):
    """ A single MTStat log entry found by MTStatQuery. """
    def __init__(self, line, file):
        # 'line' is expected to be an MTStat line; it is parsed again here
        # to extract its fields.
        parsed = MTStatQuery.MatchLine(line)
        timestamp = float(parsed.timestamp)
        QueryMatch.__init__(self, file, timestamp, line)
        # key, operator and value are kept as the strings found in the log.
        self.key = parsed.key
        self.operator = parsed.operator
        self.value = parsed.value
| 95 | |
| 96 | |
class RegexQuery(Query):
    """ Searches the raw gestures log with a regular expression """
    def __init__(self, query):
        """ query: regular expression source; compiled once up front. """
        self.regex = re.compile(query)
        self.capture_logs = True

    def FindMatches(self, replay_results, filename):
        """ Returns a list with one QueryMatch per matching log line.

        The expression is applied with re.match semantics, i.e. it has to
        match at the beginning of a line. The created matches carry no
        timestamp (None).
        """
        # Return a real list rather than a lazy map object so that callers
        # can take its length or iterate it more than once.
        return [QueryMatch(filename, None, line)
                for line in replay_results.gestures_log.splitlines()
                if self.regex.match(line)]
| 108 | |
| 109 | |
class GesturesQuery(Query):
    """ Searches for gestures with matching type """
    def __init__(self, type, capture_logs):
        """ type: gesture type to look for; a false value selects all
                  gestures.
        capture_logs: stored on the instance as 'capture_logs'.
        """
        self.type = type
        self.capture_logs = capture_logs

    def FindMatches(self, replay_results, filename):
        """ Returns a list with one GesturesMatch per selected gesture.

        Gestures are read from 'replay_results.gestures.gestures'.
        """
        gestures = replay_results.gestures.gestures
        if self.type:
            gestures = [g for g in gestures if g.type == self.type]
        # A real list is returned (not a lazy map object): subclasses and
        # callers take its length and index into it.
        return [GesturesMatch(filename, g) for g in gestures]
| 121 | |
| 122 | |
class GesturesMatch(QueryMatch):
    """ Match wrapping a single gesture. """
    def __init__(self, filename, gesture):
        # The gesture's start is used as timestamp and its string form as
        # the matched text.
        timestamp = gesture.start
        text = str(gesture)
        QueryMatch.__init__(self, filename, timestamp, text)
        self.gesture = gesture
| 127 | |
| 128 | |
class GesturesDiffQuery(GesturesQuery):
    """ Compare gestures to 'original' QueryResult and search for changes """
    def __init__(self, type, original):
        """ type: gesture type to compare; a false value compares all.
        original: result of an earlier gestures query; its 'matches' are
                  the reference the new gestures are compared against.
        """
        self.type = type
        self.original = original
        self.capture_logs = True

    def FindMatches(self, replay_results, filename):
        """ Returns a list of GesturesDiffMatch, one for every position at
        which the new gesture's string form differs from the original's.

        Gestures are compared pairwise in order. When both sides differ in
        length the surplus gestures of the longer side are not compared.
        """
        self_matches = GesturesQuery.FindMatches(self, replay_results,
                                                 filename)
        original_matches = self.original.matches

        # zip() stops at the shorter sequence and works for any iterable,
        # so no len()/indexing of the match collections is required.
        matches = []
        for new, old in zip(self_matches, original_matches):
            if str(new.gesture) != str(old.gesture):
                matches.append(
                    GesturesDiffMatch(filename, new.gesture, old.gesture))
        return matches
| 148 | |
| 149 | |
class GesturesDiffMatch(GesturesMatch):
    """ Match describing a gesture that differs from its original. """
    def __init__(self, filename, new, original):
        # The new gesture becomes the match itself; the original is kept
        # alongside for reporting.
        GesturesMatch.__init__(self, filename, new)
        self.original = original

    def __str__(self):
        # Rendered as "<timestamp>: <new gesture> != <original gesture>".
        fields = (self.timestamp, self.gesture, self.original)
        return "%f: %s != %s" % fields
| 158 | |
| 159 | |
class MTStat(object):
    """ Commands for searching and summarizing replayed reports.

    Every command selects its input files through QueryEngine.SelectFiles
    and runs its queries through QueryEngine.Execute.
    """

    def SearchChanges(self, type=None, number=None, platform=None,
                      parallel=True):
        """ Search for changed gestures.

        This command will compare the gestures output of the HEAD version of
        the gestures library with the local version.
        Optionally the type of gestures to look at can be specified by 'type'

        number, platform: forwarded to QueryEngine.SelectFiles
        parallel: use parallel processing

        returns the result of executing one GesturesDiffQuery per file.
        """
        engine = QueryEngine()
        files = engine.SelectFiles(number, platform)

        # First pass: gestures as produced by the HEAD version.
        MTReplay().Recompile(head=True)
        gestures_query = GesturesQuery(type, False)
        original_results = engine.Execute(files, gestures_query,
                                          parallel=parallel)

        # Second pass: gestures of the local version, each file compared
        # against its own first-pass result. Files without a first-pass
        # result get no diff query.
        MTReplay().Recompile()
        diff_queries = {
            name: GesturesDiffQuery(type, original_results[name])
            for name in files if name in original_results
        }
        return engine.Execute(files, diff_queries, parallel=parallel)

    def Search(self, search=None, gestures=None, regex=None,
               number=None, platform=None, parallel=True):
        """ Search for occurrences of a specific tag or regex.

        Specify either a 'search', a 'regex' or a 'gestures' type; if more
        than one is given, 'search' wins over 'regex', which wins over
        'gestures'. Search queries are formatted
        in a simple "key operator value" format. For example:
        "MyKey > 5" will return all matches where MyKey has a value
        of greater than 5.
        Supported operators are: >, <, >=, <=, =, !=

        number: optional number of random reports to use
        parallel: use parallel processing

        returns a dictionary of lists containing the matches
        for each file, or None if no search criterion was given.
        """
        engine = QueryEngine()
        files = engine.SelectFiles(number, platform)

        if search:
            query = MTStatQuery(search, True)
        elif regex:
            query = RegexQuery(regex)
        elif gestures:
            query = GesturesQuery(gestures, True)
        else:
            return None

        return engine.Execute(files, query, parallel=parallel)

    def GatherStats(self, number=None, platform=None, parallel=True,
                    num_bins=10):
        """ Gathers stats on feedback reports.

        num_bins: maximum number of bins per histogram, see _Histogram.

        Returns a dictionary with a histogram for each recorded key,
        ordered by key.
        """
        engine = QueryEngine()
        files = engine.SelectFiles(number, platform)
        results = engine.Execute(files, MTStatQuery(), parallel=parallel)

        # gather values for each key in a list
        all_matches = chain.from_iterable(
            result.matches for result in results.values())
        value_collection = defaultdict(list)
        for match in all_matches:
            value_collection[match.key].append(match.value)

        # build histograms, sorted by key
        return OrderedDict(
            (key, self._Histogram(value_collection[key], num_bins))
            for key in sorted(value_collection))

    def Download(self, num, parallel=True):
        """ Forwards to QueryEngine.Download(num, parallel). """
        QueryEngine().Download(num, parallel)

    def _Histogram(self, values, num_bins):
        """ Builds a histogram of 'values'.

        - If any value cannot be converted to an integer, the values are
          counted as they are (value -> number of occurrences).
        - If there are at most 'num_bins' distinct integers, each integer
          is counted individually (integer -> number of occurrences).
        - Otherwise the integers are distributed over 'num_bins' bins of
          equal width; each bin is labelled "<N" with N being its
          exclusive upper bound and maps to the number of values in it.

        Returns an OrderedDict.
        """
        def RawHistogram(values):
            return OrderedDict(Counter(values))

        # convert all items to integers.
        integers = []
        for value in values:
            try:
                integers.append(int(value))
            except (TypeError, ValueError):
                # not an integer.
                return RawHistogram(values)

        # don't condense lists that are already small enough
        if len(set(integers)) <= num_bins:
            return RawHistogram(integers)

        # all integer values, use bins for histogram
        histogram = OrderedDict()
        integers = sorted(integers)

        # calculate bin size (append one at the end to include last value)
        begin = integers[0]
        end = integers[-1] + 1
        bin_size = float(end - begin) / float(num_bins)

        # remove each bins integers from the list and count them.
        for i in range(num_bins):
            # round() returns a float on Python 2 and an int on Python 3;
            # normalize to int so labels and comparisons are uniform.
            high_v = int(round((i + 1) * bin_size)) + begin

            # A real list is needed here: its length is taken below.
            remaining = [v for v in integers if v >= high_v]
            histogram["<%d" % high_v] = len(integers) - len(remaining)
            integers = remaining
        return histogram