#!/usr/bin/env python
# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
from mtlib.log import Log, FeedbackDownloader, FeedbackLog
from mtreplay import MTReplay
import collections
import json
import multiprocessing
import os
import random
import re
import traceback
import urllib

script_dir = os.path.dirname(os.path.realpath(__file__))
log_dir = os.path.join(script_dir, '../cache/logs/')
if not os.path.exists(log_dir):
  os.mkdir(log_dir)


def SortedDict(d):
  return collections.OrderedDict(sorted(d.items(), key=lambda t: t[0]))


def MTStatSearchSubprocess(params):
  return MTStat().FindMatchesInFile(params[0], params[1])


def MTStatDownloadSubprocess(id):
  return MTStat().DownloadFile(id)


class SearchMatch(object):
  # Example: MTStat:124.321:Key=Value
  mtstat_regex = re.compile(r'MTStat:([0-9]+\.[0-9]+):(\w+)([=:])(\w+)')
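  # For the example line above, the captured groups are
  # ('124.321', 'Key', '=', 'Value'): timestamp, key, operator, value.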

  def __init__(self, line, file):
    self.line = line
    self.file = file

    self.key = None
    self.value = None
    self.operator = None
    self.timestamp = None
    self.ismtstat = False

    match = SearchMatch.mtstat_regex.search(self.line)
    if match:
      self.timestamp = float(match.group(1))
      self.key = match.group(2)
      self.operator = match.group(3)
      self.value = match.group(4)
      self.ismtstat = True


class MTStat(object):

  search_operators = {
    ">": lambda a, b: float(a) > float(b),
    ">=": lambda a, b: float(a) >= float(b),
    "<": lambda a, b: float(a) < float(b),
    "<=": lambda a, b: float(a) <= float(b),
    "=": lambda a, b: str(a) == str(b),
    "==": lambda a, b: str(a) == str(b),
    "!=": lambda a, b: str(a) != str(b)
  }
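  # Note: the relational operators compare values numerically (cast to
  # float), while =, == and != compare them as strings.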

  def FindMatchesInFile(self, filename, regex_str=None):
    """Searches a log file for MTStat entries.

    Replays a logfile and searches for MTStat log entries.
    Returns two values: a list of SearchMatches and the number of
    SYN reports that have been processed.
    """
    try:
      log = Log(filename)

      # find platform for file
      replay = MTReplay()
      platform = replay.PlatformOf(log)
      if not platform:
        print 'No platform for %s' % os.path.basename(filename)
        return ([], 0)

      # count the number of syn reports in the log file
      num_syn = len(tuple(re.finditer("0000 0000 0", log.evdev)))

      # run replay
      result = replay.Replay(log)

      # by default we search for MTStat lines
      if regex_str:
        regex = re.compile(regex_str)
      else:
        regex = SearchMatch.mtstat_regex

      # find all lines matching the regex
      stats = []
      for line in result.gestures_log.splitlines():
        if regex.search(line):
          stats.append(SearchMatch(line, filename))
      return (stats, num_syn)
    except:
      print filename, traceback.format_exc()
      return ([], 0)

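  # A minimal usage sketch (the report id below is hypothetical):
  #   matches, num_syn = MTStat().FindMatchesInFile(
  #       os.path.join(log_dir, '1234567890'))
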
  def _FindAllMatches(self, number, parallel, search_regex=None):
    # make sure the replay binaries are up to date
    MTReplay().Recompile()

    # list all log files
    files = [os.path.abspath(os.path.join(log_dir, f))
             for f in os.listdir(log_dir)
             if f.isdigit()]

    # randomly select a subset of files
    if number is not None:
      files = random.sample(files, number)

    # arguments for MTStatSearchSubprocess
    parameters = [(file, search_regex) for file in files]

    print "Processing %d log files" % len(files)

    # process all files, either in parallel or sequentially
    if parallel:
      pool = multiprocessing.Pool()
      pool_results = pool.map(MTStatSearchSubprocess, parameters)
      pool.terminate()
    else:
      pool_results = map(MTStatSearchSubprocess, parameters)

    # merge the results of each file into one big list
    entries = []
    syn_count = 0
    for file_entries, file_syn_count in pool_results:
      syn_count = syn_count + file_syn_count

      # the '=' operator means that only the latest entry
      # for each key in a file counts.
      file_uniques = {}
      for entry in file_entries:
        if entry.operator == '=':
          file_uniques[entry.key] = entry
        else:
          entries.append(entry)

      # add the last value of each unique entry
      for value in file_uniques.values():
        entries.append(value)

    # syn reports arrive at approx 60 Hz on most platforms
    syn_per_second = 60.0
    hours = syn_count / syn_per_second / 60.0 / 60.0

    print "Processed ~%.2f hours of interaction" % hours

    return entries

  def Search(self, query=None, regex=None, number=None, parallel=True):
    """Search for occurrences of a specific tag or regex.

    Specify either a 'query' or a 'regex'. Queries are formatted
    in a simple "key operator value" format. For example:
    "MyKey > 5" will return all matches where MyKey has a value
    greater than 5.
    Supported operators are: >, <, >=, <=, =, ==, !=

    number: optional number of random reports to use
    parallel: use parallel processing

    returns a dictionary of lists containing the matches
    for each file.
    """
    entries = self._FindAllMatches(number, parallel, search_regex=regex)

    if query:
      match = re.match(r"\s*(\w+)\s*([<>=!]+)\s*([0-9a-zA-Z]+)\s*", query)
      if not match:
        print query, "is not a valid query"
        return {}
      search_key = match.group(1)
      search_op = MTStat.search_operators[match.group(2)]
      search_value = match.group(3)

    entries_by_file = collections.defaultdict(list)
    for entry in entries:
      if query is None or (entry.key == search_key and
                           search_op(entry.value, search_value)):
        entries_by_file[entry.file].append(entry)
    return entries_by_file

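  # A minimal usage sketch ('MyKey' is a hypothetical MTStat key):
  #   by_file = MTStat().Search(query="MyKey > 5")
  #   for filename, matches in by_file.items():
  #     print filename, len(matches)
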
  def GatherStats(self, number=None, parallel=True, num_bins=10):
    """Gathers stats on feedback reports.

    Returns a dictionary with a histogram for each recorded key.
    """
    entries = self._FindAllMatches(number, parallel)

    # gather the values of each key in a list
    value_collection = collections.defaultdict(list)
    for entry in entries:
      value_collection[entry.key].append(entry.value)

    # build histograms
    histograms = {}
    for key, values in value_collection.items():
      histograms[key] = self._Histogram(values, num_bins)
    return SortedDict(histograms)

  def _Histogram(self, values, num_bins):
    def RawHistogram(values):
      return SortedDict(collections.Counter(values))

    # convert all items to integers.
    integers = []
    for value in values:
      try:
        integers.append(int(value))
      except ValueError:
        # not all values are integers: return a raw histogram instead
        return RawHistogram(values)

    # don't condense lists that are already small enough
    if len(set(integers)) <= num_bins:
      return RawHistogram(integers)

    # all values are integers, use bins for the histogram
    histogram = collections.OrderedDict()
    integers = sorted(integers)

    # calculate the bin size (add one at the end to include the last value)
    begin = integers[0]
    end = integers[-1] + 1
    bin_size = float(end - begin) / float(num_bins)

    # remove each bin's integers from the list and count them.
    for i in range(num_bins):
      high_v = round((i + 1) * bin_size) + begin

      filtered = filter(lambda v: v >= high_v, integers)
      histogram["<%d" % high_v] = len(integers) - len(filtered)
      integers = filtered
    return histogram

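  # Worked example: for values 0..99 with num_bins=10, begin=0, end=100 and
  # bin_size=10.0, producing bins "<10", "<20", ..., "<100" of 10 values each.
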
  def DownloadFile(self, id):
    """Download one feedback log into cache."""
    filename = os.path.join(log_dir, id)
    if os.path.exists(filename):
      print 'Skipping existing report', id
      return

    print 'Downloading new report', id
    try:
      # might throw IO/Tar/Zip/etc exceptions
      report = FeedbackLog(id, force_latest='pad')
      # test parse; will throw an exception on malformed logs
      json.loads(report.activity)
    except:
      print 'Invalid report %s' % id
      return

    # check if the report contains logs and actual events
    if report.activity and report.evdev and 'E:' in report.evdev:
      report.SaveAs(filename)
    else:
      print 'Invalid report %s' % id

  def Download(self, num, offset=0, parallel=True):
    """Download 'num' new feedback logs into cache."""
    downloader = FeedbackDownloader()

    # download the list of feedback report ids
    params = {
      '$limit': str(num),
      '$offset': str(offset),
      'mapping': ':list',
      'productId': '208'  # ChromeOS
    }
    url = ('https://feedback.corp.google.com/resty/ReportSearch?' +
           urllib.urlencode(params))
    data = downloader.DownloadFile(url)
    data = data[data.find('{'):]  # strip garbage before the json data

    reports_json = json.loads(data)
    report_ids = [item['id'] for item in reports_json['results']]

    # download and check each report
    if parallel:
      pool = multiprocessing.Pool()
      results = pool.map(MTStatDownloadSubprocess, report_ids)
      pool.terminate()
    else:
      results = map(MTStatDownloadSubprocess, report_ids)
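

# A minimal command-line sketch (an assumption, not part of the original
# module): download a handful of reports, then print per-key histograms.
if __name__ == '__main__':
  stat = MTStat()
  stat.Download(10)
  for key, histogram in stat.GatherStats().items():
    print key, histogram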