#! /usr/bin/env python
# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import collections
import errno
import json
import multiprocessing
import os
import random
import re
import traceback
import urllib

from mtlib.log import Log, FeedbackDownloader, FeedbackLog
from mtreplay import MTReplay
15
# Feedback logs are cached in ../cache/logs/ relative to this script.
script_dir = os.path.dirname(os.path.realpath(__file__))
log_dir = os.path.join(script_dir, '../cache/logs/')
try:
  # makedirs also creates a missing ../cache parent, which a plain mkdir
  # would fail on.
  os.makedirs(log_dir)
except OSError as error:
  # An already existing directory is fine (it may have been created by
  # another process importing this module at the same time). Everything
  # else is a real failure.
  if error.errno != errno.EEXIST or not os.path.isdir(log_dir):
    raise
20
21
def MTStatGatherSubprocess(filename):
  """Gather the MTStat entries of one log file using a fresh MTStat.

  Module-level function so it can be handed to multiprocessing.Pool.map.

  filename: path of the log file to process

  returns whatever MTStat.GatherEntriesFromFile returns for the file.
  """
  gatherer = MTStat()
  return gatherer.GatherEntriesFromFile(filename)
24
Dennis Kempin19e972b2013-06-20 13:21:38 -070025
def MTStatDownloadSubprocess(id):
  """Download one feedback report using a fresh MTStat.

  Module-level function so it can be handed to multiprocessing.Pool.map.

  id: id of the feedback report to download

  returns whatever MTStat.DownloadFile returns for the report.
  """
  fetcher = MTStat()
  return fetcher.DownloadFile(id)
28
Dennis Kempin6e03a432013-06-25 09:00:53 -070029
class MTStatEntry(object):
  """ A single MTStat line found in a gestures log.

  logline: the complete log line the entry was created from
  id: identifier of the log the line belongs to, as passed by the caller
  tagline: the text following the first 'MTStat:' tag of the line, or an
           empty string if the line does not contain the tag
  """
  TAG = 'MTStat:'

  # A decimal number directly in front of the tag is used as timestamp.
  _TIMESTAMP_RE = re.compile(r'([0-9]+\.[0-9]+):MTStat')

  def __init__(self, logline, id):
    self.logline = logline
    self.id = id
    # partition() tells us whether the tag was found at all. Slicing at
    # find() + len(TAG) silently yields an unrelated substring when find()
    # returns -1.
    _, tag, remainder = logline.partition(MTStatEntry.TAG)
    self.tagline = remainder if tag else ''

  @property
  def timestamp(self):
    """ Timestamp in front of the tag as float, 0.0 if there is none. """
    match = MTStatEntry._TIMESTAMP_RE.search(self.logline)
    if match:
      return float(match.group(1))
    return 0.0
44
class MTStat(object):
  """ Downloads feedback reports and gathers MTStat entries from them. """

  def _Map(self, function, items, parallel):
    """ Apply function to every item, optionally in a process pool.

    function: callable taking one item; has to be usable with
              multiprocessing.Pool.map when parallel is set
    items: list of items to process
    parallel: use parallel processing

    returns the list of results, in the order of items.
    """
    if not parallel:
      return [function(item) for item in items]

    pool = multiprocessing.Pool()
    try:
      return pool.map(function, items)
    finally:
      # Always stop the workers, even if one of the calls raised.
      pool.terminate()
      pool.join()

  def DownloadFile(self, id):
    """ Download one feedback log into cache.

    Reports that already exist in the cache are skipped. A report is only
    saved if its activity log parses as JSON and its evdev log contains
    at least one 'E:' entry.

    id: id of the feedback report, also used as file name in the cache
    """
    # NOTE(review): this instance is not used below. It is kept in case
    # constructing it has side effects FeedbackLog depends on - confirm.
    downloader = FeedbackDownloader()

    filename = os.path.join(log_dir, id)
    if os.path.exists(filename):
      print('Skipping existing report %s' % (id,))
      return

    print('Downloading new report %s' % (id,))
    try:
      # might throw IO/Tar/Zip/etc exceptions
      report = FeedbackLog(id, force_latest='pad')
      # Test parse. Will throw exception on malformed log
      json.loads(report.activity)
    except Exception:
      # Not a bare except: Ctrl-C and sys.exit() must not be reported as
      # an invalid report.
      print('Invalid report %s' % id)
      return

    # check if report contains logs and actual events
    if report.activity and report.evdev and 'E:' in report.evdev:
      report.SaveAs(filename)
    else:
      print('Invalid report %s' % id)

  def Download(self, num, offset=0, parallel=True):
    """ Download 'num' new feedback logs into cache.

    num: number of report ids to request from the report search
    offset: offset into the report search results
    parallel: use parallel processing
    """
    downloader = FeedbackDownloader()

    # download list of feedback report id's
    params = {
      '$limit': str(num),
      '$offset': str(offset),
      'mapping': ':list',
      'productId': '208'  # ChromeOS
    }
    url = ('https://feedback.corp.google.com/resty/ReportSearch?' +
           urllib.urlencode(params))
    data = downloader.DownloadFile(url)
    data = data[data.find('{'):]  # strip garbage before json data

    reports_json = json.loads(data)
    report_ids = [item['id'] for item in reports_json['results']]

    # Download and check each report
    self._Map(MTStatDownloadSubprocess, report_ids, parallel)

  def GatherEntriesFromFile(self, filename):
    """ Gathers all MTStat entries from a log file

    Replays a logfile and searches for MTStat log entries.

    filename: path of the log file to replay

    returns a tuple (entries, num_syn) of the list of MTStatEntry objects
    found and the number of syn reports counted in the evdev log.
    ([], 0) is returned if the log has no platform or processing failed.
    """
    try:
      log = Log(filename)
      replay = MTReplay()
      platform = replay.PlatformOf(log)
      if not platform:
        print('No platform for %s' % os.path.basename(filename))
        return ([], 0)

      # count the number of syn reports in log file
      num_syn = len(re.findall("0000 0000 0", log.evdev))

      result = replay.Replay(log)
      # NOTE(review): entries are tagged with the file name as passed in,
      # not with its basename - confirm this is what callers expect.
      stats = [MTStatEntry(line, filename)
               for line in result.gestures_log.splitlines()
               if MTStatEntry.TAG in line]
      return (stats, num_syn)
    except Exception:
      # One broken log must not abort the whole run; report it and go on.
      print('%s %s' % (filename, traceback.format_exc()))
      return ([], 0)

  def _GatherEntries(self, number, parallel):
    """ Gathers the MTStat entries of the cached log files.

    number: optional number of random reports to use. All cached reports
            are used if fewer than 'number' are available.
    parallel: use parallel processing

    returns a list of all MTStatEntry objects found.
    """
    files = [os.path.abspath(os.path.join(log_dir, f))
             for f in os.listdir(log_dir)
             if f.isdigit()]

    if number is not None:
      # random.sample raises ValueError when asked for more items than
      # there are, so never ask for more than the cache holds.
      files = random.sample(files, min(number, len(files)))

    print("Processing %d log files" % len(files))

    pool_results = self._Map(MTStatGatherSubprocess, files, parallel)

    entries = []
    syn_count = 0
    for pool_entries, pool_syn_count in pool_results:
      syn_count = syn_count + pool_syn_count
      entries.extend(pool_entries)

    # syn reports are coming at approx 60 Hz on most platforms
    syn_per_second = 60.0
    hours = syn_count / syn_per_second / 60.0 / 60.0

    print("Processed ~%.2f hours of interaction" % hours)

    return entries

  def Search(self, search, number=None, parallel=True):
    """ Search for occurrences of a specific tag

    search: tag to look for, compared to the complete tagline of an entry
    number: optional number of random reports to use
    parallel: use parallel processing

    returns a dictionary of lists containing the matches
    for each file.
    """
    entries = self._GatherEntries(number, parallel)

    entries_by_id = collections.defaultdict(list)
    for entry in entries:
      if entry.tagline == search:
        entries_by_id[entry.id].append(entry)
    return entries_by_id

  def GatherStats(self, number=None, parallel=True):
    """ Gathers stats on feedback reports.

    Each tag is part of a group, with group names separated
    by colons. Each group can also have sub-groups for example:

    GroupA:ValueA
    GroupA:ValueB
    GroupA:Subgroup:ValueA
    GroupA:Subgroup:ValueB

    number: optional number of random reports to use
    parallel: use parallel processing

    returns an ordered dictionary, sorted by tag, that maps a tag to a
    tuple of its number of occurrences and its ratio.
    The ratios are calculated within each group (excluding sub-groups)
    """
    entries = self._GatherEntries(number, parallel)

    # build histogram of stat lines
    counts = collections.Counter(entry.tagline for entry in entries)

    def get_parent(tagline):
      # everything in front of the last colon, '' for a tag without group
      return tagline.rpartition(":")[0]

    # Sum up the counts of each group once, instead of searching all
    # siblings again for every single tag.
    group_totals = collections.Counter()
    for tagline, count in counts.items():
      group_totals[get_parent(tagline)] += count

    # calculate ratios
    stats = {}
    for tagline, count in counts.items():
      ratio = float(count) / float(group_totals[get_parent(tagline)])
      stats[tagline] = (count, ratio)

    return collections.OrderedDict(sorted(stats.items()))
217 return collections.OrderedDict(sorted(stats.iteritems()))