Dennis Kempin | 19e972b | 2013-06-20 13:21:38 -0700 | [diff] [blame] | 1 | #! /usr/bin/env python |
| 2 | # Copyright (c) 2013 The Chromium OS Authors. All rights reserved. |
| 3 | # Use of this source code is governed by a BSD-style license that can be |
| 4 | # found in the LICENSE file. |
| 5 | from mtlib.log import Log, FeedbackDownloader, FeedbackLog |
| 6 | from mtreplay import MTReplay |
Dennis Kempin | 6e03a43 | 2013-06-25 09:00:53 -0700 | [diff] [blame^] | 7 | import collections |
Dennis Kempin | 19e972b | 2013-06-20 13:21:38 -0700 | [diff] [blame] | 8 | import json |
| 9 | import multiprocessing |
| 10 | import os |
| 11 | import random |
| 12 | import re |
Dennis Kempin | 55af9cc | 2013-06-20 15:07:21 -0700 | [diff] [blame] | 13 | import traceback |
Dennis Kempin | 19e972b | 2013-06-20 13:21:38 -0700 | [diff] [blame] | 14 | import urllib |
| 15 | |
# Location of the feedback-log cache, relative to this script.
script_dir = os.path.dirname(os.path.realpath(__file__))
log_dir = os.path.join(script_dir, '../cache/logs/')
if not os.path.exists(log_dir):
  # makedirs (not mkdir) so the intermediate '../cache' directory is
  # created too on a fresh checkout; mkdir would raise OSError there.
  os.makedirs(log_dir)
| 20 | |
| 21 | |
def MTStatGatherSubprocess(filename):
  # Module-level wrapper so multiprocessing.Pool can pickle the call.
  worker = MTStat()
  return worker.GatherEntriesFromFile(filename)
| 24 | |
Dennis Kempin | 19e972b | 2013-06-20 13:21:38 -0700 | [diff] [blame] | 25 | |
def MTStatDownloadSubprocess(id):
  # Module-level wrapper so multiprocessing.Pool can pickle the call.
  worker = MTStat()
  return worker.DownloadFile(id)
| 28 | |
Dennis Kempin | 6e03a43 | 2013-06-25 09:00:53 -0700 | [diff] [blame^] | 29 | |
class MTStatEntry(object):
  """A single 'MTStat:' line captured from a gestures log.

  Attributes:
    logline: the full log line containing the tag.
    id: identifier of the report this line came from.
    tagline: the portion of the line after the 'MTStat:' tag.
  """
  TAG = 'MTStat:'
  # The timestamp immediately precedes the tag: '... 123.456:MTStat:...'.
  # Raw string avoids invalid '\:' escape warnings on modern Python;
  # pre-compiled since timestamp may be queried per entry.
  _TIMESTAMP_RE = re.compile(r'([0-9]+\.[0-9]+):MTStat')

  def __init__(self, logline, id):
    self.logline = logline
    self.id = id
    startidx = logline.find(MTStatEntry.TAG) + len(MTStatEntry.TAG)
    self.tagline = logline[startidx:]

  @property
  def timestamp(self):
    """Timestamp of the log line as float seconds, or 0.0 if absent."""
    match = MTStatEntry._TIMESTAMP_RE.search(self.logline)
    if match:
      return float(match.group(1))
    return 0.0
| 44 | |
Dennis Kempin | 19e972b | 2013-06-20 13:21:38 -0700 | [diff] [blame] | 45 | class MTStat(object): |
| 46 | def DownloadFile(self, id): |
| 47 | """Download one feedback log into cache.""" |
| 48 | downloader = FeedbackDownloader() |
| 49 | |
| 50 | filename = os.path.join(log_dir, id) |
| 51 | if os.path.exists(filename): |
| 52 | print 'Skipping existing report', id |
| 53 | return |
| 54 | |
| 55 | print 'Downloading new report', id |
| 56 | try: |
| 57 | # might throw IO/Tar/Zip/etc exceptions |
| 58 | report = FeedbackLog(id, force_latest='pad') |
Dennis Kempin | 55af9cc | 2013-06-20 15:07:21 -0700 | [diff] [blame] | 59 | # Test parse. Will throw exception on malformed log |
| 60 | json.loads(report.activity) |
Dennis Kempin | 19e972b | 2013-06-20 13:21:38 -0700 | [diff] [blame] | 61 | except: |
| 62 | print 'Invalid report %s' % id |
Dennis Kempin | 55af9cc | 2013-06-20 15:07:21 -0700 | [diff] [blame] | 63 | return |
| 64 | |
Dennis Kempin | 19e972b | 2013-06-20 13:21:38 -0700 | [diff] [blame] | 65 | # check if report contains logs and actual events |
| 66 | if report.activity and report.evdev and 'E:' in report.evdev: |
| 67 | report.SaveAs(filename) |
| 68 | else: |
| 69 | print 'Invalid report %s' % id |
| 70 | |
| 71 | def Download(self, num, offset=0, parallel=True): |
| 72 | """Download 'num' new feedback logs into cache.""" |
| 73 | downloader = FeedbackDownloader() |
| 74 | |
| 75 | # download list of feedback report id's |
| 76 | params = { |
| 77 | '$limit': str(num), |
| 78 | '$offset': str(offset), |
| 79 | 'mapping': ':list', |
| 80 | 'productId': '208' # ChromeOS |
| 81 | } |
| 82 | url = ('https://feedback.corp.google.com/resty/ReportSearch?' + |
| 83 | urllib.urlencode(params)) |
| 84 | data = downloader.DownloadFile(url) |
| 85 | data = data[data.find('{'):] # strip garbage before json data |
| 86 | |
| 87 | reports_json = json.loads(data) |
| 88 | report_ids = [item['id'] for item in reports_json['results']] |
| 89 | |
| 90 | # Download and check each report |
| 91 | if parallel: |
| 92 | pool = multiprocessing.Pool() |
| 93 | results = pool.map(MTStatDownloadSubprocess, report_ids) |
| 94 | pool.terminate() |
| 95 | else: |
| 96 | results = map(MTStatDownloadSubprocess, report_ids) |
| 97 | |
Dennis Kempin | 6e03a43 | 2013-06-25 09:00:53 -0700 | [diff] [blame^] | 98 | def GatherEntriesFromFile(self, filename): |
| 99 | """ Gathers all MTStat entries from a log file |
Dennis Kempin | 19e972b | 2013-06-20 13:21:38 -0700 | [diff] [blame] | 100 | |
| 101 | Replays a logfile and searched for MTStat log entries. |
| 102 | The return value is a list of all log entries found. |
| 103 | """ |
Dennis Kempin | 55af9cc | 2013-06-20 15:07:21 -0700 | [diff] [blame] | 104 | try: |
| 105 | log = Log(filename) |
Dennis Kempin | 6e03a43 | 2013-06-25 09:00:53 -0700 | [diff] [blame^] | 106 | id = os.path.basename(filename) |
Dennis Kempin | 55af9cc | 2013-06-20 15:07:21 -0700 | [diff] [blame] | 107 | replay = MTReplay() |
| 108 | platform = replay.PlatformOf(log) |
| 109 | if not platform: |
| 110 | print 'No platform for %s' % os.path.basename(filename) |
| 111 | return ([], 0) |
Dennis Kempin | 19e972b | 2013-06-20 13:21:38 -0700 | [diff] [blame] | 112 | |
Dennis Kempin | 55af9cc | 2013-06-20 15:07:21 -0700 | [diff] [blame] | 113 | # count the number of syn reports in log file |
| 114 | num_syn = len(tuple(re.finditer("0000 0000 0", log.evdev))) |
| 115 | |
| 116 | result = replay.Replay(log) |
| 117 | stats = [] |
| 118 | for line in result.gestures_log.splitlines(): |
Dennis Kempin | 6e03a43 | 2013-06-25 09:00:53 -0700 | [diff] [blame^] | 119 | if MTStatEntry.TAG in line: |
| 120 | stats.append(MTStatEntry(line, filename)) |
Dennis Kempin | 55af9cc | 2013-06-20 15:07:21 -0700 | [diff] [blame] | 121 | return (stats, num_syn) |
| 122 | except: |
| 123 | print filename, traceback.format_exc() |
Dennis Kempin | 19e972b | 2013-06-20 13:21:38 -0700 | [diff] [blame] | 124 | return ([], 0) |
Dennis Kempin | 19e972b | 2013-06-20 13:21:38 -0700 | [diff] [blame] | 125 | |
Dennis Kempin | 6e03a43 | 2013-06-25 09:00:53 -0700 | [diff] [blame^] | 126 | def _GatherEntries(self, number, parallel): |
| 127 | files = [os.path.abspath(os.path.join(log_dir, f)) |
| 128 | for f in os.listdir(log_dir) |
| 129 | if f.isdigit()] |
Dennis Kempin | 19e972b | 2013-06-20 13:21:38 -0700 | [diff] [blame] | 130 | |
| 131 | if number is not None: |
| 132 | files = random.sample(files, number) |
| 133 | |
Dennis Kempin | 6e03a43 | 2013-06-25 09:00:53 -0700 | [diff] [blame^] | 134 | print "Processing %d log files" % len(files) |
| 135 | |
Dennis Kempin | 19e972b | 2013-06-20 13:21:38 -0700 | [diff] [blame] | 136 | if parallel: |
| 137 | pool = multiprocessing.Pool() |
Dennis Kempin | 6e03a43 | 2013-06-25 09:00:53 -0700 | [diff] [blame^] | 138 | pool_results = pool.map(MTStatGatherSubprocess, files) |
Dennis Kempin | 19e972b | 2013-06-20 13:21:38 -0700 | [diff] [blame] | 139 | pool.terminate() |
| 140 | else: |
Dennis Kempin | 6e03a43 | 2013-06-25 09:00:53 -0700 | [diff] [blame^] | 141 | pool_results = map(MTStatGatherSubprocess, files) |
Dennis Kempin | 19e972b | 2013-06-20 13:21:38 -0700 | [diff] [blame] | 142 | |
Dennis Kempin | 6e03a43 | 2013-06-25 09:00:53 -0700 | [diff] [blame^] | 143 | entries = [] |
| 144 | syn_count = 0 |
| 145 | for pool_entries, pool_syn_count in pool_results: |
| 146 | syn_count = syn_count + pool_syn_count |
| 147 | entries.extend(pool_entries) |
Dennis Kempin | 19e972b | 2013-06-20 13:21:38 -0700 | [diff] [blame] | 148 | |
Dennis Kempin | 55af9cc | 2013-06-20 15:07:21 -0700 | [diff] [blame] | 149 | # syn reports are coming at approx 60 Hz on most platforms |
Dennis Kempin | 19e972b | 2013-06-20 13:21:38 -0700 | [diff] [blame] | 150 | syn_per_second = 60.0 |
Dennis Kempin | 6e03a43 | 2013-06-25 09:00:53 -0700 | [diff] [blame^] | 151 | hours = syn_count / syn_per_second / 60.0 / 60.0 |
Dennis Kempin | 19e972b | 2013-06-20 13:21:38 -0700 | [diff] [blame] | 152 | |
Dennis Kempin | 55af9cc | 2013-06-20 15:07:21 -0700 | [diff] [blame] | 153 | print "Processed ~%.2f hours of interaction" % hours |
Dennis Kempin | 6e03a43 | 2013-06-25 09:00:53 -0700 | [diff] [blame^] | 154 | |
| 155 | return entries |
| 156 | |
| 157 | def Search(self, search, number=None, parallel=True): |
| 158 | """ Search for occurences of a specific tag |
| 159 | |
| 160 | number: optional number of random reports to use |
| 161 | parallel: use parallel processing |
| 162 | |
| 163 | returns a dictionary of lists containing the matches |
| 164 | for each file. |
| 165 | """ |
| 166 | entries = self._GatherEntries(number, parallel) |
| 167 | |
| 168 | entries_by_id = collections.defaultdict(list) |
| 169 | for entry in entries: |
| 170 | if entry.tagline == search: |
| 171 | entries_by_id[entry.id].append(entry) |
| 172 | return entries_by_id |
| 173 | |
| 174 | def GatherStats(self, number=None, parallel=True): |
| 175 | """ Gathers stats on feedback reports. |
| 176 | |
| 177 | Each tag is part of a group, with group names separated |
| 178 | by colons. Each group can also have sub-groups for example: |
| 179 | |
| 180 | GroupA:ValueA |
| 181 | GroupA:ValueB |
| 182 | GroupA:Subgroup:ValueA |
| 183 | GroupA:Subgroup:ValueB |
| 184 | |
| 185 | number: optional number of random reports to use |
| 186 | parallel: use parallel processing |
| 187 | |
| 188 | returns a dictionary that maps a tag to a tuple of |
| 189 | it's number of occurences and it's ratio. |
| 190 | The ratios are calculated within each group (excluding sub-groups) |
| 191 | """ |
| 192 | entries = self._GatherEntries(number, parallel) |
| 193 | |
| 194 | # build histogram of stat lines |
| 195 | counts = collections.defaultdict(lambda: 0) |
| 196 | num_syn = 0 |
| 197 | for entry in entries: |
| 198 | counts[entry.tagline] = counts[entry.tagline] + 1 |
| 199 | |
| 200 | def get_parent(tagline): |
| 201 | return ":".join(tagline.split(":")[:-1]) |
| 202 | |
| 203 | # calculate ratios |
| 204 | ratios = {} |
| 205 | for tagline, count in counts.items(): |
| 206 | parent = get_parent(tagline) |
| 207 | sibling_sum = 0 |
| 208 | for other, other_count in counts.items(): |
| 209 | if get_parent(other) == parent: |
| 210 | sibling_sum = sibling_sum + other_count |
| 211 | ratios[tagline] = float(count) / float(sibling_sum) |
| 212 | |
| 213 | stats = {} |
| 214 | for key in counts.keys(): |
| 215 | stats[key] = (counts[key], ratios[key]) |
| 216 | |
| 217 | return collections.OrderedDict(sorted(stats.iteritems())) |