Dennis Kempin | 19e972b | 2013-06-20 13:21:38 -0700 | [diff] [blame^] | 1 | #! /usr/bin/env python |
| 2 | # Copyright (c) 2013 The Chromium OS Authors. All rights reserved. |
| 3 | # Use of this source code is governed by a BSD-style license that can be |
| 4 | # found in the LICENSE file. |
| 5 | from mtlib.log import Log, FeedbackDownloader, FeedbackLog |
| 6 | from mtreplay import MTReplay |
| 7 | import json |
| 8 | import multiprocessing |
| 9 | import os |
| 10 | import random |
| 11 | import re |
| 12 | import urllib |
| 13 | |
# Directory containing this script; the log cache lives next to it.
script_dir = os.path.dirname(os.path.realpath(__file__))
# Cache directory that holds downloaded feedback logs (one file per report).
log_dir = os.path.join(script_dir, '../cache/logs/')
if not os.path.isdir(log_dir):
    try:
        # makedirs (rather than mkdir) so a missing parent 'cache' directory
        # does not make the import fail.
        os.makedirs(log_dir)
    except OSError:
        # Another process may have created the directory in the meantime;
        # only propagate the error if the directory is really missing.
        if not os.path.isdir(log_dir):
            raise
| 18 | |
| 19 | |
def MTStatSubprocess(filename):
    """Gather stats for a single log file using a fresh MTStat instance.

    Module-level function that MTStat.GatherStats hands to pool.map()/map().
    Returns whatever MTStat.GatherStatsFromFile returns for 'filename'.
    """
    worker = MTStat()
    return worker.GatherStatsFromFile(filename)
| 22 | |
def MTStatDownloadSubprocess(id):
    """Download a single feedback report using a fresh MTStat instance.

    Module-level function that MTStat.Download hands to pool.map()/map().
    Returns whatever MTStat.DownloadFile returns for the report 'id'.
    """
    worker = MTStat()
    return worker.DownloadFile(id)
| 25 | |
class MTStat(object):
    """Download feedback reports into the cache and gather MTStat entries.

    Reports are stored as files in log_dir, named by report id. Statistics
    are collected by replaying cached logs and picking the 'MTStat:' tagged
    lines out of the resulting gestures log.
    """

    def DownloadFile(self, id):
        """Download one feedback log into cache.

        id: feedback report id, also used as the cache file name.

        Returns None; progress and failures are only reported on stdout.
        """
        # NOTE(review): this instance is not used below. It is kept because
        # the constructor is not visible here and may have side effects
        # (e.g. session setup) - confirm and remove if it has none.
        downloader = FeedbackDownloader()

        filename = os.path.join(log_dir, id)
        if os.path.exists(filename):
            print('Skipping existing report %s' % id)
            return

        print('Downloading new report %s' % id)
        try:
            # might throw IO/Tar/Zip/etc exceptions
            report = FeedbackLog(id, force_latest='pad')
        except Exception:
            print('Invalid report %s' % id)
            # Nothing was loaded, so there is no report to validate or save.
            return

        # only cache reports that contain logs and actual events
        if report.activity and report.evdev and 'E:' in report.evdev:
            report.SaveAs(filename)
        else:
            print('Invalid report %s' % id)

    def Download(self, num, offset=0, parallel=True):
        """Download 'num' new feedback logs into cache.

        num: number of report ids to request from the report search.
        offset: offset into the report search results.
        parallel: download the reports with a multiprocessing pool.
        """
        downloader = FeedbackDownloader()

        # download list of feedback report id's
        params = {
            '$limit': str(num),
            '$offset': str(offset),
            'mapping': ':list',
            'productId': '208',  # ChromeOS
        }
        url = ('https://feedback.corp.google.com/resty/ReportSearch?' +
               urllib.urlencode(params))
        data = downloader.DownloadFile(url)
        data = data[data.find('{'):]  # strip garbage before json data

        reports_json = json.loads(data)
        report_ids = [item['id'] for item in reports_json['results']]

        # Download and check each report
        if parallel:
            pool = multiprocessing.Pool()
            try:
                pool.map(MTStatDownloadSubprocess, report_ids)
            finally:
                # release the worker processes even if a download raised
                pool.terminate()
        else:
            # explicit loop: the calls are made for their side effects only
            for report_id in report_ids:
                MTStatDownloadSubprocess(report_id)

    def GatherStatsFromFile(self, filename):
        """ Gathers statistics on a log file

        Replays a logfile and searches for MTStat log entries.

        Returns a tuple (stats, num_syn): 'stats' is the list of all MTStat
        log entries found (the text following the 'MTStat:' tag) and
        'num_syn' is the number of syn reports counted in the log's evdev
        data. Returns ([], 0) when no platform is found for the log.
        """
        TAG = 'MTStat:'

        log = Log(filename)
        replay = MTReplay()
        platform = replay.PlatformOf(log)
        if not platform:
            print('No platform for %s' % os.path.basename(filename))
            return ([], 0)
        print('Replaying %s on platform %s' % (os.path.basename(filename),
                                               platform.name))

        # count the number of syn reports in log file
        num_syn = len(re.findall(r"0000 0000 0", log.evdev))

        result = replay.Replay(log)
        # keep only what follows the first occurrence of the tag on a line
        stats = [line.split(TAG, 1)[1]
                 for line in result.gestures_log.splitlines()
                 if TAG in line]
        return (stats, num_syn)

    def GatherStats(self, number=None, parallel=True):
        """ Gathers stats on feedback reports.

        number: optional number of random reports to use; when more are
                requested than are cached, all cached reports are used
        parallel: use parallel processing

        returns a dictionary representing the histogram of all
        MTStat log entries (log entry -> number of occurrences).
        """
        files = [os.path.join(log_dir, f) for f in os.listdir(log_dir)
                 if f.isdigit()]

        if number is not None:
            # random.sample raises ValueError when asked for more items
            # than are available, so clamp to the number of cached logs
            files = random.sample(files, min(number, len(files)))

        if parallel:
            pool = multiprocessing.Pool()
            try:
                results = pool.map(MTStatSubprocess, files)
            finally:
                # release the worker processes even if a replay raised
                pool.terminate()
        else:
            results = map(MTStatSubprocess, files)

        # build histogram of stat lines
        stats = {}
        num_syn = 0
        for stat_lines, log_syn in results:
            num_syn += log_syn
            for line in stat_lines:
                stats[line] = stats.get(line, 0) + 1

        # The duration estimate assumes 60 syn reports per second.
        # NOTE(review): the comment here used to say 30 Hz while the code
        # used 60.0; the value is unchanged - confirm the real report rate.
        syn_per_second = 60.0
        hours = num_syn / syn_per_second / 60.0 / 60.0

        print("Processed %.2f hours of interaction" % hours)
        return stats