Dennis Kempin | 19e972b | 2013-06-20 13:21:38 -0700 | [diff] [blame] | 1 | #! /usr/bin/env python |
| 2 | # Copyright (c) 2013 The Chromium OS Authors. All rights reserved. |
| 3 | # Use of this source code is governed by a BSD-style license that can be |
| 4 | # found in the LICENSE file. |
| 5 | from mtlib.log import Log, FeedbackDownloader, FeedbackLog |
| 6 | from mtreplay import MTReplay |
Dennis Kempin | 6e03a43 | 2013-06-25 09:00:53 -0700 | [diff] [blame^] | 7 | import collections |
Dennis Kempin | 19e972b | 2013-06-20 13:21:38 -0700 | [diff] [blame] | 8 | import json |
| 9 | import multiprocessing |
| 10 | import os |
| 11 | import random |
| 12 | import re |
Dennis Kempin | 55af9cc | 2013-06-20 15:07:21 -0700 | [diff] [blame] | 13 | import traceback |
Dennis Kempin | 19e972b | 2013-06-20 13:21:38 -0700 | [diff] [blame] | 14 | import urllib |
| 15 | |
# Location of the feedback-log cache, relative to this script.
script_dir = os.path.dirname(os.path.realpath(__file__))
log_dir = os.path.join(script_dir, '../cache/logs/')
if not os.path.exists(log_dir):
  # makedirs (not mkdir) so the intermediate '../cache' directory is
  # created too on a fresh checkout; mkdir would raise OSError there.
  os.makedirs(log_dir)
| 20 | |
| 21 | |
def MTStatGatherSubprocess(filename):
  # Module-level wrapper so multiprocessing.Pool can pickle the call.
  worker = MTStat()
  return worker.GatherEntriesFromFile(filename)
| 24 | |
Dennis Kempin | 19e972b | 2013-06-20 13:21:38 -0700 | [diff] [blame] | 25 | |
def MTStatDownloadSubprocess(id):
  # Module-level wrapper so multiprocessing.Pool can pickle the call.
  worker = MTStat()
  return worker.DownloadFile(id)
| 28 | |
Dennis Kempin | 6e03a43 | 2013-06-25 09:00:53 -0700 | [diff] [blame^] | 29 | |
class MTStatEntry(object):
  """A single 'MTStat:' line captured from a gestures log.

  Attributes:
    logline: the full log line containing the tag.
    id: identifier of the report this line came from.
    tagline: the portion of the line after the 'MTStat:' tag.
  """
  TAG = 'MTStat:'
  # The timestamp immediately precedes the tag: '... 123.456:MTStat:...'.
  # Raw string avoids invalid '\:' escape warnings on modern Python;
  # pre-compiled since timestamp may be queried per entry.
  _TIMESTAMP_RE = re.compile(r'([0-9]+\.[0-9]+):MTStat')

  def __init__(self, logline, id):
    self.logline = logline
    self.id = id
    startidx = logline.find(MTStatEntry.TAG) + len(MTStatEntry.TAG)
    self.tagline = logline[startidx:]

  @property
  def timestamp(self):
    """Timestamp of the log line as float seconds, or 0.0 if absent."""
    match = MTStatEntry._TIMESTAMP_RE.search(self.logline)
    if match:
      return float(match.group(1))
    return 0.0
| 44 | |
Dennis Kempin | 19e972b | 2013-06-20 13:21:38 -0700 | [diff] [blame] | 45 | class MTStat(object): |
| 46 | def DownloadFile(self, id): |
| 47 | """Download one feedback log into cache.""" |
| 48 | downloader = FeedbackDownloader() |
| 49 | |
| 50 | filename = os.path.join(log_dir, id) |
| 51 | if os.path.exists(filename): |
| 52 | print 'Skipping existing report', id |
| 53 | return |
| 54 | |
| 55 | print 'Downloading new report', id |
| 56 | try: |
| 57 | # might throw IO/Tar/Zip/etc exceptions |
| 58 | report = FeedbackLog(id, force_latest='pad') |
Dennis Kempin | 55af9cc | 2013-06-20 15:07:21 -0700 | [diff] [blame] | 59 | # Test parse. Will throw exception on malformed log |
| 60 | json.loads(report.activity) |
Dennis Kempin | 19e972b | 2013-06-20 13:21:38 -0700 | [diff] [blame] | 61 | except: |
| 62 | print 'Invalid report %s' % id |
Dennis Kempin | 55af9cc | 2013-06-20 15:07:21 -0700 | [diff] [blame] | 63 | return |
| 64 | |
Dennis Kempin | 19e972b | 2013-06-20 13:21:38 -0700 | [diff] [blame] | 65 | # check if report contains logs and actual events |
| 66 | if report.activity and report.evdev and 'E:' in report.evdev: |
| 67 | report.SaveAs(filename) |
| 68 | else: |
| 69 | print 'Invalid report %s' % id |
| 70 | |
| 71 | def Download(self, num, offset=0, parallel=True): |
| 72 | """Download 'num' new feedback logs into cache.""" |
| 73 | downloader = FeedbackDownloader() |
| 74 | |
| 75 | # download list of feedback report id's |
| 76 | params = { |
| 77 | '$limit': str(num), |
| 78 | '$offset': str(offset), |
| 79 | 'mapping': ':list', |
| 80 | 'productId': '208' # ChromeOS |
| 81 | } |
| 82 | url = ('https://feedback.corp.google.com/resty/ReportSearch?' + |
| 83 | urllib.urlencode(params)) |
| 84 | data = downloader.DownloadFile(url) |
| 85 | data = data[data.find('{'):] # strip garbage before json data |
| 86 | |
| 87 | reports_json = json.loads(data) |
| 88 | report_ids = [item['id'] for item in reports_json['results']] |
| 89 | |
| 90 | # Download and check each report |
| 91 | if parallel: |
| 92 | pool = multiprocessing.Pool() |
| 93 | results = pool.map(MTStatDownloadSubprocess, report_ids) |
| 94 | pool.terminate() |
| 95 | else: |
| 96 | results = map(MTStatDownloadSubprocess, report_ids) |
| 97 | |
Dennis Kempin | 6e03a43 | 2013-06-25 09:00:53 -0700 | [diff] [blame^] | 98 | def GatherEntriesFromFile(self, filename): |
| 99 | """ Gathers all MTStat entries from a log file |
Dennis Kempin | 19e972b | 2013-06-20 13:21:38 -0700 | [diff] [blame] | 100 | |
| 101 | Replays a logfile and searched for MTStat log entries. |
| 102 | The return value is a list of all log entries found. |
| 103 | """ |
Dennis Kempin | 55af9cc | 2013-06-20 15:07:21 -0700 | [diff] [blame] | 104 | try: |
| 105 | log = Log(filename) |
Dennis Kempin | 6e03a43 | 2013-06-25 09:00:53 -0700 | [diff] [blame^] | 106 | id = os.path.basename(filename) |
Dennis Kempin | 55af9cc | 2013-06-20 15:07:21 -0700 | [diff] [blame] | 107 | replay = MTReplay() |
| 108 | platform = replay.PlatformOf(log) |
| 109 | if not platform: |
| 110 | print 'No platform for %s' % os.path.basename(filename) |
| 111 | return ([], 0) |
Dennis Kempin | 19e972b | 2013-06-20 13:21:38 -0700 | [diff] [blame] | 112 | |
Dennis Kempin | 55af9cc | 2013-06-20 15:07:21 -0700 | [diff] [blame] | 113 | # count the number of syn reports in log file |
| 114 | num_syn = len(tuple(re.finditer("0000 0000 0", log.evdev))) |
| 115 | |
| 116 | result = replay.Replay(log) |
| 117 | stats = [] |
| 118 | for line in result.gestures_log.splitlines(): |
Dennis Kempin | 6e03a43 | 2013-06-25 09:00:53 -0700 | [diff] [blame^] | 119 | if MTStatEntry.TAG in line: |
| 120 | stats.append(MTStatEntry(line, filename)) |
Dennis Kempin | 55af9cc | 2013-06-20 15:07:21 -0700 | [diff] [blame] | 121 | return (stats, num_syn) |
| 122 | except: |
| 123 | print filename, traceback.format_exc() |
Dennis Kempin | 19e972b | 2013-06-20 13:21:38 -0700 | [diff] [blame] | 124 | return ([], 0) |
Dennis Kempin | 19e972b | 2013-06-20 13:21:38 -0700 | [diff] [blame] | 125 | |
Dennis Kempin | 6e03a43 | 2013-06-25 09:00:53 -0700 | [diff] [blame^] | 126 | def _GatherEntries(self, number, parallel): |
| 127 | files = [os.path.abspath(os.path.join(log_dir, f)) |
| 128 | for f in os.listdir(log_dir) |
| 129 | if f.isdigit()] |
Dennis Kempin | 19e972b | 2013-06-20 13:21:38 -0700 | [diff] [blame] | 130 | |
| 131 | if number is not None: |
| 132 | files = random.sample(files, number) |
| 133 | |
Dennis Kempin | 6e03a43 | 2013-06-25 09:00:53 -0700 | [diff] [blame^] | 134 | print "Processing %d log files" % len(files) |
| 135 | |
Dennis Kempin | 19e972b | 2013-06-20 13:21:38 -0700 | [diff] [blame] | 136 | if parallel: |
| 137 | pool = multiprocessing.Pool() |
Dennis Kempin | 6e03a43 | 2013-06-25 09:00:53 -0700 | [diff] [blame^] | 138 | pool_results = pool.map(MTStatGatherSubprocess, files) |
Dennis Kempin | 19e972b | 2013-06-20 13:21:38 -0700 | [diff] [blame] | 139 | pool.terminate() |
| 140 | else: |
Dennis Kempin | 6e03a43 | 2013-06-25 09:00:53 -0700 | [diff] [blame^] | 141 | pool_results = map(MTStatGatherSubprocess, files) |
Dennis Kempin | 19e972b | 2013-06-20 13:21:38 -0700 | [diff] [blame] | 142 | |
Dennis Kempin | 6e03a43 | 2013-06-25 09:00:53 -0700 | [diff] [blame^] | 143 | entries = [] |
| 144 | syn_count = 0 |
| 145 | for pool_entries, pool_syn_count in pool_results: |
| 146 | syn_count = syn_count + pool_syn_count |
| 147 | entries.extend(pool_entries) |
Dennis Kempin | 19e972b | 2013-06-20 13:21:38 -0700 | [diff] [blame] | 148 | |
Dennis Kempin | 55af9cc | 2013-06-20 15:07:21 -0700 | [diff] [blame] | 149 | # syn reports are coming at approx 60 Hz on most platforms |
Dennis Kempin | 19e972b | 2013-06-20 13:21:38 -0700 | [diff] [blame] | 150 | syn_per_second = 60.0 |
Dennis Kempin | 6e03a43 | 2013-06-25 09:00:53 -0700 | [diff] [blame^] | 151 | hours = syn_count / syn_per_second / 60.0 / 60.0 |
Dennis Kempin | 19e972b | 2013-06-20 13:21:38 -0700 | [diff] [blame] | 152 | |
Dennis Kempin | 55af9cc | 2013-06-20 15:07:21 -0700 | [diff] [blame] | 153 | print "Processed ~%.2f hours of interaction" % hours |
Dennis Kempin | 6e03a43 | 2013-06-25 09:00:53 -0700 | [diff] [blame^] | 154 | |
| 155 | return entries |
| 156 | |
| 157 | def Search(self, search, number=None, parallel=True): |
| 158 | """ Search for occurences of a specific tag |
| 159 | |
| 160 | number: optional number of random reports to use |
| 161 | parallel: use parallel processing |
| 162 | |
| 163 | returns a dictionary of lists containing the matches |
| 164 | for each file. |
| 165 | """ |
| 166 | entries = self._GatherEntries(number, parallel) |
| 167 | |
| 168 | entries_by_id = collections.defaultdict(list) |
| 169 | for entry in entries: |
| 170 | if entry.tagline == search: |
| 171 | entries_by_id[entry.id].append(entry) |
| 172 | return entries_by_id |
| 173 | |
| 174 | def GatherStats(self, number=None, parallel=True): |
| 175 | """ Gathers stats on feedback reports. |
| 176 | |
| 177 | Each tag is part of a group, with group names separated |
| 178 | by colons. Each group can also have sub-groups for example: |
| 179 | |
| 180 | GroupA:ValueA |
| 181 | GroupA:ValueB |
| 182 | GroupA:Subgroup:ValueA |
| 183 | GroupA:Subgroup:ValueB |
| 184 | |
| 185 | number: optional number of random reports to use |
| 186 | parallel: use parallel processing |
| 187 | |
| 188 | returns a dictionary that maps a tag to a tuple of |
| 189 | it's number of occurences and it's ratio. |
| 190 | The ratios are calculated within each group (excluding sub-groups) |
| 191 | """ |
| 192 | entries = self._GatherEntries(number, parallel) |
| 193 | |
| 194 | # build histogram of stat lines |
| 195 | counts = collections.defaultdict(lambda: 0) |
| 196 | num_syn = 0 |
| 197 | for entry in entries: |
| 198 | counts[entry.tagline] = counts[entry.tagline] + 1 |
| 199 | |
| 200 | def get_parent(tagline): |
| 201 | return ":".join(tagline.split(":")[:-1]) |
| 202 | |
| 203 | # calculate ratios |
| 204 | ratios = {} |
| 205 | for tagline, count in counts.items(): |
| 206 | parent = get_parent(tagline) |
| 207 | sibling_sum = 0 |
| 208 | for other, other_count in counts.items(): |
| 209 | if get_parent(other) == parent: |
| 210 | sibling_sum = sibling_sum + other_count |
| 211 | ratios[tagline] = float(count) / float(sibling_sum) |
| 212 | |
| 213 | stats = {} |
| 214 | for key in counts.keys(): |
| 215 | stats[key] = (counts[key], ratios[key]) |
| 216 | |
| 217 | return collections.OrderedDict(sorted(stats.iteritems())) |