#! /usr/bin/env python
# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
from mtlib.log import Log, FeedbackDownloader, FeedbackLog
from mtreplay import MTReplay
import collections
import json
import multiprocessing
import os
import random
import re
import traceback
import urllib

script_dir = os.path.dirname(os.path.realpath(__file__))
log_dir = os.path.join(script_dir, '../cache/logs/')
if not os.path.exists(log_dir):
  os.mkdir(log_dir)


def SortedDict(d):
  return collections.OrderedDict(sorted(d.items(), key=lambda t: t[0]))
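# e.g. SortedDict({'b': 1, 'a': 2}) == OrderedDict([('a', 2), ('b', 1)])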


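# multiprocessing.Pool needs picklable, top-level callables, so these
# module-level wrappers forward the work to MTStat methods.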
def MTStatSearchSubprocess(params):
  return MTStat().FindMatchesInFile(params[0], params[1])


def MTStatDownloadSubprocess(id):
  return MTStat().DownloadFile(id)


class SearchMatch(object):
  # Example: MTStat:124.321:Key=Value
  mtstat_regex = re.compile(r'MTStat:([0-9]+\.[0-9]+):(\w+)([=:])(\w+)')
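  # Captured groups: (1) timestamp '124.321', (2) key 'Key',
  # (3) operator '=' or ':', (4) value 'Value'.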

  def __init__(self, line, file):
    self.line = line
    self.file = file

    self.key = None
    self.value = None
    self.operator = None
    self.timestamp = None
    self.ismtstat = False

    match = SearchMatch.mtstat_regex.search(self.line)
    if match:
      self.timestamp = float(match.group(1))
      self.key = match.group(2)
      self.operator = match.group(3)
      self.value = match.group(4)
      self.ismtstat = True


class MTStat(object):

  search_operators = {
      ">": lambda a, b: float(a) > float(b),
      ">=": lambda a, b: float(a) >= float(b),
      "<": lambda a, b: float(a) < float(b),
      "<=": lambda a, b: float(a) <= float(b),
      "=": lambda a, b: str(a) == str(b),
      "==": lambda a, b: str(a) == str(b),
      "!=": lambda a, b: str(a) != str(b)
  }
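  # e.g. MTStat.search_operators['>=']('5.0', '3') evaluates to True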

  def FindMatchesInFile(self, filename, regex_str=None):
    """Searches for all MTStat entries in a log file.

    Replays a log file and searches for MTStat log entries.
    Returns two values: a list of SearchMatches and the number of
    SYN reports that have been processed.
    """
    try:
      log = Log(filename)

      # find platform for file
      replay = MTReplay()
      platform = replay.PlatformOf(log)
      if not platform:
        print 'No platform for %s' % os.path.basename(filename)
        return ([], 0)

      # count the number of SYN reports in the log file. In the evdev
      # text log a SYN_REPORT event is rendered as type/code/value
      # "0000 0000 0", so counting those occurrences approximates the
      # number of event frames.
      num_syn = len(tuple(re.finditer("0000 0000 0", log.evdev)))

      # run replay
      result = replay.Replay(log)

      # by default we search for MTStat lines
      if regex_str:
        regex = re.compile(regex_str)
      else:
        regex = SearchMatch.mtstat_regex

      # find all lines matching the regex
      stats = []
      for line in result.gestures_log.splitlines():
        if regex.search(line):
          stats.append(SearchMatch(line, filename))
      return (stats, num_syn)
    except Exception:
      print filename, traceback.format_exc()
      return ([], 0)

  def _FindAllMatches(self, number, parallel, search_regex=None):
    # make sure the replay binaries are up to date
    MTReplay().Recompile()

    # list all log files
    files = [os.path.abspath(os.path.join(log_dir, f))
             for f in os.listdir(log_dir)
             if f.isdigit()]

    # randomly select a subset of files
    if number is not None:
      files = random.sample(files, number)

    # arguments for MTStatSearchSubprocess
    parameters = [(file, search_regex) for file in files]

    print "Processing %d log files" % len(files)

    # process all files, either in parallel or sequentially
    if parallel:
      pool = multiprocessing.Pool()
      pool_results = pool.map(MTStatSearchSubprocess, parameters)
      pool.terminate()
    else:
      pool_results = map(MTStatSearchSubprocess, parameters)

    # merge the results from each file into one big list
    entries = []
    syn_count = 0
    for file_entries, file_syn_count in pool_results:
      syn_count = syn_count + file_syn_count

      # the '=' operator is used to make only the latest
      # entry of each file count.
      file_uniques = {}
      for entry in file_entries:
        if entry.operator == '=':
          file_uniques[entry.key] = entry
        else:
          entries.append(entry)

      # add the last value of each unique entry
      for value in file_uniques.values():
        entries.append(value)

    # SYN reports arrive at approximately 60 Hz on most platforms
    syn_per_second = 60.0
    hours = syn_count / syn_per_second / 60.0 / 60.0
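    # worked example: 216000 SYN reports / 60 Hz = 3600 s, i.e. ~1.00 hours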

    print "Processed ~%.2f hours of interaction" % hours

    return entries

  def Search(self, query=None, regex=None, number=None, parallel=True):
    """Searches for occurrences of a specific tag or regex.

    Specify either a 'query' or a 'regex'. Queries are formatted
    in a simple "key operator value" format. For example:
    "MyKey > 5" will return all matches where MyKey has a value
    greater than 5.
    Supported operators are: >, <, >=, <=, =, !=

    number: optional number of random reports to use
    parallel: use parallel processing

    Returns a dictionary of lists containing the matches
    for each file.
    """
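    # Hypothetical usage (assumes logs are already cached):
    #   entries_by_file = MTStat().Search(query='FingerCount >= 2')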
    entries = self._FindAllMatches(number, parallel, search_regex=regex)

    if query:
      match = re.match(r"\s*(\w+)\s*([<>=!]+)\s*([0-9a-zA-Z]+)\s*", query)
      if not match:
        print query, "is not a valid query"
        return {}
      search_key = match.group(1)
      search_op = MTStat.search_operators[match.group(2)]
      search_value = match.group(3)

    entries_by_file = collections.defaultdict(list)
    for entry in entries:
      if query is None or (entry.key == search_key and
                           search_op(entry.value, search_value)):
        entries_by_file[entry.file].append(entry)
    return entries_by_file

  def GatherStats(self, number=None, parallel=True, num_bins=10):
    """Gathers stats on feedback reports.

    Returns a dictionary with a histogram for each recorded key.
    """
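    # Shape of the result (hypothetical keys and counts):
    #   {'FingerCount': OrderedDict([('<2', 10), ('<4', 3)]), ...}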
    entries = self._FindAllMatches(number, parallel)

    # gather values for each key in a list
    value_collection = collections.defaultdict(list)
    for entry in entries:
      value_collection[entry.key].append(entry.value)

    # build histograms
    histograms = {}
    for key, values in value_collection.items():
      histograms[key] = self._Histogram(values, num_bins)
    return SortedDict(histograms)

  def _Histogram(self, values, num_bins):
    def RawHistogram(values):
      return SortedDict(collections.Counter(values))

    # convert all items to integers
    integers = []
    for value in values:
      try:
        integers.append(int(value))
      except ValueError:
        # not an integer; fall back to counting the raw values
        return RawHistogram(values)

    # don't condense lists that are already small enough
    if len(set(integers)) <= num_bins:
      return RawHistogram(integers)

    # all values are integers; use bins for the histogram
    histogram = collections.OrderedDict()
    integers = sorted(integers)

    # calculate the bin size (add one at the end to include the last value)
    begin = integers[0]
    end = integers[-1] + 1
    bin_size = float(end - begin) / float(num_bins)
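    # e.g. integer values spanning [0, 100) with num_bins=10 yield
    # bin_size=10.0 and histogram keys "<10", "<20", ..., "<100"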

    # remove each bin's integers from the list and count them
    for i in range(num_bins):
      high_v = round((i + 1) * bin_size) + begin

      filtered = filter(lambda value: value >= high_v, integers)
      histogram["<%d" % high_v] = len(integers) - len(filtered)
      integers = filtered
    return histogram

  def DownloadFile(self, id):
    """Download one feedback log into the cache."""
    downloader = FeedbackDownloader()

    filename = os.path.join(log_dir, id)
    if os.path.exists(filename):
      print 'Skipping existing report', id
      return

    print 'Downloading new report', id
    try:
      # might throw IO/Tar/Zip/etc exceptions
      report = FeedbackLog(id, force_latest='pad')
      # Test parse; will throw an exception on a malformed log
      json.loads(report.activity)
    except Exception:
      print 'Invalid report %s' % id
      return

    # check if the report contains logs and actual events
    if report.activity and report.evdev and 'E:' in report.evdev:
      report.SaveAs(filename)
    else:
      print 'Invalid report %s' % id

  def Download(self, num, offset=0, parallel=True):
    """Download 'num' new feedback logs into the cache."""
    downloader = FeedbackDownloader()

    # download a list of feedback report ids
    params = {
        '$limit': str(num),
        '$offset': str(offset),
        'mapping': ':list',
        'productId': '208'  # ChromeOS
    }
    url = ('https://feedback.corp.google.com/resty/ReportSearch?' +
           urllib.urlencode(params))
    data = downloader.DownloadFile(url)
    data = data[data.find('{'):]  # strip any prefix before the JSON payload

    reports_json = json.loads(data)
    report_ids = [item['id'] for item in reports_json['results']]

    # download and check each report
    if parallel:
      pool = multiprocessing.Pool()
      results = pool.map(MTStatDownloadSubprocess, report_ids)
      pool.terminate()
    else:
      results = map(MTStatDownloadSubprocess, report_ids)