#!/usr/bin/env python
# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

from __future__ import print_function

import datetime
import fnmatch
import json
import multiprocessing
import os
import random
import re
import traceback
import urllib

from mtlib.log import FeedbackDownloader, FeedbackLog, Log
from mtreplay import MTReplay


# prepare the cache folder for log files
script_dir = os.path.dirname(os.path.realpath(__file__))
log_dir = os.path.join(script_dir, '../cache/logs/')
invalid_filename = os.path.join(log_dir, 'invalid')
if not os.path.exists(log_dir):
  os.mkdir(log_dir)


class Query(object):
  """Abstract base class for queries.

  Query objects are applied to log files by the QueryEngine, which
  calls FindMatches to execute the search on the replay results.

  Set capture_logs to True to direct the QueryEngine to include the
  Log object in the QueryResult.
  """
  def __init__(self):
    self.capture_logs = False

  def FindMatches(self, replay_results, filename):
    """Returns a list of QueryMatch objects."""
    return []


class QueryMatch(object):
  """Describes a match and contains information on how to locate it."""
  def __init__(self, filename, timestamp, line):
    self.filename = filename
    self.timestamp = timestamp
    self.line = line

  def __str__(self):
    if self.timestamp:
      return str(self.timestamp) + ": " + self.line
    else:
      return self.line

  def __repr__(self):
    return str(self)


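# Illustrative example (not part of the original module): a minimal
# concrete Query that greps the replayed activity log for a regular
# expression. It assumes the replay result exposes the activity Log via
# a .log attribute whose text lives in .activity, mirroring how
# QueryEngine.ExecuteSingle stores replay_result.log; adjust the
# attribute names if the real replay result structure differs.
class RegexQuery(Query):
  def __init__(self, pattern):
    super(RegexQuery, self).__init__()
    self.regex = re.compile(pattern)
    self.capture_logs = True  # ask the engine to attach the Log object

  def FindMatches(self, replay_results, filename):
    # one QueryMatch per matching activity-log line, without a timestamp
    return [QueryMatch(filename, None, line)
            for line in replay_results.log.activity.splitlines()
            if self.regex.search(line)]

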
class QueryResult(object):
  """Describes the results of a query on a file.

  This includes all matches found in this file, the number of
  SYN reports processed and, optionally, the activity Log object,
  if requested by the Query.
  """
  def __init__(self, filename):
    self.filename = filename
    self.syn_count = 0
    self.matches = []
    self.log = None


class QueryEngine(object):
  """Executes queries on a large number of log files.

  It manages a pool of log files, allows more log files to be
  downloaded, and can execute queries on this pool in parallel.
  """

  def ExecuteSingle(self, filename, query):
    """Executes a query on a single log file."""
    log = Log(filename)
    replay = MTReplay()
    result = QueryResult(filename)

    # find the platform for this log file
    platform = replay.PlatformOf(log)
    if not platform:
      print("No platform for %s" % os.path.basename(filename))
      return result

    # count the number of SYN reports in the log file
    result.syn_count = len(re.findall("0000 0000 0", log.evdev))

    # run the replay; a failed replay yields an empty result
    try:
      replay_result = replay.Replay(log)
    except Exception:
      return result

    result.matches = query.FindMatches(replay_result, filename)
    if result.matches:
      result.log = replay_result.log

    return result

  def Execute(self, filenames, queries, parallel=True):
    """Executes queries on a list of log files.

    filenames: list of log files to process.
    queries: either a single Query object applied to all files,
             or a dictionary mapping filenames to Query objects.
    parallel: set to False to execute sequentially.
    """

    print("Processing %d log files" % len(filenames))

    # a single query object is applied to all files
    if hasattr(queries, 'FindMatches'):
      queries = {filename: queries for filename in filenames}

    # arguments for ExecuteSingleSubprocess
    parameters = [(name, queries[name])
                  for name in filenames if name in queries]

    # process all files, either in parallel or sequentially
    if parallel:
      pool = multiprocessing.Pool()
      results = pool.map(ExecuteSingleSubprocess, parameters)
      pool.terminate()
    else:
      results = list(map(ExecuteSingleSubprocess, parameters))

    # count SYN reports
    syn_count = sum(result.syn_count for result in results)

    # create a dict of results by filename, keeping only files with matches
    result_dict = {result.filename: result
                   for result in results
                   if result.matches}

    # SYN reports arrive at roughly 60 Hz on most platforms
    syn_per_second = 60.0
    hours = syn_count / syn_per_second / 60.0 / 60.0
    print("Processed ~%.2f hours of interaction" % hours)

    return result_dict

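  # Illustrative usage (not in the original module; the query names are
  # hypothetical): `queries` may be a single Query applied to every
  # file, or a per-file mapping that picks a different query per file:
  #   engine.Execute(files, some_query)
  #   engine.Execute(files, {files[0]: query_a, files[1]: query_b})
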
  def SelectFiles(self, number, platform=None):
    """Returns a random selection of files from the pool."""
    # list all log files; feedback logs are stored under their numeric IDs
    files = [os.path.abspath(os.path.join(log_dir, f))
             for f in os.listdir(log_dir)
             if f.isdigit()]

    if platform:
      print("Filtering files by platform. This may take a while.")
      pool = multiprocessing.Pool()
      platforms = pool.map(GetPlatformSubprocess, files)
      pool.terminate()

      files = [f for (f, p) in platforms
               if p and fnmatch.fnmatch(p, platform)]
      print("found", len(files), "log files matching", platform)

    # randomly select a subset of files
    if number is not None:
      files = random.sample(files, min(number, len(files)))
    return files

  def GetInvalidIDs(self):
    """Returns the list of feedback IDs known to have invalid logs."""
    if not os.path.exists(invalid_filename):
      return []
    with open(invalid_filename) as f:
      return [line.strip() for line in f]

  def DownloadFile(self, id, downloader, invalid_ids):
    """Downloads one feedback log into the pool.

    Returns 1 if successful, 0 if not.
    """
    filename = os.path.join(log_dir, id)
    if os.path.exists(filename):
      print("Skipping existing report", id)
      return 0
    if id in invalid_ids:
      print("Skipping invalid report", id)
      return 0

    print("Downloading new report", id)
    try:
      # might throw IO/Tar/Zip/etc exceptions
      report = FeedbackLog(id, force_latest='pad', downloader=downloader)
      # Test parse. Will throw an exception on a malformed log.
      json.loads(report.activity)
    except Exception:
      print("Invalid report", id)
      with open(invalid_filename, 'a') as f:
        f.write(id + '\n')
      return 0

    # check that the report contains logs and actual events
    if report.activity and report.evdev and 'E:' in report.evdev:
      report.SaveAs(filename)
      return 1
    else:
      print("Invalid report", id)
      with open(invalid_filename, 'a') as f:
        f.write(id + '\n')
      return 0

  def Download(self, num_to_download, parallel=True):
    """Downloads 'num_to_download' new feedback logs into the pool."""
    downloader = FeedbackDownloader()

    # end_time is the current time in milliseconds since the Unix epoch
    dt = datetime.datetime.utcnow() - datetime.datetime(1970, 1, 1)
    end_time = (((dt.days * 24 * 60 * 60 + dt.seconds) * 1000) +
                (dt.microseconds // 1000))
    num_to_download = int(num_to_download)
    num_downloaded = 0
    invalid_ids = self.GetInvalidIDs()
    page_token = ''

    while num_to_download > num_downloaded:
      # Download a list of feedback report IDs. Request more than are
      # still needed, as many reports will be invalid or already cached.
      num_this_iteration = min((num_to_download - num_downloaded) * 5, 500)
      page_token, report_ids = downloader.DownloadIDs(
          num_this_iteration, end_time, page_token)

      # Download and check each report
      parameters = [(r_id, downloader, invalid_ids) for r_id in report_ids]
      if parallel:
        pool = multiprocessing.Pool()
        results = sum(pool.map(DownloadFileSubprocess, parameters))
        pool.terminate()
      else:
        results = sum(map(DownloadFileSubprocess, parameters))
      num_downloaded += results
      print("--------------------")
      print("%d/%d reports found" % (num_downloaded, num_to_download))
      print("--------------------")


def GetPlatformSubprocess(filename):
  """Wrapper for subprocesses to detect the platform of a log file."""
  replay = MTReplay()
  log = Log(filename)
  detected_platform = replay.PlatformOf(log)
  if detected_platform:
    print(filename + ": " + detected_platform.name)
    return filename, detected_platform.name
  else:
    return filename, None

def ExecuteSingleSubprocess(args):
  """Wrapper for subprocesses to run ExecuteSingle."""
  try:
    return QueryEngine().ExecuteSingle(args[0], args[1])
  except Exception:
    traceback.print_exc()
    raise

def DownloadFileSubprocess(args):
  """Wrapper for subprocesses to run DownloadFile."""
  try:
    return QueryEngine().DownloadFile(args[0], args[1], args[2])
  except Exception:
    traceback.print_exc()
    raise
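

# Illustrative usage sketch (not part of the original module): sample a
# few cached logs and run the hypothetical RegexQuery defined above.
# The pattern 'FlingStop' is a placeholder; substitute any expression
# that appears in your activity logs.
if __name__ == '__main__':
  engine = QueryEngine()
  files = engine.SelectFiles(10)
  results = engine.Execute(files, RegexQuery('FlingStop'))
  for name, result in results.items():
    print("%s: %d matches" % (name, len(result.matches)))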