MTStat: Refactored to support searching for changes
The new -c or --changes parameter allows searching for gesture changes
introduced by code changes, which is useful for testing and validating
those changes or for searching real-world log files for regression
tests.
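
For example, after making a local, uncommitted edit to the gestures
library:

  $ mtstat -c -g "ButtonDown" -n 100

(-g and -n are optional; "ButtonDown" is just an example gesture type.)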
BUG=chromium:259532
TEST=run mtstat -c
Change-Id: I5b5d778144085426abb66408bc3e331d6ff450e7
Reviewed-on: https://gerrit.chromium.org/gerrit/62392
Reviewed-by: Dennis Kempin <denniskempin@chromium.org>
Commit-Queue: Dennis Kempin <denniskempin@chromium.org>
Tested-by: Dennis Kempin <denniskempin@chromium.org>
diff --git a/mtstat/main.py b/mtstat/main.py
index 13e33fb..971ea7f 100755
--- a/mtstat/main.py
+++ b/mtstat/main.py
@@ -84,6 +84,23 @@
$ %prog -r "ERROR:"
+Searching for Changed Gestures
+------------------------------
+
+MTStat can also be used to search for changes in the generated gestures
+introduced by changes to the code of the gestures library.
+
+To do so, make local changes to the gestures library without
+committing them. Then run:
+
+$ %prog -c [-n 100]
+
+This command will compare against the HEAD version of the gestures
+library and present all changed gestures.
+
+You can also limit the search to a certain gesture type:
+
+$ %prog -c -g "ButtonDown" [-n 100]
"""
def main():
@@ -106,6 +123,17 @@
parser.add_option('-r', '--regex',
dest='regex', default=None,
help='search log output for regex')
+ parser.add_option('-g', '--gestures',
+ dest='gestures', default=None,
+                    help='search log for gestures of the given type')
+ parser.add_option('-b', '--bins',
+ dest='bins', default=10,
+ help='number of bins for histograms')
+ parser.add_option('-c', '--changes',
+ dest='changes', action='store_true', default=False,
+ help='search for changes introduced by ' +
+ 'local code changes to the gestures library')
+
(options, args) = parser.parse_args()
stat = MTStat()
@@ -115,31 +143,31 @@
number = None if options.number is None else int(options.number)
- if options.search or options.regex:
- if options.search:
- results = stat.Search(query=options.search, number=number)
+ if (options.search or options.regex or options.gestures or
+ options.changes):
+ if options.changes:
+ results = stat.SearchChanges(options.gestures,
+ number=number)
else:
- results = stat.Search(regex=options.regex, number=number)
+ results = stat.Search(search=options.search, gestures=options.gestures,
+ regex=options.regex, number=number)
print "Found occurences in %d files" % len(results)
print "Serving results, press CTRL-C to abort"
- for filename, entries in results.items():
- print "%d occurences in %s:" % (len(entries), filename)
- for entry in entries:
- print " " + entry.line
+ for filename, result in results.items():
+      print "%d occurrences in %s:" % (len(result.matches), filename)
+ for match in result.matches:
+ print " ", match
# display link to MTEdit session
if not options.list:
- first = entries[0]
- replay_results = MTReplay().Replay(Log(filename))
- editor = MTEdit()
-
- # for MTStat results we have
- editor.View(replay_results.log, first.timestamp)
+ MTEdit().View(result.log, result.matches[0].timestamp)
raw_input("Press Enter to continue...")
return
- results = stat.GatherStats(number)
+
+ bins = int(options.bins)
+ results = stat.GatherStats(number, num_bins=bins)
for key, hist in results.items():
print "%s:" % key
overall_count = sum(hist.values())
diff --git a/mtstat/mtstat.py b/mtstat/mtstat.py
index d027789..47894af 100644
--- a/mtstat/mtstat.py
+++ b/mtstat/mtstat.py
@@ -2,60 +2,35 @@
# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
-from mtlib.log import Log, FeedbackDownloader, FeedbackLog
+from collections import Counter, defaultdict, namedtuple, OrderedDict
+from itertools import chain
from mtreplay import MTReplay
-import collections
-import json
-import multiprocessing
-import os
-import random
+from queryengine import Query, QueryEngine, QueryMatch, QueryResult
import re
-import traceback
-import urllib
-
-script_dir = os.path.dirname(os.path.realpath(__file__))
-log_dir = os.path.join(script_dir, '../cache/logs/')
-if not os.path.exists(log_dir):
- os.mkdir(log_dir)
-def SortedDict(dict):
- return collections.OrderedDict(sorted(dict.items(), key=lambda t: t[0]))
+class MTStatQuery(Query):
+ """ Searches for MTStat lines.
+ An MTStat line looks like 'MTStat:124.321:Key=Value' and is used
+  for generating statistics and for searching numeric values.
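+
+  The operator between key and value selects how occurrences are
+  counted: ':' marks a sample (every occurrence counts), while '='
+  marks an update (only the last value per key and file counts).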
+ """
+  line_regex = re.compile(r'MTStat:([0-9]+\.[0-9]+):(\w+)([=:])(\w+)')
+ line_match = namedtuple("LineMatch", "timestamp key operator value")
-def MTStatSearchSubprocess(params):
- return MTStat().FindMatchesInFile(params[0], params[1])
-
-
-def MTStatDownloadSubprocess(id):
- return MTStat().DownloadFile(id)
-
-
-class SearchMatch(object):
- # Example: MTStat:124.321:Key=Value
- mtstat_regex = re.compile('MTStat:([0-9]+\.[0-9]+):(\w+)([=:])(\w+)')
-
- def __init__(self, line, file):
- self.line = line
- self.file = file
-
- self.key = None
- self.value = None
- self.operator = None
- self.timestamp = None
- self.ismtstat = False
-
- match = SearchMatch.mtstat_regex.search(self.line)
+ @classmethod
+ def MatchLine(cls, line):
+ match = cls.line_regex.match(line)
if match:
- self.timestamp = float(match.group(1))
- self.key = match.group(2)
- self.operator = match.group(3)
- self.value = match.group(4)
- self.ismtstat = True
+ return cls.line_match(
+ timestamp=match.group(1),
+ key=match.group(2),
+ operator=match.group(3),
+ value=match.group(4)
+ )
+ return None
-
-class MTStat(object):
-
+ # supported operators for queries
search_operators = {
">": lambda a, b: float(a) > float(b),
">=": lambda a, b: float(a) >= float(b),
@@ -66,102 +41,145 @@
"!=": lambda a, b: str(a) != str(b)
}
- def FindMatchesInFile(self, filename, regex_str=None):
- """ Searchs all MTStat entries from a log file
+ def __init__(self, query=None, capture_logs=False):
+ self.query = query
+ self.capture_logs = capture_logs
+ # parse search query
+ if self.query:
+ # example: Key >= 32
+      query_regex = re.compile(r'\s*(\w+)\s*([<>=!]+)\s*([0-9a-zA-Z]+)\s*')
+ match = query_regex.match(query)
+ if not match:
+        raise ValueError("%s is not a valid query" % query)
+ self.key = match.group(1)
+ self.op = match.group(2)
+ self.value = match.group(3)
- Replays a logfile and searched for MTStat log entries.
- Returns two values, a list of SearchMarches and the number of
- SYN reports that has been processed.
+ def FindMatches(self, replay_results, filename):
+ # find MTStat lines in log
+ lines = replay_results.gestures_log.splitlines()
+ lines = filter(lambda line: MTStatQuery.MatchLine(line), lines)
+ all_matches = map(lambda line: MTStatMatch(line, filename), lines)
+
+    # samples are denoted by the ':' operator; all of them are returned.
+ samples = filter(lambda match: match.operator == ":", all_matches)
+
+    # updates are denoted by the '=' operator; only the last update
+    # per key is returned.
+ updates = filter(lambda match: match.operator == "=", all_matches)
+ last_updates = dict([(update.key, update) for update in updates])
+
+ matches = samples + last_updates.values()
+
+ # filter by search query if requested
+ if self.query:
+ matches = filter(self.Match, matches)
+ return matches
+
+ def Match(self, match):
+ op = MTStatQuery.search_operators[self.op]
+ return (match.key == self.key and
+ op(match.value, self.value))
+
+
+class MTStatMatch(QueryMatch):
+ def __init__(self, line, file):
+ match = MTStatQuery.MatchLine(line)
+ QueryMatch.__init__(self, file, float(match.timestamp), line)
+ self.key = match.key
+ self.operator = match.operator
+ self.value = match.value
+
+
+class RegexQuery(Query):
+ """ Searches the raw gestures log with a regular expression """
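+  # Example (sketch): RegexQuery('ERROR:') performs the same search as
+  # the '-r "ERROR:"' example in the main.py help text.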
+ def __init__(self, query):
+ self.regex = re.compile(query)
+ self.capture_logs = True
+
+ def FindMatches(self, replay_results, filename):
+ lines = replay_results.gestures_log.splitlines()
+    lines = filter(lambda line: self.regex.search(line), lines)
+ matches = map(lambda line: QueryMatch(filename, None, line), lines)
+ return matches
+
+
+class GesturesQuery(Query):
+ """ Searches for gestures with matching type """
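+  # Example (sketch): GesturesQuery("ButtonDown", capture_logs=True)
+  # matches all ButtonDown gestures; main.py builds this query for the
+  # -g command line flag.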
+ def __init__(self, type, capture_logs):
+ self.type = type
+ self.capture_logs = capture_logs
+
+ def FindMatches(self, replay_results, filename):
+ gestures = replay_results.gestures.gestures
+ if self.type:
+ gestures = filter(lambda g: g.type == self.type, gestures)
+ return map(lambda g: GesturesMatch(filename, g), gestures)
+
+
+class GesturesMatch(QueryMatch):
+ def __init__(self, filename, gesture):
+ QueryMatch.__init__(self, filename, gesture.start, str(gesture))
+ self.gesture = gesture
+
+
+class GesturesDiffQuery(GesturesQuery):
+ """ Compare gestures to 'original' QueryResult and search for changes """
+ def __init__(self, type, original):
+ self.type = type
+ self.original = original
+ self.capture_logs = True
+
+ def FindMatches(self, replay_results, filename):
+ self_matches = GesturesQuery.FindMatches(self, replay_results, filename)
+ original_matches = self.original.matches
+
+ size = min(len(original_matches), len(self_matches))
+ matches = []
+ for i in range(size):
+ if str(self_matches[i].gesture) != str(original_matches[i].gesture):
+ match = GesturesDiffMatch(filename, self_matches[i].gesture,
+ original_matches[i].gesture)
+ matches.append(match)
+ return matches
+
+
+class GesturesDiffMatch(GesturesMatch):
+ def __init__(self, filename, new, original):
+ GesturesMatch.__init__(self, filename, new)
+ self.original = original
+
+ def __str__(self):
+ return "%f: %s != %s" % (self.timestamp, str(self.gesture),
+ str(self.original))
+
+
+class MTStat(object):
+ def SearchChanges(self, type=None, number=None, parallel=True):
+ """ Search for changed gestures.
+
+ This command will compare the gestures output of the HEAD version of
+ the gestures library with the local version.
+    Optionally, the gesture type to look at can be specified via 'type'.
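+
+    A typical invocation might look like this (sketch; 'ButtonDown' is
+    just an example gesture type):
+      results = MTStat().SearchChanges(type="ButtonDown", number=100)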
"""
- try:
- log = Log(filename)
+ engine = QueryEngine()
+ files = engine.SelectFiles(number)
- # find platform for file
- replay = MTReplay()
- platform = replay.PlatformOf(log)
- if not platform:
- print 'No platform for %s' % os.path.basename(filename)
- return ([], 0)
+ MTReplay().Recompile(head=True)
+ gestures_query = GesturesQuery(type, False)
+ original_results = engine.Execute(files, gestures_query, parallel=parallel)
- # count the number of syn reports in log file
- num_syn = len(tuple(re.finditer("0000 0000 0", log.evdev)))
-
- # run replay
- result = replay.Replay(log)
-
- # per default we search for MTStat lines
- if regex_str:
- regex = re.compile(regex_str)
- else:
- regex = SearchMatch.mtstat_regex
-
- # find all lines matching the regex
- stats = []
- for line in result.gestures_log.splitlines():
- if regex.search(line):
- stats.append(SearchMatch(line, filename))
- return (stats, num_syn)
- except:
- print filename, traceback.format_exc()
- return ([], 0)
-
- def _FindAllMatches(self, number, parallel, search_regex=None):
- # make sure the replay binaries are up to date
MTReplay().Recompile()
+    diff_queries = dict([
+ (file, GesturesDiffQuery(type, original_results[file]))
+ for file in files if file in original_results])
+ return engine.Execute(files, diff_queries, parallel=parallel)
- # list all log files
- files = [os.path.abspath(os.path.join(log_dir, f))
- for f in os.listdir(log_dir)
- if f.isdigit()]
-
- # randomly select subset of files
- if number is not None:
- files = random.sample(files, number)
-
- # arguments for MTStatSearchSubprocess
- parameters = [(file, search_regex) for file in files]
-
- print "Processing %d log files" % len(files)
-
- # process all files either in parallel or sequential
- if parallel:
- pool = multiprocessing.Pool()
- pool_results = pool.map(MTStatSearchSubprocess, parameters)
- pool.terminate()
- else:
- pool_results = map(MTStatSearchSubprocess, parameters)
-
- # merge results of each file into one big list
- entries = []
- syn_count = 0
- for file_entries, file_syn_count in pool_results:
- syn_count = syn_count + file_syn_count
-
- # the = operator is used to make only the latest
- # entry of each file count.
- file_uniques = {}
- for entry in file_entries:
- if entry.operator == '=':
- file_uniques[entry.key] = entry
- else:
- entries.append(entry)
-
- # add the last value of each unique entry
- for value in file_uniques.values():
- entries.append(value)
-
- # syn reports are coming at approx 60 Hz on most platforms
- syn_per_second = 60.0
- hours = syn_count / syn_per_second / 60.0 / 60.0
-
- print "Processed ~%.2f hours of interaction" % hours
-
- return entries
-
- def Search(self, query=None, regex=None, number=None, parallel=True):
+ def Search(self, search=None, gestures=None, regex=None,
+ number=None, parallel=True):
""" Search for occurences of a specific tag or regex.
- Specify either a 'query' or a 'regex'. Queries are formatted
+    Specify a 'search', a 'gestures' type or a 'regex'. Search queries are formatted
in a simple "key operator value" format. For example:
"MyKey > 5" will return all matches where MyKey has a value
of greater than 5.
@@ -173,45 +191,44 @@
-    returns a dictionary of lists containing the matches
-    for each file.
+    returns a dictionary mapping each file to a QueryResult
+    containing the matches found in that file.
"""
- entries = self._FindAllMatches(number, parallel, search_regex=regex)
+ engine = QueryEngine()
+ files = engine.SelectFiles(number)
- if query:
- match = re.match("\s*(\w+)\s*([<>=!]+)\s*([0-9a-zA-Z]+)\s*", query)
- if not match:
- print query, " is not a valid query"
- return {}
- search_key = match.group(1)
- search_op = MTStat.search_operators[match.group(2)]
- search_value = match.group(3)
+ if search:
+ query = MTStatQuery(search, True)
+ elif regex:
+ query = RegexQuery(regex)
+ elif gestures:
+ query = GesturesQuery(gestures, True)
+ else:
+ return None
- entries_by_file = collections.defaultdict(list)
- for entry in entries:
- if query is None or (entry.key == search_key and
- search_op(entry.value, search_value)):
- entries_by_file[entry.file].append(entry)
- return entries_by_file
+ return engine.Execute(files, query, parallel=parallel)
def GatherStats(self, number=None, parallel=True, num_bins=10):
""" Gathers stats on feedback reports.
Returns a dictionary with a histogram for each recorded key.
"""
- entries = self._FindAllMatches(number, parallel)
+ engine = QueryEngine()
+ files = engine.SelectFiles(number)
+ results = engine.Execute(files, MTStatQuery(), parallel=parallel)
# gather values for each key in a list
- value_collection = collections.defaultdict(list)
- for entry in entries:
- value_collection[entry.key].append(entry.value)
+ all_matches = chain(*[result.matches for result in results.values()])
+ value_collection = defaultdict(list)
+ for match in all_matches:
+ value_collection[match.key].append(match.value)
# build histograms
histograms = {}
for key, values in value_collection.items():
histograms[key] = self._Histogram(values, num_bins)
- return SortedDict(histograms)
+ return OrderedDict(sorted(histograms.items(), key=lambda t: t[0]))
def _Histogram(self, values, num_bins):
def RawHistogram(values):
- return SortedDict(collections.Counter(values))
+      return OrderedDict(sorted(Counter(values).items()))
# convert all items to integers.
integers = []
@@ -227,7 +244,7 @@
return RawHistogram(integers)
# all integer values, use bins for histogram
- histogram = collections.OrderedDict()
+ histogram = OrderedDict()
integers = sorted(integers)
# calculate bin size (append one at the end to include last value)
@@ -243,55 +260,3 @@
histogram["<%d" % high_v] = len(integers) - len(filtered)
integers = filtered
return histogram
-
- def DownloadFile(self, id):
- """Download one feedback log into cache."""
- downloader = FeedbackDownloader()
-
- filename = os.path.join(log_dir, id)
- if os.path.exists(filename):
- print 'Skipping existing report', id
- return
-
- print 'Downloading new report', id
- try:
- # might throw IO/Tar/Zip/etc exceptions
- report = FeedbackLog(id, force_latest='pad')
- # Test parse. Will throw exception on malformed log
- json.loads(report.activity)
- except:
- print 'Invalid report %s' % id
- return
-
- # check if report contains logs and actual events
- if report.activity and report.evdev and 'E:' in report.evdev:
- report.SaveAs(filename)
- else:
- print 'Invalid report %s' % id
-
- def Download(self, num, offset=0, parallel=True):
- """Download 'num' new feedback logs into cache."""
- downloader = FeedbackDownloader()
-
- # download list of feedback report id's
- params = {
- '$limit': str(num),
- '$offset': str(offset),
- 'mapping': ':list',
- 'productId': '208' # ChromeOS
- }
- url = ('https://feedback.corp.google.com/resty/ReportSearch?' +
- urllib.urlencode(params))
- data = downloader.DownloadFile(url)
- data = data[data.find('{'):] # strip garbage before json data
-
- reports_json = json.loads(data)
- report_ids = [item['id'] for item in reports_json['results']]
-
- # Download and check each report
- if parallel:
- pool = multiprocessing.Pool()
- results = pool.map(MTStatDownloadSubprocess, report_ids)
- pool.terminate()
- else:
- results = map(MTStatDownloadSubprocess, report_ids)
diff --git a/mtstat/queryengine.py b/mtstat/queryengine.py
new file mode 100644
index 0000000..c9ff288
--- /dev/null
+++ b/mtstat/queryengine.py
@@ -0,0 +1,225 @@
+#! /usr/bin/env python
+# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+from mtlib.log import FeedbackDownloader, FeedbackLog, Log
+from mtreplay import MTReplay
+import json
+import multiprocessing
+import os
+import random
+import re
+import traceback
+import urllib
+
+
+# prepare folder for log files
+script_dir = os.path.dirname(os.path.realpath(__file__))
+log_dir = os.path.join(script_dir, '../cache/logs/')
+if not os.path.exists(log_dir):
+ os.mkdir(log_dir)
+
+
+class Query(object):
+ """ Abstract class for queries.
+
+ These objects are applied to files by the QueryEngine, which
+ calls FindMatches to execute the search on the replay results.
+
+  capture_logs can be set to True to direct the QueryEngine to return
+ the Log object in the QueryResults.
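+
+  A minimal custom query might look like this (hypothetical sketch,
+  not part of this change):
+
+    class ErrorLineQuery(Query):
+      def FindMatches(self, replay_results, filename):
+        return [QueryMatch(filename, None, line)
+                for line in replay_results.gestures_log.splitlines()
+                if 'ERROR' in line]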
+ """
+ def __init__(self):
+ self.capture_logs = False
+
+ def FindMatches(self, replay_results, filename):
+ """ Returns a list of QueryMatch objects """
+ return []
+
+
+class QueryMatch(object):
+ """ Describes a match and contains information on how to locate it """
+ def __init__(self, filename, timestamp, line):
+ self.filename = filename
+ self.timestamp = timestamp
+ self.line = line
+
+ def __str__(self):
+ if self.timestamp:
+ return str(self.timestamp) + ": " + self.line
+ else:
+ return self.line
+
+ def __repr__(self):
+ return str(self)
+
+
+class QueryResult(object):
+ """ Describes the results of a query on a file.
+
+ This includes all matches found in this file, the number of
+ SYN reports processed and optionally the activity Log object,
+ if requested by the Query."""
+ def __init__(self, filename):
+ self.filename = filename
+ self.syn_count = 0
+ self.matches = []
+ self.log = None
+
+
+class QueryEngine(object):
+ """ This class allows queries to be executed on a large number of log files.
+
+  It manages a pool of log files, allows more log files to be downloaded, and
+ can execute queries in parallel on this pool of log files.
+ """
+
+ def ExecuteSingle(self, filename, query):
+ """ Executes a query on a single log file """
+ log = Log(filename)
+ replay = MTReplay()
+ result = QueryResult(filename)
+
+ # find platform for file
+ platform = replay.PlatformOf(log)
+ if not platform:
+ print 'No platform for %s' % os.path.basename(filename)
+ return result
+
+ # count the number of syn reports in log file
+ result.syn_count = len(tuple(re.finditer("0000 0000 0", log.evdev)))
+
+ # run replay
+ try:
+ replay_result = replay.Replay(log)
+ except:
+ return result
+
+ result.matches = query.FindMatches(replay_result, filename)
+ if result.matches:
+ result.log = replay_result.log
+
+ return result
+
+ def Execute(self, filenames, queries, parallel=True):
+ """ Executes a query on a list of log files.
+
+    filenames: list of log files to run the queries on
+ queries: either a single query object for all files,
+ or a dictionary mapping filenames to query objects.
+ parallel: set to False to execute sequentially.
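+
+    Sketch of a typical call (mirrors MTStat.GatherStats):
+      engine = QueryEngine()
+      files = engine.SelectFiles(number)
+      results = engine.Execute(files, MTStatQuery(), parallel=True)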
+ """
+
+ print "Processing %d log files" % len(filenames)
+
+ if hasattr(queries, 'FindMatches'):
+ queries = dict([(filename, queries) for filename in filenames])
+
+ # arguments for QuerySubprocess
+ parameters = [(name, queries[name])
+ for name in filenames if name in queries]
+
+ # process all files either in parallel or sequential
+ if parallel:
+ pool = multiprocessing.Pool()
+ results = pool.map(ExecuteSingleSubprocess, parameters)
+ pool.terminate()
+ else:
+ results = map(ExecuteSingleSubprocess, parameters)
+
+ # count syn reports
+ syn_count = sum([result.syn_count for result in results])
+
+ # create dict of results by filename
+ result_dict = dict([(result.filename, result)
+ for result in results
+ if result.matches])
+
+ # syn reports are coming at approx 60 Hz on most platforms
+ syn_per_second = 60.0
+ hours = syn_count / syn_per_second / 60.0 / 60.0
+ print "Processed ~%.2f hours of interaction" % hours
+
+ return result_dict
+
+ def SelectFiles(self, number):
+ """ Returns a random selection of files from the pool """
+ # list all log files
+ files = [os.path.abspath(os.path.join(log_dir, f))
+ for f in os.listdir(log_dir)
+ if f.isdigit()]
+
+ # randomly select subset of files
+ if number is not None:
+ files = random.sample(files, number)
+ return files
+
+ def DownloadFile(self, id):
+ """Download one feedback log into the pool."""
+ downloader = FeedbackDownloader()
+
+ filename = os.path.join(log_dir, id)
+ if os.path.exists(filename):
+ print 'Skipping existing report', id
+ return
+
+ print 'Downloading new report', id
+ try:
+ # might throw IO/Tar/Zip/etc exceptions
+ report = FeedbackLog(id, force_latest='pad')
+ # Test parse. Will throw exception on malformed log
+ json.loads(report.activity)
+ except:
+ print 'Invalid report %s' % id
+ return
+
+ # check if report contains logs and actual events
+ if report.activity and report.evdev and 'E:' in report.evdev:
+ report.SaveAs(filename)
+ else:
+ print 'Invalid report %s' % id
+
+ def Download(self, num, offset=0, parallel=True):
+ """Download 'num' new feedback logs into the pool."""
+ downloader = FeedbackDownloader()
+
+ # download list of feedback report id's
+ params = {
+ '$limit': str(num),
+ '$offset': str(offset),
+ 'mapping': ':list',
+ 'productId': '208' # ChromeOS
+ }
+ url = ('https://feedback.corp.google.com/resty/ReportSearch?' +
+ urllib.urlencode(params))
+ data = downloader.DownloadFile(url)
+ data = data[data.find('{'):] # strip garbage before json data
+
+ reports_json = json.loads(data)
+ report_ids = [item['id'] for item in reports_json['results']]
+
+ # Download and check each report
+ if parallel:
+ pool = multiprocessing.Pool()
+ results = pool.map(DownloadFileSubprocess, report_ids)
+ pool.terminate()
+ else:
+ results = map(DownloadFileSubprocess, report_ids)
+
+
+def ExecuteSingleSubprocess(args):
+ """ Wrapper for subprocesses to run ExecuteSingle """
+ try:
+ return QueryEngine().ExecuteSingle(args[0], args[1])
+ except Exception, e:
+ traceback.print_exc()
+ raise e
+
+
+def DownloadFileSubprocess(id):
+ """ Wrapper for subprocesses to run DownloadFile """
+ try:
+ return QueryEngine().DownloadFile(id)
+ except Exception, e:
+ traceback.print_exc()
+ raise e
\ No newline at end of file