blob: ab96b52deeb8b7c469ac3481300dfef5998daa73 [file] [log] [blame]
#! /usr/bin/env python
# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
Harry Cutts0edf1572020-01-21 15:42:10 -08005
6from __future__ import print_function
7
Dennis Kempind5b59022013-07-17 14:12:55 -07008from mtlib.log import FeedbackDownloader, FeedbackLog, Log
9from mtreplay import MTReplay
Dennis Kempin7432eb02014-03-18 13:41:41 -070010import fnmatch
Dennis Kempind5b59022013-07-17 14:12:55 -070011import json
12import multiprocessing
13import os
14import random
15import re
16import traceback
17import urllib
Sean O'Brienfd204da2017-05-02 15:13:11 -070018import datetime
Dennis Kempind5b59022013-07-17 14:12:55 -070019
20
# Prepare the folder that holds the pool of downloaded log files.
script_dir = os.path.dirname(os.path.realpath(__file__))
log_dir = os.path.join(script_dir, '../cache/logs/')
# File listing the feedback IDs whose logs turned out to be invalid.
invalid_filename = os.path.join(log_dir, 'invalid')
if not os.path.exists(log_dir):
  try:
    # makedirs also creates a missing '../cache' parent, which a plain
    # mkdir would fail on.
    os.makedirs(log_dir)
  except OSError:
    # Another process may have created the folder between the check and
    # the call; only propagate the error if the folder is really missing.
    if not os.path.isdir(log_dir):
      raise
27
28
class Query(object):
  """Abstract base class for queries.

  Query objects are applied to files by the QueryEngine, which calls
  FindMatches to execute the search on the replay results.

  Attributes:
    capture_logs: can be set to True to direct the QueryEngine to return
        the Log object in the QueryResult. Defaults to False.
  """

  def __init__(self):
    self.capture_logs = False

  def FindMatches(self, replay_results, filename):
    """Returns a list of QueryMatch objects.

    This base implementation ignores its arguments and reports no matches;
    subclasses are expected to override it.

    Args:
      replay_results: result of replaying the log file.
      filename: name of the log file that was replayed.

    Returns:
      A new, empty list.
    """
    return []
44
45
class QueryMatch(object):
  """Describes a match and contains information on how to locate it.

  Attributes:
    filename: name of the file the match was found in.
    timestamp: timestamp of the match, or None if it has none.
    line: text describing the match.
  """

  def __init__(self, filename, timestamp, line):
    self.filename = filename
    self.timestamp = timestamp
    self.line = line

  def __str__(self):
    """Returns "<timestamp>: <line>", or just the line without timestamp."""
    # Compare against None instead of relying on truthiness, so a match at
    # timestamp 0 still shows where it was found.
    if self.timestamp is not None:
      return "%s: %s" % (self.timestamp, self.line)
    return str(self.line)

  def __repr__(self):
    return str(self)
61
62
class QueryResult(object):
  """Describes the results of a query on a file.

  Attributes:
    filename: name of the file the query was executed on.
    syn_count: number of SYN reports processed; starts at 0.
    matches: list of all matches found in this file; starts empty.
    log: the activity Log object if requested by the Query, otherwise None.
  """

  def __init__(self, filename):
    self.filename = filename
    self.syn_count = 0
    self.matches = []
    self.log = None
74
75
class QueryEngine(object):
  """Executes queries on a large number of log files.

  It manages a pool of log files (stored in log_dir), allows more log files
  to be downloaded and can execute queries in parallel on this pool.
  """

  @staticmethod
  def _MapInPool(function, parameters):
    """Maps function over parameters in a process pool; returns a list."""
    pool = multiprocessing.Pool()
    try:
      return pool.map(function, parameters)
    finally:
      # Always stop the workers, even if one of the calls raised.
      pool.terminate()

  @staticmethod
  def _MarkInvalid(report_id):
    """Appends report_id to the file listing invalid feedback IDs."""
    with open(invalid_filename, 'a') as f:
      f.write(report_id + '\n')

  def ExecuteSingle(self, filename, query):
    """Executes a query on a single log file.

    Args:
      filename: path of the log file to load and replay.
      query: object providing FindMatches(replay_results, filename).

    Returns:
      A QueryResult for the file. Its matches stay empty if no platform
      was found for the log or if replaying the log failed.
    """
    log = Log(filename)
    replay = MTReplay()
    result = QueryResult(filename)

    # find platform for file
    platform = replay.PlatformOf(log)
    if not platform:
      print("No platform for %s" % os.path.basename(filename))
      return result

    # count the number of syn reports in log file
    result.syn_count = sum(1 for _ in re.finditer("0000 0000 0", log.evdev))

    # run replay
    try:
      replay_result = replay.Replay(log)
    except Exception:
      # Best effort: a log that cannot be replayed yields a result without
      # matches. KeyboardInterrupt/SystemExit are deliberately not caught.
      return result

    result.matches = query.FindMatches(replay_result, filename)
    if result.matches:
      result.log = replay_result.log

    return result

  def Execute(self, filenames, queries, parallel=True):
    """Executes a query on a list of log files.

    Args:
      filenames: list of filenames to execute.
      queries: either a single query object for all files, or a dictionary
          mapping filenames to query objects. Files without an entry in
          the dictionary are skipped.
      parallel: set to False to execute sequentially.

    Returns:
      A dictionary mapping filename to QueryResult, containing only the
      files that produced at least one match.
    """
    print("Processing %d log files" % len(filenames))

    if hasattr(queries, 'FindMatches'):
      queries = {filename: queries for filename in filenames}

    # arguments for ExecuteSingleSubprocess
    parameters = [(name, queries[name])
                  for name in filenames if name in queries]

    # process all files either in parallel or sequential
    if parallel:
      results = self._MapInPool(ExecuteSingleSubprocess, parameters)
    else:
      # A real list is required: the results are traversed twice below.
      results = [ExecuteSingleSubprocess(args) for args in parameters]

    # count syn reports
    syn_count = sum(result.syn_count for result in results)

    # create dict of results by filename
    result_dict = {result.filename: result
                   for result in results
                   if result.matches}

    # syn reports are coming at approx 60 Hz on most platforms
    syn_per_second = 60.0
    hours = syn_count / syn_per_second / 60.0 / 60.0
    print("Processed ~%.2f hours of interaction" % hours)

    return result_dict

  def SelectFiles(self, number, platform=None):
    """Returns a random selection of files from the pool.

    Args:
      number: how many files to select, or None to return all candidates.
      platform: optional fnmatch pattern; when given, only logs whose
          detected platform name matches it are considered.

    Returns:
      A list of absolute paths of log files.
    """
    # list all log files
    files = [os.path.abspath(os.path.join(log_dir, f))
             for f in os.listdir(log_dir)
             if f.isdigit()]

    if platform:
      print("Filtering files by platform. This may take a while.")
      platforms = self._MapInPool(GetPlatformSubprocess, files)
      files = [name for name, detected in platforms
               if detected and fnmatch.fnmatch(detected, platform)]
      print("found", len(files), "log files matching", platform)

    # randomly select subset of files
    if number is not None:
      files = random.sample(files, number)
    return files

  def GetInvalidIDs(self):
    """Returns the list of feedback IDs recorded as having invalid logs.

    Returns:
      The stripped lines of the invalid-ID file, or an empty list if that
      file does not exist.
    """
    if not os.path.exists(invalid_filename):
      return []
    with open(invalid_filename) as f:
      return [line.strip() for line in f]

  def DownloadFile(self, id, downloader, invalid_ids):
    """Download one feedback log into the pool.

    Reports that cannot be downloaded or parsed, or that contain no
    activity log or no evdev events, are appended to the invalid-ID file.

    Args:
      id: feedback report ID; also used as the file name in the pool.
      downloader: downloader object handed to FeedbackLog.
      invalid_ids: collection of IDs already known to be invalid.

    Returns:
      1 if a new report was saved, 0 if not.
    """
    filename = os.path.join(log_dir, id)
    if os.path.exists(filename):
      print("Skipping existing report", id)
      return 0
    if id in invalid_ids:
      print("Skipping invalid report", id)
      return 0

    print("Downloading new report", id)
    try:
      # might throw IO/Tar/Zip/etc exceptions
      report = FeedbackLog(id, force_latest='pad', downloader=downloader)
      # Test parse. Will throw exception on malformed log
      json.loads(report.activity)
    except Exception:
      # Not a bare except: an interrupt (Ctrl-C) must not blacklist a
      # report that may be perfectly valid.
      print("Invalid report", id)
      self._MarkInvalid(id)
      return 0

    # check if report contains logs and actual events
    if report.activity and report.evdev and 'E:' in report.evdev:
      report.SaveAs(filename)
      return 1

    print("Invalid report %s" % id)
    self._MarkInvalid(id)
    return 0

  def Download(self, num_to_download, parallel=True):
    """Download 'num_to_download' new feedback logs into the pool.

    Report IDs are requested page by page and every report is downloaded
    and checked until enough valid new reports have been saved.

    Args:
      num_to_download: number of new reports wanted; converted with int().
      parallel: set to False to download sequentially.
    """
    downloader = FeedbackDownloader()

    # Current UTC time in milliseconds since the epoch. Integer division
    # by 1000 turns the microsecond remainder into milliseconds.
    dt = datetime.datetime.utcnow() - datetime.datetime(1970, 1, 1)
    end_time = ((dt.days * 24 * 60 * 60 + dt.seconds) * 1000 +
                dt.microseconds // 1000)
    num_to_download = int(num_to_download)
    num_downloaded = 0
    invalid_ids = self.GetInvalidIDs()
    page_token = ''

    while num_to_download > num_downloaded:
      # Download list of feedback report id's. Ask for more IDs than are
      # still needed, as not every report ends up being saved.
      num_this_iteration = min((num_to_download - num_downloaded) * 5, 500)
      page_token, report_ids = downloader.DownloadIDs(
          num_this_iteration, end_time, page_token)

      # Download and check each report
      parameters = [(r_id, downloader, invalid_ids) for r_id in report_ids]
      if parallel:
        results = self._MapInPool(DownloadFileSubprocess, parameters)
      else:
        results = [DownloadFileSubprocess(args) for args in parameters]
      num_downloaded += sum(results)
      print("--------------------")
      print("%d/%d reports found" % (num_downloaded, num_to_download))
      print("--------------------")
Sean O'Brienfd204da2017-05-02 15:13:11 -0700246
Dennis Kempind5b59022013-07-17 14:12:55 -0700247
def GetPlatformSubprocess(filename):
  """Wrapper for subprocesses to detect the platform of a log file.

  Args:
    filename: path of the log file to inspect.

  Returns:
    A (filename, platform_name) tuple; platform_name is None if
    MTReplay.PlatformOf reported no platform for the log.
  """
  replay = MTReplay()
  log = Log(filename)
  detected_platform = replay.PlatformOf(log)
  if not detected_platform:
    return filename, None
  print(filename + ": " + detected_platform.name)
  return filename, detected_platform.name
Dennis Kempind5b59022013-07-17 14:12:55 -0700257
def ExecuteSingleSubprocess(args):
  """Wrapper for subprocesses to run ExecuteSingle.

  Args:
    args: (filename, query) tuple, packed into one argument so the
        function can be used with map().

  Returns:
    The QueryResult returned by QueryEngine.ExecuteSingle.
  """
  try:
    return QueryEngine().ExecuteSingle(args[0], args[1])
  except Exception:
    # Print the full traceback where the failure happened, then re-raise
    # with a bare raise so the original traceback is preserved.
    traceback.print_exc()
    raise
265
def DownloadFileSubprocess(args):
  """Wrapper for subprocesses to run DownloadFile.

  Args:
    args: (id, downloader, invalid_ids) tuple, packed into one argument so
        the function can be used with map().

  Returns:
    The value returned by QueryEngine.DownloadFile (1 if a report was
    saved, 0 if not).
  """
  try:
    return QueryEngine().DownloadFile(args[0], args[1], args[2])
  except Exception:
    # Print the full traceback where the failure happened, then re-raise
    # with a bare raise so the original traceback is preserved.
    traceback.print_exc()
    raise