blob: 0d5ae2a3618d734adfa6fa6a47afd6faef6c6408 [file] [log] [blame]
#! /usr/bin/env python
# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
Harry Cutts0edf1572020-01-21 15:42:10 -08005
Harry Cutts85378ee2020-02-07 15:53:46 -08006from __future__ import absolute_import
7from __future__ import division
Harry Cutts0edf1572020-01-21 15:42:10 -08008from __future__ import print_function
9
Dennis Kempind5b59022013-07-17 14:12:55 -070010from mtlib.log import FeedbackDownloader, FeedbackLog, Log
11from mtreplay import MTReplay
Dennis Kempin7432eb02014-03-18 13:41:41 -070012import fnmatch
Dennis Kempind5b59022013-07-17 14:12:55 -070013import json
14import multiprocessing
15import os
16import random
17import re
18import traceback
19import urllib
Sean O'Brienfd204da2017-05-02 15:13:11 -070020import datetime
Dennis Kempind5b59022013-07-17 14:12:55 -070021
22
# Prepare the folder that holds the pool of log files. Its location is
# fixed relative to this script.
script_dir = os.path.dirname(os.path.realpath(__file__))
log_dir = os.path.join(script_dir, '../cache/logs/')
# File with one feedback ID per line for reports found to be invalid.
invalid_filename = os.path.join(log_dir, 'invalid')
try:
  # makedirs (unlike mkdir) also creates a missing '../cache' parent. The
  # path is normalized first because makedirs is documented to get confused
  # by '..' components; log_dir itself keeps its original spelling.
  os.makedirs(os.path.normpath(log_dir))
except OSError:
  # Either the folder already exists (possibly created concurrently by
  # another process) or creation really failed; only the latter is an error.
  if not os.path.isdir(log_dir):
    raise
29
30
class Query(object):
  """ Base class that concrete queries derive from.

  The QueryEngine applies a query to a log file by calling FindMatches
  with the replay results of that file.

  capture_logs starts out False. It is meant to be set to true by queries
  that want the QueryEngine to hand back the Log object in the QueryResult.
  """
  def __init__(self):
    # Off by default; individual queries opt in.
    self.capture_logs = False

  def FindMatches(self, replay_results, filename):
    """ Returns a list of QueryMatch objects; this base version finds none. """
    return list()
46
47
class QueryMatch(object):
  """ A single match, together with the data needed to locate it again. """
  def __init__(self, filename, timestamp, line):
    self.filename = filename
    self.timestamp = timestamp
    self.line = line

  def __str__(self):
    # A falsy timestamp (None, 0, ...) means the line is shown on its own.
    if not self.timestamp:
      return self.line
    prefix = str(self.timestamp) + ": "
    return prefix + self.line

  def __repr__(self):
    # The repr is deliberately the same text as the string form.
    return str(self)
63
64
class QueryResult(object):
  """ Outcome of running one query against one log file.

  Holds the matches found in the file, the number of SYN reports that
  were processed and, optionally, the activity Log object.

  A fresh instance describes a file with no SYN reports, no matches and
  no log; the code running the query fills the fields in afterwards."""
  def __init__(self, filename):
    self.filename = filename
    self.syn_count, self.matches, self.log = 0, [], None
76
77
class QueryEngine(object):
  """ This class allows queries to be executed on a large number of log files.

  It manages a pool of log files (the numerically named files in log_dir),
  allows more log files to be downloaded into that pool and can execute
  queries on files from the pool, optionally in parallel worker processes.
  """

  def ExecuteSingle(self, filename, query):
    """ Executes a query on a single log file.

    Args:
      filename: path of the log file to load and replay.
      query: object providing FindMatches(replay_results, filename).

    Returns:
      A QueryResult for filename. It carries no matches when no platform
      is found for the log or when replaying the log raises an exception.
    """
    log = Log(filename)
    replay = MTReplay()
    result = QueryResult(filename)

    # find platform for file; without one the file is reported and skipped
    platform = replay.PlatformOf(log)
    if not platform:
      print("No platform for %s" % os.path.basename(filename))
      return result

    # count the number of syn reports in log file
    result.syn_count = sum(1 for _ in re.finditer("0000 0000 0", log.evdev))

    # run replay
    try:
      replay_result = replay.Replay(log)
    except Exception:
      # Best effort: a log that cannot be replayed simply has no matches.
      # Only Exception is caught so that KeyboardInterrupt and SystemExit
      # still stop the run.
      return result

    result.matches = query.FindMatches(replay_result, filename)
    # The replay log is only attached to results that have matches.
    if result.matches:
      result.log = replay_result.log

    return result

  def Execute(self, filenames, queries, parallel=True):
    """ Executes a query on a list of log files.

    Args:
      filenames: list of filenames to execute
      queries: either a single query object for all files,
               or a dictionary mapping filenames to query objects.
               Files without an entry in the dictionary are skipped.
      parallel: set to False to execute sequentially.

    Returns:
      A dictionary mapping filename to QueryResult, containing only the
      files for which at least one match was found.
    """
    print("Processing %d log files" % len(filenames))

    # A single query object is expanded to one entry per file.
    if hasattr(queries, 'FindMatches'):
      queries = {filename: queries for filename in filenames}

    # arguments for ExecuteSingleSubprocess
    parameters = [(name, queries[name])
                  for name in filenames if name in queries]

    # process all files either in parallel or sequential
    if parallel:
      pool = multiprocessing.Pool()
      try:
        results = pool.map(ExecuteSingleSubprocess, parameters)
      finally:
        # Do not leave worker processes behind when a worker raised.
        pool.terminate()
    else:
      # Must be a real list: results is iterated twice below, and map()
      # returns a one-shot iterator on Python 3.
      results = [ExecuteSingleSubprocess(parameter)
                 for parameter in parameters]

    # count syn reports
    syn_count = sum(result.syn_count for result in results)

    # create dict of results by filename
    result_dict = {result.filename: result
                   for result in results
                   if result.matches}

    # syn reports are coming at approx 60 Hz on most platforms
    syn_per_second = 60.0
    hours = syn_count / syn_per_second / 60.0 / 60.0
    print("Processed ~%.2f hours of interaction" % hours)

    return result_dict

  @staticmethod
  def _FilterByPlatform(platforms, pattern):
    """ Returns the filenames whose platform name matches pattern.

    Args:
      platforms: iterable of (filename, platform name) pairs; entries
                 whose platform name is None or empty never match.
      pattern: fnmatch-style pattern applied to the platform name.
    """
    return [filename for filename, name in platforms
            if name and fnmatch.fnmatch(name, pattern)]

  def SelectFiles(self, number, platform=None):
    """ Returns a random selection of files from the pool.

    Args:
      number: how many files to pick; None returns all candidate files.
      platform: optional fnmatch-style pattern; when given, only files
                whose detected platform name matches it are candidates.

    Returns:
      A list of absolute paths of log files.

    Raises:
      ValueError: from random.sample when number exceeds the number of
                  candidate files.
    """
    # list all log files; files in the pool have purely numeric names
    files = [os.path.abspath(os.path.join(log_dir, f))
             for f in os.listdir(log_dir)
             if f.isdigit()]

    if platform:
      print("Filtering files by platform. This may take a while.")
      pool = multiprocessing.Pool()
      try:
        platforms = pool.map(GetPlatformSubprocess, files)
      finally:
        pool.terminate()

      files = self._FilterByPlatform(platforms, platform)
      print("found", len(files), "log files matching", platform)

    # randomly select subset of files
    if number is not None:
      files = random.sample(files, number)
    return files

  def GetInvalidIDs(self):
    """Look for list of feedback IDs with invalid logs.

    Returns:
      A list with one stripped entry per line of the invalid-ID file, or
      an empty list when that file does not exist.
    """
    if not os.path.exists(invalid_filename):
      return []
    with open(invalid_filename) as f:
      return [line.strip() for line in f]

  def _MarkInvalid(self, report_id):
    """Appends report_id to the invalid-ID file so it is skipped later."""
    with open(invalid_filename, 'a') as f:
      f.write(report_id + '\n')

  def DownloadFile(self, id, downloader, invalid_ids):
    """Download one feedback log into the pool.

    Args:
      id: feedback report ID; also used as the file name in the pool.
      downloader: downloader object handed on to FeedbackLog.
      invalid_ids: collection of IDs already known to be invalid.

    Return 1 if successful, 0 if not.
    """
    filename = os.path.join(log_dir, id)
    if os.path.exists(filename):
      print("Skipping existing report", id)
      return 0
    if id in invalid_ids:
      print("Skipping invalid report", id)
      return 0

    print("Downloading new report", id)
    try:
      # might throw IO/Tar/Zip/etc exceptions
      report = FeedbackLog(id, force_latest='pad', downloader=downloader)
      # Test parse. Will throw exception on malformed log
      json.loads(report.activity)
    except Exception:
      # Only Exception is caught: interrupting the download with Ctrl-C
      # must not record a possibly valid report as invalid.
      print("Invalid report", id)
      self._MarkInvalid(id)
      return 0

    # check if report contains logs and actual events
    if report.activity and report.evdev and 'E:' in report.evdev:
      report.SaveAs(filename)
      return 1

    print("Invalid report %s" % id)
    self._MarkInvalid(id)
    return 0

  def Download(self, num_to_download, parallel=True):
    """Download 'num_to_download' new feedback logs into the pool.

    Args:
      num_to_download: number of new logs wanted; converted with int().
      parallel: set to False to download sequentially.
    """
    downloader = FeedbackDownloader()

    num_to_download = int(num_to_download)
    num_downloaded = 0
    # Only membership is tested on this, so a set keeps the lookups cheap.
    invalid_ids = set(self.GetInvalidIDs())
    page_token = ''

    while num_to_download > num_downloaded:
      # Download list of feedback report id's. Five times the number still
      # missing is requested (at most 500), presumably because many
      # candidates end up skipped or invalid.
      num_this_iteration = min((num_to_download - num_downloaded) * 5, 500)
      page_token, report_ids = downloader.DownloadIDs(
          num_this_iteration, page_token=page_token)

      if not report_ids:
        # Without candidates no progress is possible; stop instead of
        # requesting empty pages forever.
        print("No more report IDs available")
        break

      # Download and check each report
      parameters = [(r_id, downloader, invalid_ids) for r_id in report_ids]
      if parallel:
        pool = multiprocessing.Pool()
        try:
          results = sum(pool.map(DownloadFileSubprocess, parameters))
        finally:
          pool.terminate()
      else:
        results = sum(DownloadFileSubprocess(parameter)
                      for parameter in parameters)
      num_downloaded += results
      print("--------------------")
      print("%d/%d reports found" % (num_downloaded, num_to_download))
      print("--------------------")
Sean O'Brienfd204da2017-05-02 15:13:11 -0700246
Dennis Kempind5b59022013-07-17 14:12:55 -0700247
def GetPlatformSubprocess(filename):
  """ Detects the platform of one log file.

  Takes a single argument so it can be handed to Pool.map directly.

  Returns:
    (filename, platform name) when a platform was detected, which is also
    printed; (filename, None) otherwise.
  """
  detected_platform = MTReplay().PlatformOf(Log(filename))
  if not detected_platform:
    return filename, None

  platform_name = detected_platform.name
  print(filename + ": " + platform_name)
  return filename, platform_name
Dennis Kempind5b59022013-07-17 14:12:55 -0700257
def ExecuteSingleSubprocess(args):
  """ Wrapper for subprocesses to run ExecuteSingle

  Takes a single packed argument so it can be handed to Pool.map.

  Args:
    args: (filename, query) pair.

  Returns:
    The QueryResult returned by QueryEngine.ExecuteSingle.

  Raises:
    Exception: whatever ExecuteSingle raised, after its traceback has
               been printed.
  """
  try:
    return QueryEngine().ExecuteSingle(args[0], args[1])
  except Exception:
    # Print the traceback in the process where the failure happened, then
    # re-raise with a bare 'raise' so the original traceback is kept
    # ('raise e' restarts it at this line under Python 2).
    traceback.print_exc()
    raise
265
def DownloadFileSubprocess(args):
  """ Wrapper for subprocesses to run DownloadFile

  Takes a single packed argument so it can be handed to Pool.map.

  Args:
    args: (report id, downloader, invalid ids) triple.

  Returns:
    The value returned by QueryEngine.DownloadFile (1 when a report was
    stored, 0 otherwise).

  Raises:
    Exception: whatever DownloadFile raised, after its traceback has been
               printed.
  """
  try:
    return QueryEngine().DownloadFile(args[0], args[1], args[2])
  except Exception:
    # Print the traceback in the process where the failure happened, then
    # re-raise with a bare 'raise' so the original traceback is kept
    # ('raise e' restarts it at this line under Python 2).
    traceback.print_exc()
    raise
Sean O'Brienfd204da2017-05-02 15:13:11 -0700272 raise e