Fix automatic feedback downloading

The Feedback team updated their API, which now requires GaiaMint
credentials that must be refreshed periodically. This CL uses the
new credentials and API to download batches of feedback reports.
The tool must be run outside the chroot. For example, 100 feedback
reports can be downloaded like so:

$ prodaccess
$ mtstat -d 100
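
Here 'prodaccess' refreshes the GaiaMint credentials, and the '-d'
flag gives the number of reports to download.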

BUG=none
TEST=manually tested downloading up to 1000 reports

Change-Id: I0131e972358c05465ab982e07534814f48265fce
Reviewed-on: https://chromium-review.googlesource.com/495646
Commit-Ready: Sean O'Brien <seobrien@chromium.org>
Tested-by: Sean O'Brien <seobrien@chromium.org>
Reviewed-by: Dennis Kempin <denniskempin@google.com>
diff --git a/mtstat/queryengine.py b/mtstat/queryengine.py
index 32c077e..b24a6b7 100755
--- a/mtstat/queryengine.py
+++ b/mtstat/queryengine.py
@@ -12,11 +12,13 @@
+import datetime
 import re
 import traceback
 import urllib
 
 
 # prepare folder for log files
 script_dir = os.path.dirname(os.path.realpath(__file__))
 log_dir = os.path.join(script_dir, '../cache/logs/')
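+# File recording feedback report IDs whose logs failed to parse, so that
+# later runs can skip them instead of re-downloading.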
+invalid_filename = os.path.join(log_dir, 'invalid')
 if not os.path.exists(log_dir):
   os.mkdir(log_dir)
 
@@ -167,57 +169,78 @@
       files = random.sample(files, number)
     return files
 
-  def DownloadFile(self, id):
-    """Download one feedback log into the pool."""
-    downloader = FeedbackDownloader()
+  def GetInvalidIDs(self):
+    """Return the list of feedback IDs whose logs were found to be invalid."""
+    if not os.path.exists(invalid_filename):
+      return []
+    with open(invalid_filename) as f:
+      return [line.strip() for line in f]
 
+  def DownloadFile(self, id, downloader, invalid_ids):
+    """Download one feedback log into the pool.
+
+    Return 1 if successful, 0 if not.
+    """
     filename = os.path.join(log_dir, id)
     if os.path.exists(filename):
       print 'Skipping existing report', id
-      return
+      return 0
+    if id in invalid_ids:
+      print 'Skipping invalid report', id
+      return 0
 
     print 'Downloading new report', id
     try:
       # might throw IO/Tar/Zip/etc exceptions
-      report = FeedbackLog(id, force_latest='pad')
+      report = FeedbackLog(id, force_latest='pad', downloader=downloader)
       # Test parse. Will throw exception on malformed log
       json.loads(report.activity)
     except:
-      print 'Invalid report %s' % id
-      return
+      print 'Invalid report', id
+      with open(invalid_filename, 'a') as f:
+        f.write(id + '\n')
+      return 0
 
     # check if report contains logs and actual events
     if report.activity and report.evdev and 'E:' in report.evdev:
       report.SaveAs(filename)
+      return 1
     else:
       print 'Invalid report %s' % id
+      with open(invalid_filename, 'a') as f:
+        f.write(id + '\n')
+      return 0
 
-  def Download(self, num, offset=0, parallel=True):
-    """Download 'num' new feedback logs into the pool."""
+  def Download(self, num_to_download, parallel=True):
+    """Download 'num_to_download' new feedback logs into the pool."""
     downloader = FeedbackDownloader()
 
-    # download list of feedback report id's
-    params = {
-      '$limit': str(num),
-      '$offset': str(offset),
-      'mapping': ':list',
-      'productId': '208'  # ChromeOS
-    }
-    url = ('https://feedback.corp.google.com/resty/ReportSearch?' +
-           urllib.urlencode(params))
-    data = downloader.DownloadFile(url)
-    data = data[data.find('{'):]  # strip garbage before json data
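+    # Current UTC time in milliseconds since the Unix epoch, used as the
+    # end_time argument to DownloadIDs.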
+    dt = datetime.datetime.utcnow() - datetime.datetime(1970, 1, 1)
+    end_time = (((dt.days * 24 * 60 * 60 + dt.seconds) * 1000) +
+                (dt.microseconds / 1000))
+    num_to_download = int(num_to_download)
+    num_downloaded = 0
+    invalid_ids = self.GetInvalidIDs()
+    page_token = ''
 
-    reports_json = json.loads(data)
-    report_ids = [item['id'] for item in reports_json['results']]
+    while num_to_download > num_downloaded:
+      # Download a list of feedback report IDs.
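+      # Request more IDs than are still needed (capped at 500), since some
+      # will be skipped as already downloaded or known-invalid.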
+      num_this_iteration = min((num_to_download - num_downloaded) * 5, 500)
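+      # The returned page token is fed back in on the next iteration to
+      # continue the listing where this batch ended.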
+      page_token, report_ids = downloader.DownloadIDs(
+          num_this_iteration, end_time, page_token)
 
-    # Download and check each report
-    if parallel:
-      pool = multiprocessing.Pool()
-      results = pool.map(DownloadFileSubprocess, report_ids)
-      pool.terminate()
-    else:
-      results = map(DownloadFileSubprocess, report_ids)
+      # Download and check each report
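+      # Pool.map passes each worker a single argument, so bundle the
+      # per-report arguments into tuples.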
+      parameters = [(r_id, downloader, invalid_ids) for r_id in report_ids]
+      if parallel:
+        pool = multiprocessing.Pool()
+        results = sum(pool.map(DownloadFileSubprocess, parameters))
+        pool.terminate()
+      else:
+        results = sum(map(DownloadFileSubprocess, parameters))
+      num_downloaded += results
+      print "--------------------"
+      print "%d/%d reports found" % (num_downloaded, num_to_download)
+      print "--------------------"
+
 
 def GetPlatformSubprocess(filename):
   replay = MTReplay()
@@ -237,11 +260,10 @@
     traceback.print_exc()
     raise e
 
-
-def DownloadFileSubprocess(id):
+def DownloadFileSubprocess(args):
   """ Wrapper for subprocesses to run DownloadFile """
   try:
-    return QueryEngine().DownloadFile(id)
+    return QueryEngine().DownloadFile(*args)
   except Exception, e:
     traceback.print_exc()
-    raise e
\ No newline at end of file
+    raise e