[Autotest] merge cleanup and verify

The objective of this CL is to merge cleanup and verify into a single job to
reduce the run time of each test. In the existing design, by default, a cleanup
job is scheduled after a test finishes, and a verify job is scheduled before a
test starts. Merging these two jobs reduces their combined run time from about
47s to 37s, a saving of around 10s. That does not include the time the
scheduler saves by scheduling one job instead of two, which may be another
5-10s.

The design is to create a new special task, reset, which runs at the beginning
of a job by default. The verify task no longer runs by default before a job
starts. A cleanup job is run only if the job is scheduled to reboot and any
test in that job failed.

BUG=chromium:220679
TEST=tested with run_suite in local machine
DEPLOY=afe,apache,scheduler,change all users' preference on reboot_after to
Never, sql: |update chromeos_autotest_db.afe_users set reboot_after=0|

Change-Id: Ia38baf6b73897b7e09fdf635eadedc752b5eba2f
Reviewed-on: https://gerrit.chromium.org/gerrit/48685
Commit-Queue: Dan Shi <dshi@chromium.org>
Reviewed-by: Dan Shi <dshi@chromium.org>
Tested-by: Dan Shi <dshi@chromium.org>
diff --git a/server/server_job.py b/server/server_job.py
index fcf7d77..2f50492 100644
--- a/server/server_job.py
+++ b/server/server_job.py
@@ -1,3 +1,5 @@
+# pylint: disable-msg=C0111
+
 # Copyright (c) 2011 The Chromium OS Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
@@ -39,6 +41,7 @@
 REPAIR_CONTROL_FILE = _control_segment_path('repair')
 PROVISION_CONTROL_FILE = _control_segment_path('provision')
 VERIFY_JOB_REPO_URL_CONTROL_FILE = _control_segment_path('verify_job_repo_url')
+RESET_CONTROL_FILE = _control_segment_path('reset')
 
 
 # by default provide a stub that generates no site data
@@ -334,6 +337,7 @@
 
 
     def verify(self):
+        """Verify machines are all ssh-able."""
         if not self.machines:
             raise error.AutoservError('No machines specified to verify')
         if self.resultdir:
@@ -350,6 +354,26 @@
             raise
 
 
+    def reset(self):
+        """Reset machines by first cleanup then verify each machine."""
+        if not self.machines:
+            raise error.AutoservError('No machines specified to reset.')
+        if self.resultdir:
+            os.chdir(self.resultdir)
+
+        try:
+            namespace = {'machines' : self.machines, 'job' : self,
+                         'ssh_user' : self._ssh_user,
+                         'ssh_port' : self._ssh_port,
+                         'ssh_pass' : self._ssh_pass}
+            self._execute_code(RESET_CONTROL_FILE, namespace, protect=False)
+        except Exception as e:
+            msg = ('Reset failed\n' + str(e) + '\n' +
+                   traceback.format_exc())
+            self.record('ABORT', None, None, msg)
+            raise
+
+
     def repair(self, host_protection):
         if not self.machines:
             raise error.AutoservError('No machines specified to repair')
@@ -878,6 +902,10 @@
         @param update_func - a function that updates the list of uncollected
             logs. Should take one parameter, the list to be updated.
         """
+        # Skip log collection if file _uncollected_log_file does not exist.
+        if not (self._uncollected_log_file and
+                os.path.exists(self._uncollected_log_file)):
+            return
         if self._uncollected_log_file:
             log_file = open(self._uncollected_log_file, "r+")
             fcntl.flock(log_file, fcntl.LOCK_EX)