Refacor shutdown logic in Goofy into a standalone factory test.

We can have better control over the execution of a standalone shutdown
test with automator. This CL also makes the behavior of Goofy RPC's
RunTest() method consistent so that a caller is able to get back run ID
of shutdown test.

max_reboot_time_secs is moved from test list options to 'shutdown' factory
test dargs.

BUG=none
TEST=make test
TEST=e2e test
TEST=manually test on my DUT

Change-Id: Ie204c438adf22e2da72b8ee6e852bbb34f8057ca
Reviewed-on: https://chromium-review.googlesource.com/188135
Reviewed-by: Jon Salz <jsalz@chromium.org>
Commit-Queue: Ricky Liang <jcliang@chromium.org>
Tested-by: Ricky Liang <jcliang@chromium.org>
diff --git a/py/goofy/goofy.py b/py/goofy/goofy.py
index b36e496..b0058ca 100755
--- a/py/goofy/goofy.py
+++ b/py/goofy/goofy.py
@@ -64,9 +64,6 @@
 
 CLEANUP_LOGS_PAUSED = '/var/lib/cleanup_logs_paused'
 
-# File that suppresses reboot if present (e.g., for development).
-NO_REBOOT_FILE = '/var/log/factory.noreboot'
-
 # Value for tests_after_shutdown that forces auto-run (e.g., after
 # a factory update, when the available set of tests might change).
 FORCE_AUTO_RUN = 'force_auto_run'
@@ -376,7 +373,7 @@
       self.visible_test.update_state(visible=False)
     self.visible_test = test
 
-  def _log_startup_messages(self):
+  def log_startup_messages(self):
     """Logs the tail of var/log/messages and mosys and EC console logs."""
     # TODO(jsalz): This is mostly a copy-and-paste of code in init_states,
     # for factory-3004.B only.  Consolidate and merge back to ToT.
@@ -415,119 +412,72 @@
     except:  # pylint: disable=W0702
       logging.exception('Error retrieving EC panic info')
 
-  def handle_shutdown_complete(self, test, test_state):
+  def shutdown(self, operation):
+    """Starts shutdown procedure.
+
+    Args:
+      operation: The shutdown operation (reboot or halt).
+    """
+    active_tests = []
+    for test in self.test_list.walk():
+      if not test.is_leaf():
+        continue
+
+      test_state = test.get_state()
+      if test_state.status == TestState.ACTIVE:
+        active_tests.append(test)
+
+
+    if not (len(active_tests) == 1 and
+            isinstance(active_tests[0], factory.ShutdownStep)):
+      logging.error(
+          'Calling Goofy shutdown outside of the shutdown factory test')
+      return
+
+    logging.info('Start Goofy shutdown (%s)', operation)
+    # Save pending test list in the state server
+    self.state_instance.set_shared_data(
+        'tests_after_shutdown',
+        [t.path for t in self.tests_to_run])
+    # Save shutdown time
+    self.state_instance.set_shared_data('shutdown_time', time.time())
+
+    with self.env.lock:
+      self.event_log.Log('shutdown', operation=operation)
+      shutdown_result = self.env.shutdown(operation)
+    if shutdown_result:
+      # That's all, folks!
+      self.run_queue.put(None)
+    else:
+      # Just pass (e.g., in the chroot).
+      self.state_instance.set_shared_data('tests_after_shutdown', None)
+      # Send event with no fields to indicate that there is no
+      # longer a pending shutdown.
+      self.event_client.post_event(Event(Event.Type.PENDING_SHUTDOWN))
+
+  def handle_shutdown_complete(self, test):
     """Handles the case where a shutdown was detected during a shutdown step.
 
     Args:
       test: The ShutdownStep.
-      test_state: The test state.
     """
     test_state = test.update_state(increment_shutdown_count=1)
     logging.info('Detected shutdown (%d of %d)',
-           test_state.shutdown_count, test.iterations)
+                 test_state.shutdown_count, test.iterations)
 
-    def log_and_update_state(status, error_msg, **kw):
-      self.event_log.Log('rebooted',
-                 status=status, error_msg=error_msg, **kw)
-      logging.info('Rebooted: status=%s, %s', status,
-                   (('error_msg=%s' % error_msg) if error_msg else None))
-      test.update_state(status=status, error_msg=error_msg)
+    # Insert current shutdown test at the front of the list of tests to run
+    # after shutdown.  This is to continue on post-shutdown verification in the
+    # shutdown step.
+    tests_after_shutdown = self.state_instance.get_shared_data(
+        'tests_after_shutdown', optional=True)
+    if not tests_after_shutdown:
+      self.state_instance.set_shared_data('tests_after_shutdown', [test.path])
+    elif isinstance(tests_after_shutdown, list):
+      self.state_instance.set_shared_data(
+          'tests_after_shutdown', [test.path] + tests_after_shutdown)
 
-    if not self.last_shutdown_time:
-      log_and_update_state(status=TestState.FAILED,
-                 error_msg='Unable to read shutdown_time')
-      return
-
-    now = time.time()
-    logging.info('%.03f s passed since reboot',
-           now - self.last_shutdown_time)
-
-    if self.last_shutdown_time > now:
-      test.update_state(status=TestState.FAILED,
-                error_msg='Time moved backward during reboot')
-    elif (isinstance(test, factory.RebootStep) and
-        self.test_list.options.max_reboot_time_secs and
-        (now - self.last_shutdown_time >
-         self.test_list.options.max_reboot_time_secs)):
-      # A reboot took too long; fail.  (We don't check this for
-      # HaltSteps, because the machine could be halted for a
-      # very long time, and even unplugged with battery backup,
-      # thus hosing the clock.)
-      log_and_update_state(
-        status=TestState.FAILED,
-        error_msg=('More than %d s elapsed during reboot '
-               '(%.03f s, from %s to %s)' % (
-            self.test_list.options.max_reboot_time_secs,
-            now - self.last_shutdown_time,
-            utils.TimeString(self.last_shutdown_time),
-            utils.TimeString(now))),
-        duration=(now-self.last_shutdown_time))
-      self._log_startup_messages()
-    elif test_state.shutdown_count == test.iterations:
-      # Good!
-      log_and_update_state(status=TestState.PASSED,
-                 duration=(now - self.last_shutdown_time),
-                 error_msg='')
-    elif test_state.shutdown_count > test.iterations:
-      # Shut down too many times
-      log_and_update_state(status=TestState.FAILED,
-                 error_msg='Too many shutdowns')
-      self._log_startup_messages()
-    elif utils.are_shift_keys_depressed():
-      logging.info('Shift keys are depressed; cancelling restarts')
-      # Abort shutdown
-      log_and_update_state(
-        status=TestState.FAILED,
-        error_msg='Shutdown aborted with double shift keys')
-      self.cancel_pending_tests()
-    else:
-      def handler():
-        if self._prompt_cancel_shutdown(
-          test, test_state.shutdown_count + 1):
-          factory.console.info('Shutdown aborted by operator')
-          log_and_update_state(
-            status=TestState.FAILED,
-            error_msg='Shutdown aborted by operator')
-          self.cancel_pending_tests()
-          return
-
-        # Time to shutdown again
-        log_and_update_state(
-          status=TestState.ACTIVE,
-          error_msg='',
-          iteration=test_state.shutdown_count)
-
-        self.event_log.Log('shutdown', operation='reboot')
-        self.state_instance.set_shared_data('shutdown_time',
-                        time.time())
-        self.env.shutdown('reboot')
-
-      self.on_ui_startup.append(handler)
-
-  def _prompt_cancel_shutdown(self, test, iteration):
-    if self.options.ui != 'chrome':
-      return False
-
-    pending_shutdown_data = {
-      'delay_secs': test.delay_secs,
-      'enable_guest_mode': test.enable_guest_mode,
-      'time': time.time() + test.delay_secs,
-      'operation': test.operation,
-      'iteration': iteration,
-      'iterations': test.iterations,
-      }
-
-    # Create a new (threaded) event client since we
-    # don't want to use the event loop for this.
-    with EventClient() as event_client:
-      event_client.post_event(Event(Event.Type.PENDING_SHUTDOWN,
-                      **pending_shutdown_data))
-      aborted = event_client.wait(
-        lambda event: event.type == Event.Type.CANCEL_SHUTDOWN,
-        timeout=test.delay_secs) is not None
-      if aborted:
-        event_client.post_event(Event(Event.Type.PENDING_SHUTDOWN))
-      return aborted
+    # Set 'post_shutdown' to inform shutdown test that a shutdown just occurred.
+    self.state_instance.set_shared_data('post_shutdown', True)
 
   def init_states(self):
     """Initializes all states on startup."""
@@ -553,7 +503,7 @@
         continue
       if isinstance(test, factory.ShutdownStep):
         # Shutdown while the test was active - that's good.
-        self.handle_shutdown_complete(test, test_state)
+        self.handle_shutdown_complete(test)
       else:
         # Unexpected shutdown.  Grab /var/log/messages for context.
         if var_log_messages is None:
@@ -805,68 +755,24 @@
                             error_msg=error_msg)
           continue
 
-      if isinstance(test, factory.ShutdownStep):
-        if os.path.exists(NO_REBOOT_FILE):
-          test.update_state(
-              status=TestState.FAILED, increment_count=1,
-              error_msg=('Skipped shutdown since %s is present' %
-                         NO_REBOOT_FILE))
-          continue
-
-        if (test.operation == factory.ShutdownStep.HALT and
-            self.options.automation_mode == AutomationMode.FULL):
-          logging.info('Skip halt in full automation mode.')
-          test.update_state(status=TestState.PASSED)
-          continue
-
-        test.update_state(status=TestState.ACTIVE, increment_count=1,
-                          error_msg='', shutdown_count=0)
-        if self._prompt_cancel_shutdown(test, 1):
-          self.event_log.Log('reboot_cancelled')
-          test.update_state(
-              status=TestState.FAILED, increment_count=1,
-              error_msg='Shutdown aborted by operator',
-              shutdown_count=0)
-          continue
-
-        # Save pending test list in the state server
-        self.state_instance.set_shared_data(
-            'tests_after_shutdown',
-            [t.path for t in self.tests_to_run])
-        # Save shutdown time
-        self.state_instance.set_shared_data('shutdown_time', time.time())
-
-        with self.env.lock:
-          self.event_log.Log('shutdown', operation=test.operation)
-          if (test.enable_guest_mode and
-              not os.path.exists(
-                  test_environment.DUTEnvironment.GUEST_MODE_TAG_FILE)):
-            # Create a temporary file GUEST_MODE_TAG_FILE to enable guest mode
-            # on next boot.
-            os.mknod(test_environment.DUTEnvironment.GUEST_MODE_TAG_FILE)
-          shutdown_result = self.env.shutdown(test.operation)
-        if shutdown_result:
-          # That's all, folks!
-          self.run_queue.put(None)
-          return
-        else:
-          # Just pass (e.g., in the chroot).
-          test.update_state(status=TestState.PASSED)
-          self.state_instance.set_shared_data('tests_after_shutdown', None)
-          # Send event with no fields to indicate that there is no
-          # longer a pending shutdown.
-          self.event_client.post_event(Event(Event.Type.PENDING_SHUTDOWN))
-          continue
-
-      self._run_test(test, test.iterations, test.retries)
+      if (isinstance(test, factory.ShutdownStep) and
+          self.state_instance.get_shared_data('post_shutdown', optional=True)):
+        # Invoking post shutdown method of shutdown test. We should retain the
+        # iterations_left and retries_left of the original test state.
+        test_state = self.state_instance.get_test_state(test.path)
+        self._run_test(test, test_state.iterations_left,
+                       test_state.retries_left)
+      else:
+        # Starts a new test run; reset iterations and retries.
+        self._run_test(test, test.iterations, test.retries)
 
   def _run_test(self, test, iterations_left=None, retries_left=None):
     invoc = TestInvocation(self, test, on_completion=self.run_next_test)
     new_state = test.update_state(
-      status=TestState.ACTIVE, increment_count=1, error_msg='',
-      invocation=invoc.uuid, iterations_left=iterations_left,
-      retries_left=retries_left,
-      visible=(self.visible_test == test))
+        status=TestState.ACTIVE, increment_count=1, error_msg='',
+        invocation=invoc.uuid, iterations_left=iterations_left,
+        retries_left=retries_left,
+        visible=(self.visible_test == test))
     invoc.count = new_state.count
 
     self.invocations[test] = invoc
@@ -1527,16 +1433,12 @@
     self.prespawner = Prespawner()
     self.prespawner.start()
 
-    try:
-      tests_after_shutdown = self.state_instance.get_shared_data(
-        'tests_after_shutdown')
-    except KeyError:
-      tests_after_shutdown = None
+    tests_after_shutdown = self.state_instance.get_shared_data(
+        'tests_after_shutdown', optional=True)
 
     force_auto_run = (tests_after_shutdown == FORCE_AUTO_RUN)
     if not force_auto_run and tests_after_shutdown is not None:
-      logging.info('Resuming tests after shutdown: %s',
-             tests_after_shutdown)
+      logging.info('Resuming tests after shutdown: %s', tests_after_shutdown)
       self.tests_to_run.extend(
           self.test_list.lookup_path(t) for t in tests_after_shutdown)
       self.run_queue.put(self.run_next_test)
@@ -1965,7 +1867,7 @@
 
     self.abort_active_tests('Operator requested restart of certain tests')
     for test in root.walk():
-      test.update_state(status=TestState.UNTESTED)
+      test.update_state(status=TestState.UNTESTED, shutdown_count=0)
     self.run_tests(root)
 
   def auto_run(self, starting_at=None, root=None):