bisect-kit: refactor test value handling

Currently, the eval script converts performance scores into boolean values, so the bisector's main algorithm has no knowledge of the actual values. This CL moves that logic from the eval script to the bisector (from eval_cros_autotest.py to strategy.py).

With this refactoring, the bisector can later tweak the algorithm specifically for numeric value bisection (instead of boolean value bisection), for example:
 - recompute initial values
 - even more, bisect according to the distribution of values

BUG=chromium:881238
TEST=run diagnose_cros_autotest.py manually

Change-Id: I144b91c46be50b72cd01cb48c12adcd8660b67ec
Reviewed-on: https://chromium-review.googlesource.com/1556981
Commit-Ready: Kuang-che Wu <kcwu@chromium.org>
Tested-by: Kuang-che Wu <kcwu@chromium.org>
Reviewed-by: Chung-yih Wang <cywang@chromium.org>
Reviewed-by: Chi-Ngai Wan <cnwan@google.com>

commit: 81cde45e589f3ba341c06837fb1c0d775c0760ba [log] [tgz]
author: Kuang-che Wu <kcwu@chromium.org> Mon Apr 08 16:56:51 2019 +0800
committer: chrome-bot <chrome-bot@chromium.org> Tue Apr 09 01:27:05 2019 -0700
tree: 575570b57145822cc1db518cce1ac3103453ec11
parent: 356ecb9c2920905a55fec2b078e79d2f50755d57 [diff] [blame]
diff --git a/bisect_kit/cli.py b/bisect_kit/cli.py
index 97d4e52..698d733 100644
--- a/bisect_kit/cli.py
+++ b/bisect_kit/cli.py

@@ -401,7 +401,11 @@
     new_idx = revlist.index(config['new'])
     assert old_idx < new_idx
 
-    config.update(confidence=opts.confidence, noisy=opts.noisy)
+    config.update(
+        confidence=opts.confidence,
+        noisy=opts.noisy,
+        old_value=opts.old_value,
+        new_value=opts.new_value)
 
     self.states.init(config, revlist)
     self.states.save()
@@ -438,6 +442,9 @@
     t1 = time.time()
     if status == 'skip':
       return 'eval', status, values
+
+    if self.strategy.is_value_bisection():
+      status = self.strategy.classify_result_from_values(values)
     self.states.data['stats']['eval_count'] += 1
     self.states.data['stats']['eval_time'] += t1 - t0
 
@@ -490,7 +497,11 @@
           self.strategy.check_verification_range()
 
         step, status, values = self._switch_and_eval(rev, prev_rev=prev_rev)
-        logger.info('rev=%s status => %s', rev, self._format_status(status))
+        if self.strategy.is_value_bisection():
+          logger.info('rev=%s status => %s: %s', rev,
+                      self._format_status(status), values)
+        else:
+          logger.info('rev=%s status => %s', rev, self._format_status(status))
         force = False
 
         self.states.add_sample(idx, status, values=values)
@@ -631,6 +642,8 @@
           self.states.rev_info,
           self.states.rev2idx(self.config['old']),
           self.states.rev2idx(self.config['new']),
+          old_value=self.config['old_value'],
+          new_value=self.config['new_value'],
           confidence=self.config['confidence'],
           observation=self.config['noisy'])
       try:
@@ -844,6 +857,14 @@
         'and new fail rate increased to 2/3. '
         'Skip if not flaky, say, "new=2/3" means old is always good.')
     parser_init.add_argument(
+        '--old_value',
+        type=float,
+        help='For performance test, value of old behavior')
+    parser_init.add_argument(
+        '--new_value',
+        type=float,
+        help='For performance test, value of new behavior')
+    parser_init.add_argument(
         '--confidence',
         type=float,
         default=DEFAULT_CONFIDENCE,
@@ -972,6 +993,8 @@
           self.states.rev_info,
           self.states.rev2idx(self.config['old']),
           self.states.rev2idx(self.config['new']),
+          old_value=self.config['old_value'],
+          new_value=self.config['new_value'],
           confidence=self.config['confidence'],
           observation=self.config['noisy'])
commit	81cde45e589f3ba341c06837fb1c0d775c0760ba	[log] [tgz]
author	Kuang-che Wu <kcwu@chromium.org>	Mon Apr 08 16:56:51 2019 +0800
committer	chrome-bot <chrome-bot@chromium.org>	Tue Apr 09 01:27:05 2019 -0700
tree	575570b57145822cc1db518cce1ac3103453ec11
parent	356ecb9c2920905a55fec2b078e79d2f50755d57 [diff] [blame]