bisect-kit: Add retry logic for swarming calls

BUG=b:205536530
TEST=manually

Change-Id: I97ea7804b5d6adebb2fe86ebc7f86904142a3ca7
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/bisect-kit/+/3447663
Reviewed-by: Kuang-che Wu <kcwu@chromium.org>
Commit-Queue: Zheng-Jie Chang <zjchang@chromium.org>
Tested-by: Zheng-Jie Chang <zjchang@chromium.org>
Auto-Submit: Zheng-Jie Chang <zjchang@chromium.org>
diff --git a/bisect_kit/cros_lab_util.py b/bisect_kit/cros_lab_util.py
index d1fd18a..ec3a8b0 100644
--- a/bisect_kit/cros_lab_util.py
+++ b/bisect_kit/cros_lab_util.py
@@ -158,7 +158,7 @@
     cmd += ['--auth-service-account-json', service_account_json]
   if limit:
     cmd += ['--limit', str(limit)]
-  data = util.check_output(*cmd, log_stdout=verbose_log)
+  data = util.check_output(*cmd, retry=5, log_stdout=verbose_log)
   return json.loads(data)
 
 
@@ -175,7 +175,7 @@
   ]
   if not common.under_luci_context() and service_account_json:
     cmd += ['--auth-service-account-json', service_account_json]
-  util.check_call(*cmd)
+  util.check_call(*cmd, retry=5)
 
 
 def bb_cancel(build_id, reason):
diff --git a/bisect_kit/util.py b/bisect_kit/util.py
index 7f6dd99..887b8d5 100644
--- a/bisect_kit/util.py
+++ b/bisect_kit/util.py
@@ -189,7 +189,7 @@
   return p.wait(timeout=timeout)
 
 
-def check_output(*args, timeout=None, **kwargs):
+def check_output(*args, timeout=None, retry=1, **kwargs):
   """Runs command and return output.
 
   Modeled after subprocess.check_output.
@@ -200,24 +200,32 @@
   Raises:
     subprocess.CalledProcessError if the exit code is non-zero.
   """
-  stdout_lines = []
 
   def collect_stdout(line):
     stdout_lines.append(line)
 
-  p = Popen(args, stdout_callback=collect_stdout, **kwargs)
-  p.wait(timeout=timeout)
-  if kwargs.get('binary'):
-    stdout = b''.join(stdout_lines)
-  else:
-    stdout = ''.join(stdout_lines)
-  if p.returncode != 0:
-    raise subprocess.CalledProcessError(p.returncode, args, stdout)
+  delay_duration = 1
+  while retry > 0:
+    retry -= 1
+    stdout_lines = []
+    p = Popen(args, stdout_callback=collect_stdout, **kwargs)
+    p.wait(timeout=timeout)
+    if kwargs.get('binary'):
+      stdout = b''.join(stdout_lines)
+    else:
+      stdout = ''.join(stdout_lines)
+
+    if p.returncode == 0:
+      break
+    if retry <= 0:
+      raise subprocess.CalledProcessError(p.returncode, args, stdout)
+    time.sleep(delay_duration)
+    delay_duration = min(delay_duration * 2, 100)
 
   return stdout
 
 
-def check_call(*args, timeout=None, **kwargs):
+def check_call(*args, timeout=None, retry=1, **kwargs):
   """Runs command and ensures it succeeded.
 
   Modeled after subprocess.check_call.
@@ -225,10 +233,18 @@
   Raises:
     subprocess.CalledProcessError if the exit code is non-zero.
   """
-  p = Popen(args, **kwargs)
-  p.wait(timeout=timeout)
-  if p.returncode != 0:
-    raise subprocess.CalledProcessError(p.returncode, args)
+  delay_duration = 1
+  while retry > 0:
+    retry -= 1
+    p = Popen(args, **kwargs)
+    p.wait(timeout=timeout)
+
+    if p.returncode == 0:
+      break
+    if retry <= 0:
+      raise subprocess.CalledProcessError(p.returncode, args)
+    time.sleep(delay_duration)
+    delay_duration = min(delay_duration * 2, 100)
 
 
 def ssh_cmd(host, *args, connect_timeout=None, allow_retry=False):