bisect-kit: Add retry logic for swarming calls
BUG=b:205536530
TEST=manually
Change-Id: I97ea7804b5d6adebb2fe86ebc7f86904142a3ca7
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/bisect-kit/+/3447663
Reviewed-by: Kuang-che Wu <kcwu@chromium.org>
Commit-Queue: Zheng-Jie Chang <zjchang@chromium.org>
Tested-by: Zheng-Jie Chang <zjchang@chromium.org>
Auto-Submit: Zheng-Jie Chang <zjchang@chromium.org>
diff --git a/bisect_kit/cros_lab_util.py b/bisect_kit/cros_lab_util.py
index d1fd18a..ec3a8b0 100644
--- a/bisect_kit/cros_lab_util.py
+++ b/bisect_kit/cros_lab_util.py
@@ -158,7 +158,7 @@
cmd += ['--auth-service-account-json', service_account_json]
if limit:
cmd += ['--limit', str(limit)]
- data = util.check_output(*cmd, log_stdout=verbose_log)
+ data = util.check_output(*cmd, retry=5, log_stdout=verbose_log)
return json.loads(data)
@@ -175,7 +175,7 @@
]
if not common.under_luci_context() and service_account_json:
cmd += ['--auth-service-account-json', service_account_json]
- util.check_call(*cmd)
+ util.check_call(*cmd, retry=5)
def bb_cancel(build_id, reason):
diff --git a/bisect_kit/util.py b/bisect_kit/util.py
index 7f6dd99..887b8d5 100644
--- a/bisect_kit/util.py
+++ b/bisect_kit/util.py
@@ -189,7 +189,7 @@
return p.wait(timeout=timeout)
-def check_output(*args, timeout=None, **kwargs):
+def check_output(*args, timeout=None, retry=1, **kwargs):
"""Runs command and return output.
Modeled after subprocess.check_output.
@@ -200,24 +200,32 @@
Raises:
subprocess.CalledProcessError if the exit code is non-zero.
"""
- stdout_lines = []
def collect_stdout(line):
stdout_lines.append(line)
- p = Popen(args, stdout_callback=collect_stdout, **kwargs)
- p.wait(timeout=timeout)
- if kwargs.get('binary'):
- stdout = b''.join(stdout_lines)
- else:
- stdout = ''.join(stdout_lines)
- if p.returncode != 0:
- raise subprocess.CalledProcessError(p.returncode, args, stdout)
+ delay_duration = 1
+ while retry > 0:
+ retry -= 1
+ stdout_lines = []
+ p = Popen(args, stdout_callback=collect_stdout, **kwargs)
+ p.wait(timeout=timeout)
+ if kwargs.get('binary'):
+ stdout = b''.join(stdout_lines)
+ else:
+ stdout = ''.join(stdout_lines)
+
+ if p.returncode == 0:
+ break
+ if retry <= 0:
+ raise subprocess.CalledProcessError(p.returncode, args, stdout)
+ time.sleep(delay_duration)
+ delay_duration = min(delay_duration * 2, 100)
return stdout
-def check_call(*args, timeout=None, **kwargs):
+def check_call(*args, timeout=None, retry=1, **kwargs):
"""Runs command and ensures it succeeded.
Modeled after subprocess.check_call.
@@ -225,10 +233,18 @@
Raises:
subprocess.CalledProcessError if the exit code is non-zero.
"""
- p = Popen(args, **kwargs)
- p.wait(timeout=timeout)
- if p.returncode != 0:
- raise subprocess.CalledProcessError(p.returncode, args)
+ delay_duration = 1
+ while retry > 0:
+ retry -= 1
+ p = Popen(args, **kwargs)
+ p.wait(timeout=timeout)
+
+ if p.returncode == 0:
+ break
+ if retry <= 0:
+ raise subprocess.CalledProcessError(p.returncode, args)
+ time.sleep(delay_duration)
+ delay_duration = min(delay_duration * 2, 100)
def ssh_cmd(host, *args, connect_timeout=None, allow_retry=False):