gclient_utils: buffer output as bytestrings in Annotated
In Python 3, bytestrings and normal strings can't be concatenated.
To fix this we buffer as bytestrings in the Annotated wrapper.
We can't decode each write to a string because the output might arrive byte-by-byte, which would break multi-byte Unicode characters like ✔.
Also had to update gclient_test.py, where double-wrapping stdout with Annotated made output not work, and include_zero=True working caused other unintended side-effects.
Example error from "fetch chromium":
Traceback (most recent call last):
File "C:\Google\depot_tools\gclient_scm.py", line 1045, in _Clone
self._Run(clone_cmd, options, cwd=self._root_dir, retry=True,
File "C:\Google\depot_tools\gclient_scm.py", line 1370, in _Run
gclient_utils.CheckCallAndFilter(cmd, env=env, **kwargs)
File "C:\Google\depot_tools\gclient_utils.py", line 583, in CheckCallAndFilter
show_header_if_necessary(needs_header, attempt)
File "C:\Google\depot_tools\gclient_utils.py", line 533, in show_header_if_necessary
stdout_write(header.encode())
File "C:\Google\depot_tools\gclient_utils.py", line 391, in write
obj[0] += out
TypeError: can only concatenate str (not "bytes") to str
Bug: 984182
Change-Id: If7037d30e9faf524f2405258281f6e6cd0bcdcae
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/1778745
Commit-Queue: Edward Lesmes <ehmaldonado@chromium.org>
Reviewed-by: Edward Lesmes <ehmaldonado@chromium.org>
Auto-Submit: Raul Tambre <raul@tambre.ee>
diff --git a/gclient_utils.py b/gclient_utils.py
index 3a6bc1f..2c38a3a 100644
--- a/gclient_utils.py
+++ b/gclient_utils.py
@@ -40,8 +40,10 @@
if sys.version_info.major == 2:
from cStringIO import StringIO
+ string_type = basestring
else:
from io import StringIO
+ string_type = str
RETRY_MAX = 3
@@ -371,6 +373,10 @@
def write(self, out):
index = getattr(threading.currentThread(), 'index', 0)
if not index and not self.__include_zero:
+ # Store as bytes to ensure Unicode characters get output correctly.
+ if isinstance(out, bytes):
+ out = out.decode('utf-8')
+
# Unindexed threads aren't buffered.
return self._wrapped.write(out)
@@ -380,28 +386,32 @@
# Strings are immutable, requiring to keep a lock for the whole dictionary
# otherwise. Using an array is faster than using a dummy object.
if not index in self.__output_buffers:
- obj = self.__output_buffers[index] = ['']
+ obj = self.__output_buffers[index] = [b'']
else:
obj = self.__output_buffers[index]
finally:
self.lock.release()
+ # Store as bytes to ensure Unicode characters get output correctly.
+ if isinstance(out, string_type):
+ out = out.encode('utf-8')
+
# Continue lockless.
obj[0] += out
while True:
# TODO(agable): find both of these with a single pass.
- cr_loc = obj[0].find('\r')
- lf_loc = obj[0].find('\n')
+ cr_loc = obj[0].find(b'\r')
+ lf_loc = obj[0].find(b'\n')
if cr_loc == lf_loc == -1:
break
elif cr_loc == -1 or (lf_loc >= 0 and lf_loc < cr_loc):
- line, remaining = obj[0].split('\n', 1)
+ line, remaining = obj[0].split(b'\n', 1)
if line:
- self._wrapped.write('%d>%s\n' % (index, line))
+ self._wrapped.write('%d>%s\n' % (index, line.decode('utf-8')))
elif lf_loc == -1 or (cr_loc >= 0 and cr_loc < lf_loc):
- line, remaining = obj[0].split('\r', 1)
+ line, remaining = obj[0].split(b'\r', 1)
if line:
- self._wrapped.write('%d>%s\r' % (index, line))
+ self._wrapped.write('%d>%s\r' % (index, line.decode('utf-8')))
obj[0] = remaining
def flush(self):
@@ -423,7 +433,7 @@
# Don't keep the lock while writting. Will append \n when it shouldn't.
for orphan in orphans:
if orphan[1]:
- self._wrapped.write('%d>%s\n' % (orphan[0], orphan[1]))
+ self._wrapped.write('%d>%s\n' % (orphan[0], orphan[1].decode('utf-8')))
return self._wrapped.flush()