blob: f23bda7bf4389f50c3e66c1af262bcf058963346 [file] [log] [blame]
# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Collection of functions and classes to fix various encoding problems on
multiple platforms with Python.
"""
7
8import codecs
9import locale
10import os
11import sys
12
13
def complain(message):
    """Reports a message directly on the interpreter's original stderr.

    If any exception occurs in this file, we'll probably try to print it on
    stderr, which makes for frustrating debugging if stderr is directed to our
    wrapper. So be paranoid about catching errors and reporting them to
    sys.__stderr__, so that the user has a higher chance to see them.

    Args:
        message: Text to report. Anything that is not a str is printed as its
            repr().
    """
    # A conditional expression (rather than `x and a or b`) keeps an empty
    # string from being reported as its repr, "''".
    text = message if isinstance(message, str) else repr(message)
    print(text, file=sys.__stderr__)
maruel@chromium.org35625c72011-03-23 17:34:02 +000022
23
def fix_default_encoding():
    """Forces utf8 solidly on all platforms.

    By default python execution environment is lazy and defaults to ascii
    encoding.

    http://uucode.com/blog/2007/03/23/shut-up-you-dummy-7-bit-python/

    Returns:
        False when the default encoding is already utf-8, or when it cannot
        be changed on this interpreter. True after the default encoding was
        set to utf-8 and the locale categories were updated on a best-effort
        basis.
    """
    if sys.getdefaultencoding() == 'utf-8':
        return False

    # Regenerate setdefaultencoding. `reload` only exists as a builtin on
    # Python 2; where it is missing there is nothing to regenerate, so report
    # that no change was made instead of dying with a NameError.
    try:
        reload_module = reload  # pylint: disable=undefined-variable
    except NameError:
        return False
    reload_module(sys)
    # Module 'sys' has no 'setdefaultencoding' member
    # pylint: disable=no-member
    sys.setdefaultencoding('utf-8')
    for attr in dir(locale):
        if not attr.startswith('LC_'):
            continue
        aref = getattr(locale, attr)
        try:
            locale.setlocale(aref, '')
        except locale.Error:
            continue
        try:
            lang, _ = locale.getdefaultlocale()
        except (TypeError, ValueError):
            continue
        if lang:
            try:
                locale.setlocale(aref, (lang, 'UTF-8'))
            except locale.Error:
                # The category could not be switched in-process; fall back to
                # setting the environment variable of the same name.
                os.environ[attr] = lang + '.UTF-8'
    try:
        locale.setlocale(locale.LC_ALL, '')
    except locale.Error:
        # Best effort only: keep whatever locale is currently active.
        pass
    return True
maruel@chromium.org35625c72011-03-23 17:34:02 +000062
63
64###############################
65# Windows specific
66
Mike Frysinger124bb8e2023-09-06 05:48:55 +000067
def fix_win_codec():
    """Works around <http://bugs.python.org/issue6058>.

    Makes the 'cp65001' codec name resolve to the utf-8 codec when the
    interpreter does not already know it.

    Returns:
        True if a codec search function was registered, False if 'cp65001'
        could already be looked up.
    """
    def _cp65001_as_utf8(name):
        # Codec search function: only answers for 'cp65001'.
        if name == 'cp65001':
            return codecs.lookup('utf-8')
        return None

    # <http://msdn.microsoft.com/en-us/library/dd317756.aspx>
    try:
        codecs.lookup('cp65001')
    except LookupError:
        codecs.register(_cp65001_as_utf8)
        return True
    return False
maruel@chromium.org35625c72011-03-23 17:34:02 +000078
79
class WinUnicodeOutputBase(object):
    """Common plumbing for the sys.stdout / sys.stderr replacements used on
    Windows.

    Subclasses supply flush() and write(). Setting encoding to utf-8 is
    recommended.
    """
    def __init__(self, fileno, name, encoding):
        # Fixed file-object style attributes.
        self.mode = 'w'
        self.softspace = False
        self.closed = False

        self.name = name
        self.encoding = encoding
        # Value handed back by fileno().
        self._fileno = fileno

    @staticmethod
    def isatty():
        """Always reports that the stream is not a tty."""
        return False

    def close(self):
        """Marks the stream closed without touching the underlying handle."""
        # Don't really close the handle, that would only cause problems.
        self.closed = True

    def fileno(self):
        """Returns the file number given at construction time."""
        return self._fileno

    def flush(self):
        """Must be provided by subclasses."""
        raise NotImplementedError()

    def write(self, text):
        """Must be provided by subclasses."""
        raise NotImplementedError()

    def writelines(self, lines):
        """Writes every item of `lines` through write().

        Any failure is reported through complain() and then re-raised.
        """
        try:
            for chunk in lines:
                self.write(chunk)
        except Exception as err:
            complain('%s.writelines: %r' % (self.name, err))
            raise
maruel@chromium.org35625c72011-03-23 17:34:02 +0000120
121
class WinUnicodeConsoleOutput(WinUnicodeOutputBase):
    """Stream adapter that writes straight to a Windows console handle.

    Text is handed to the win32 WriteConsoleW() API.
    """
    def __init__(self, console_handle, fileno, stream_name, encoding):
        display_name = '<Unicode console %s>' % stream_name
        super(WinUnicodeConsoleOutput, self).__init__(fileno, display_name,
                                                      encoding)
        # Handle to use for WriteConsoleW
        self._console_handle = console_handle

        # Loads the necessary function.
        # These types are available on linux but not Mac.
        # pylint: disable=no-name-in-module,F0401
        from ctypes import byref, GetLastError, POINTER, windll, WINFUNCTYPE
        from ctypes.wintypes import BOOL, DWORD, HANDLE, LPWSTR
        from ctypes.wintypes import LPVOID  # pylint: disable=no-name-in-module

        self._byref = byref
        self._DWORD = DWORD
        self._GetLastError = GetLastError

        # <http://msdn.microsoft.com/en-us/library/ms687401.aspx>
        prototype = WINFUNCTYPE(BOOL, HANDLE, LPWSTR, DWORD, POINTER(DWORD),
                                LPVOID)
        self._WriteConsoleW = prototype(('WriteConsoleW', windll.kernel32))

    def flush(self):
        """Does nothing."""
        # No need to flush the console since it's immediate.
        pass

    def write(self, text):
        """Writes `text` to the console, at most 10000 characters per call.

        Bytes are first decoded with self.encoding (undecodable input is
        replaced). Any failure is reported through complain() and re-raised.

        Raises:
            IOError: WriteConsoleW reported failure or wrote nothing.
        """
        try:
            if isinstance(text, bytes):
                # Bytestrings need to be decoded to a string before being
                # passed to Windows.
                text = text.decode(self.encoding, 'replace')
            # NOTE(review): `pending` counts Python characters while
            # WriteConsoleW counts UTF-16 code units; text containing
            # characters outside the BMP may be miscounted -- confirm on
            # Windows.
            pending = len(text)
            while pending > 0:
                written = self._DWORD(0)
                # There is a shorter-than-documented limitation on the length
                # of the string passed to WriteConsoleW. See
                # <http://tahoe-lafs.org/trac/tahoe-lafs/ticket/1232>.
                retval = self._WriteConsoleW(self._console_handle, text,
                                             min(pending, 10000),
                                             self._byref(written), None)
                count = written.value
                if retval == 0 or count == 0:
                    raise IOError('WriteConsoleW returned %r, n.value = %r, '
                                  'last error = %r' %
                                  (retval, count, self._GetLastError()))
                pending -= count
                if pending:
                    # Drop what the console accepted and go around again.
                    text = text[int(count):]
        except Exception as err:
            complain('%s.write: %r' % (self.name, err))
            raise
maruel@chromium.org35625c72011-03-23 17:34:02 +0000181
182
class WinUnicodeOutput(WinUnicodeOutputBase):
    """Output adaptor wrapping an existing stream on Windows.

    Used when output is not going to a console: bytes are decoded with the
    configured encoding and line endings are normalized before the text is
    forwarded to the wrapped stream.
    """
    def __init__(self, stream, fileno, encoding):
        label = '<Unicode redirected %s>' % stream.name
        super(WinUnicodeOutput, self).__init__(fileno, label, encoding)
        # Output stream
        self._stream = stream

        # Flush right now.
        self.flush()

    def flush(self):
        """Flushes the wrapped stream; failures are reported and re-raised."""
        try:
            self._stream.flush()
        except Exception as err:
            complain('%s.flush: %r from %r' % (self.name, err, self._stream))
            raise

    def write(self, text):
        """Forwards `text` to the wrapped stream.

        Failures are reported through complain() and re-raised.
        """
        try:
            decoded = text
            if isinstance(decoded, bytes):
                # Replace characters that cannot be printed instead of
                # failing.
                decoded = decoded.decode(self.encoding, 'replace')
            # When redirecting to a file or process any \n characters will be
            # replaced with \r\n. If the text to be printed already has \r\n
            # line endings then \r\r\n line endings will be generated, leading
            # to double-spacing of some output. Normalizing line endings to \n
            # avoids this problem.
            self._stream.write(decoded.replace('\r\n', '\n'))
        except Exception as err:
            complain('%s.write: %r' % (self.name, err))
            raise
maruel@chromium.org35625c72011-03-23 17:34:02 +0000221
222
def win_handle_is_a_console(handle):
    """Returns True if a Windows file handle is a handle to a console.

    Args:
        handle: Handle value, e.g. as returned by GetStdHandle(). May be None
            or INVALID_HANDLE_VALUE.

    Returns:
        True when the handle is a character device for which
        GetConsoleMode() succeeds, False otherwise.
    """
    # These types are available on linux but not Mac.
    # pylint: disable=no-name-in-module,F0401
    from ctypes import byref, POINTER, windll, WINFUNCTYPE
    from ctypes.wintypes import BOOL, DWORD, HANDLE

    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    # HANDLE is pointer sized, so the sentinel read back through a HANDLE
    # return type differs from DWORD(-1) on 64-bit builds. Accept both forms
    # so callers that still produce the 32-bit value keep working.
    INVALID_HANDLE_VALUES = (HANDLE(-1).value, DWORD(-1).value)

    # <http://msdn.microsoft.com/en-us/library/ms683167.aspx>
    GetConsoleMode = WINFUNCTYPE(BOOL, HANDLE, POINTER(DWORD))(
        ('GetConsoleMode', windll.kernel32))
    # <http://msdn.microsoft.com/en-us/library/aa364960.aspx>
    # The documented prototype takes a HANDLE, not a DWORD.
    GetFileType = WINFUNCTYPE(DWORD, HANDLE)(('GetFileType', windll.kernel32))

    # GetStdHandle returns INVALID_HANDLE_VALUE, NULL, or a valid handle.
    if handle is None or handle in INVALID_HANDLE_VALUES:
        return False
    if (GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR:
        return False
    # GetConsoleMode returns a BOOL; normalize it to a real bool.
    return bool(GetConsoleMode(handle, byref(DWORD())))
maruel@chromium.org35625c72011-03-23 17:34:02 +0000245
246
def win_get_unicode_stream(stream, excepted_fileno, output_handle, encoding):
    """Returns a unicode-compatible stream.

    A direct console-writing object is returned only if:
    - the stream's file number is the expected console file number, and
    - the standard handle looked up for `output_handle` is in fact a handle
      to a console.

    In every other case the stream is wrapped in a WinUnicodeOutput.
    """
    def _no_fileno():
        return None

    current_fileno = getattr(stream, 'fileno', _no_fileno)()
    if current_fileno != excepted_fileno:
        # Not the expected descriptor. Create an auto-encoding stream.
        return WinUnicodeOutput(stream, current_fileno, encoding)

    # These types are available on linux but not Mac.
    # pylint: disable=no-name-in-module,F0401
    from ctypes import windll, WINFUNCTYPE
    from ctypes.wintypes import DWORD, HANDLE

    # <http://msdn.microsoft.com/en-us/library/ms683231.aspx>
    prototype = WINFUNCTYPE(HANDLE, DWORD)
    GetStdHandle = prototype(('GetStdHandle', windll.kernel32))

    console_handle = GetStdHandle(DWORD(output_handle))
    if not win_handle_is_a_console(console_handle):
        # It's something else. Create an auto-encoding stream.
        return WinUnicodeOutput(stream, current_fileno, encoding)

    # It's a console.
    return WinUnicodeConsoleOutput(console_handle, current_fileno,
                                   stream.name, encoding)
maruel@chromium.org35625c72011-03-23 17:34:02 +0000274
275
def fix_win_console(encoding):
    """Makes Unicode console output work independently of the current code page.

    This also fixes <http://bugs.python.org/issue1602>.
    Credit to Michael Kaplan
    <http://blogs.msdn.com/b/michkap/archive/2010/04/07/9989346.aspx> and
    TZOmegaTZIOY
    <http://stackoverflow.com/questions/878972/windows-cmd-encoding-change-causes-python-crash/1432462#1432462>.

    Returns:
        False if sys.stdout or sys.stderr is already one of our wrappers,
        True otherwise (including when wrapping failed and was only reported
        through complain()).
    """
    already_wrapped = any(
        isinstance(current, WinUnicodeOutputBase)
        for current in (sys.stdout, sys.stderr))
    if already_wrapped:
        return False

    # SetConsoleCP and SetConsoleOutputCP could be used to change the code
    # page but it's not really useful since the code here is using
    # WriteConsoleW(). Also, changing the code page is 'permanent' to the
    # console and needs to be reverted manually. In practice one needs to set
    # the console font to a TTF font to be able to see all the characters but
    # it failed for me in practice. In any case, it won't throw any exception
    # when printing, which is the important part.
    try:
        # -11 and -12 are defined in stdio.h
        sys.stdout = win_get_unicode_stream(sys.stdout, 1, -11, encoding)
        sys.stderr = win_get_unicode_stream(sys.stderr, 2, -12, encoding)
        # TODO(maruel): Do sys.stdin with ReadConsoleW(). Albeit the
        # limitation is "It doesn't appear to be possible to read Unicode
        # characters in UTF-8 mode" and this appears to be a limitation of
        # cmd.exe.
    except Exception as err:
        complain('exception %r while fixing up sys.stdout and sys.stderr' %
                 err)
    return True
maruel@chromium.org35625c72011-03-23 17:34:02 +0000306
307
def fix_encoding():
    """Fixes various encoding problems on all platforms.

    Should be called at the very beginning of the process.

    Returns:
        True only if every fix that was attempted reported True.
    """
    on_windows = sys.platform == 'win32'

    # Every applicable fix is run; the results are only combined at the end.
    outcomes = []
    if on_windows:
        outcomes.append(fix_win_codec())

    outcomes.append(fix_default_encoding())

    if on_windows:
        outcomes.append(fix_win_console(sys.getdefaultencoding()))
    return all(outcomes)