Fix implicit unicode upgrade of patch filenames.
When a diff contained a UTF-8 character, the filename was stored as a unicode
object instead of a str. This happened even if the buffer was originally sent
as a UTF-8 encoded str. Then, once the diff was reconstructed to be sent to
'patch', a unicode object instead of a str was sent, confusing patch.
R=cmp@chromium.org
BUG=
TEST=
Review URL: http://codereview.chromium.org/9387024
git-svn-id: svn://svn.chromium.org/chrome/trunk/tools/depot_tools@122361 0039d316-1c4b-4281-b951-d872f2087c98
diff --git a/patch.py b/patch.py
index 69ba323..90886e9 100644
--- a/patch.py
+++ b/patch.py
@@ -37,6 +37,15 @@
# Set when the file is copied or moved.
self.source_filename = None
+ @property
+ def filename_utf8(self):
+ return self.filename.encode('utf-8')
+
+ @property
+ def source_filename_utf8(self):
+ if self.source_filename is not None:
+ return self.source_filename.encode('utf-8')
+
@staticmethod
def _process_filename(filename):
filename = filename.replace('\\', '/')
@@ -88,8 +97,8 @@
out += ' '
out += ' '
if self.source_filename:
- out += '%s->' % self.source_filename
- return out + str(self.filename)
+ out += '%s->' % self.source_filename_utf8
+ return out + self.filename_utf8
class FilePatchDelete(FilePatchBase):
@@ -153,25 +162,27 @@
# patch is stupid. It patches the source_filename instead so get rid of
# any source_filename reference if needed.
return (
- self.diff_header.replace(self.source_filename, self.filename) +
+ self.diff_header.replace(
+ self.source_filename_utf8, self.filename_utf8) +
self.diff_hunks)
def set_relpath(self, relpath):
- old_filename = self.filename
- old_source_filename = self.source_filename or self.filename
+ old_filename = self.filename_utf8
+ old_source_filename = self.source_filename_utf8 or self.filename_utf8
super(FilePatchDiff, self).set_relpath(relpath)
# Update the header too.
- source_filename = self.source_filename or self.filename
+ filename = self.filename_utf8
+ source_filename = self.source_filename_utf8 or self.filename_utf8
lines = self.diff_header.splitlines(True)
for i, line in enumerate(lines):
if line.startswith('diff --git'):
lines[i] = line.replace(
'a/' + old_source_filename, source_filename).replace(
- 'b/' + old_filename, self.filename)
+ 'b/' + old_filename, filename)
elif re.match(r'^\w+ from .+$', line) or line.startswith('---'):
lines[i] = line.replace(old_source_filename, source_filename)
elif re.match(r'^\w+ to .+$', line) or line.startswith('+++'):
- lines[i] = line.replace(old_filename, self.filename)
+ lines[i] = line.replace(old_filename, filename)
self.diff_header = ''.join(lines)
def _split_header(self, diff):
@@ -197,7 +208,7 @@
# Mangle any \\ in the header to /.
header_lines = ('Index:', 'diff', 'copy', 'rename', '+++', '---')
- basename = os.path.basename(self.filename)
+ basename = os.path.basename(self.filename_utf8)
for i in xrange(len(header)):
if (header[i].split(' ', 1)[0] in header_lines or
header[i].endswith(basename)):
@@ -314,7 +325,7 @@
new = self.mangle(match.group(2))
# The rename is about the new file so the old file can be anything.
- if new not in (self.filename, 'dev/null'):
+ if new not in (self.filename_utf8, 'dev/null'):
self._fail('Unexpected git diff output name %s.' % new)
if old == 'dev/null' and new == 'dev/null':
self._fail('Unexpected /dev/null git diff.')
@@ -323,9 +334,9 @@
if not old or not new:
self._fail('Unexpected git diff; couldn\'t find git header.')
- if old not in (self.filename, 'dev/null'):
+ if old not in (self.filename_utf8, 'dev/null'):
# Copy or rename.
- self.source_filename = old
+ self.source_filename = old.decode('utf-8')
self.is_new = True
last_line = ''
@@ -337,7 +348,7 @@
# Cheap check to make sure the file name is at least mentioned in the
# 'diff' header. That the only remaining invariant.
- if not self.filename in self.diff_header:
+ if not self.filename_utf8 in self.diff_header:
self._fail('Diff seems corrupted.')
def _verify_git_header_process_line(self, lines, line, last_line):
@@ -349,7 +360,7 @@
http://www.kernel.org/pub/software/scm/git/docs/git-diff.html
"""
match = re.match(r'^(rename|copy) from (.+)$', line)
- old = self.source_filename or self.filename
+ old = self.source_filename_utf8 or self.filename_utf8
if match:
if old != match.group(2):
self._fail('Unexpected git diff input name for line %s.' % line)
@@ -361,7 +372,7 @@
match = re.match(r'^(rename|copy) to (.+)$', line)
if match:
- if self.filename != match.group(2):
+ if self.filename_utf8 != match.group(2):
self._fail('Unexpected git diff output name for line %s.' % line)
if not last_line.startswith('%s from ' % match.group(1)):
self._fail(
@@ -404,7 +415,7 @@
self._fail('Unexpected git diff: --- not following +++.')
if '/dev/null' == match.group(1):
self.is_delete = True
- elif self.filename != self.mangle(match.group(1)):
+ elif self.filename_utf8 != self.mangle(match.group(1)):
self._fail(
'Unexpected git diff: %s != %s.' % (self.filename, match.group(1)))
if lines:
@@ -429,7 +440,7 @@
# Cheap check to make sure the file name is at least mentioned in the
# 'diff' header. That the only remaining invariant.
- if not self.filename in self.diff_header:
+ if not self.filename_utf8 in self.diff_header:
self._fail('Diff seems corrupted.')
def _verify_svn_header_process_line(self, lines, line, last_line):
@@ -443,9 +454,9 @@
self._fail('--- and +++ are reversed')
if match.group(1) == '/dev/null':
self.is_new = True
- elif self.mangle(match.group(1)) != self.filename:
+ elif self.mangle(match.group(1)) != self.filename_utf8:
# guess the source filename.
- self.source_filename = match.group(1)
+ self.source_filename = match.group(1).decode('utf-8')
self.is_new = True
if not lines or not lines[0].startswith('+++'):
self._fail('Nothing after header.')
@@ -457,7 +468,7 @@
self._fail('Unexpected diff: --- not following +++.')
if match.group(1) == '/dev/null':
self.is_delete = True
- elif self.mangle(match.group(1)) != self.filename:
+ elif self.mangle(match.group(1)) != self.filename_utf8:
self._fail('Unexpected diff: %s.' % match.group(1))
if lines:
self._fail('Crap after +++')
@@ -479,10 +490,10 @@
Deletes are last.
"""
if p.source_filename:
- return (p.is_delete, p.source_filename, p.filename)
+ return (p.is_delete, p.source_filename_utf8, p.filename_utf8)
else:
# tuple are always greater than string, abuse that fact.
- return (p.is_delete, (p.filename,), p.filename)
+ return (p.is_delete, (p.filename_utf8,), p.filename_utf8)
self.patches = sorted(patches, key=key)