# coding=utf8
# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Utility functions to handle patches."""
6
maruel@chromium.orgcd619402011-04-09 00:08:00 +00007import posixpath
8import os
maruel@chromium.orgb3727a32011-04-04 19:31:44 +00009import re
10
11
class UnsupportedPatchFormat(Exception):
  """Raised when a patch for a given file can't be processed.

  Attributes:
    filename: name of the file whose patch was rejected.
    status: optional detail message; appended to str() on its own line when
        truthy.
  """

  def __init__(self, filename, status):
    super(UnsupportedPatchFormat, self).__init__(filename, status)
    self.filename = filename
    self.status = status

  def __str__(self):
    """Returns the summary line, followed by the status when there is one."""
    message = 'Can\'t process patch for file %s.' % self.filename
    if not self.status:
      return message
    return message + '\n%s' % self.status
23
24
class FilePatchBase(object):
  """Describes a single file touched by a patch.

  '/' is always used instead of os.sep in self.filename for consistency.
  """
  is_delete = False
  is_binary = False

  def __init__(self, filename):
    # Declare the attribute up front; _set_filename() assigns the real value.
    self.filename = None
    self._set_filename(filename)

  def _set_filename(self, filename):
    """Stores |filename| with '/' separators, then rejects unsupported names.

    The name is stored before it is checked, so the exception raised by
    _fail() carries the normalized name.
    """
    normalized = filename.replace('\\', '/')
    self.filename = normalized
    # Blacklist a few characters for simplicity.
    for forbidden in ('%', '$', '..', '\'', '"'):
      if forbidden in normalized:
        self._fail('Can\'t use \'%s\' in filename.' % forbidden)
    for prefix in ('/', 'CON', 'COM'):
      if normalized.startswith(prefix):
        self._fail('Filename can\'t start with \'%s\'.' % prefix)

  def get(self):
    """Not available on the base class; always raises NotImplementedError."""
    raise NotImplementedError('Nothing to grab')

  def set_relpath(self, relpath):
    """Prefixes the file name with |relpath|; a falsy |relpath| is a no-op."""
    if relpath:
      prefix = relpath.replace('\\', '/')
      first = prefix[0]
      if first == '/':
        self._fail('Relative path starts with %s' % first)
      self._set_filename(posixpath.join(prefix, self.filename))

  def _fail(self, msg):
    """Raises UnsupportedPatchFormat for the current file name with |msg|."""
    raise UnsupportedPatchFormat(self.filename, msg)
60
maruel@chromium.orgb3727a32011-04-04 19:31:44 +000061
class FilePatchDelete(FilePatchBase):
  """Deletes a file.

  There is no content attached to a deletion, so get() always raises.
  """
  is_delete = True

  def __init__(self, filename, is_binary):
    super(FilePatchDelete, self).__init__(filename)
    # Overrides the class-level default with the caller-provided flag.
    self.is_binary = is_binary

  def get(self):
    """Always raises NotImplementedError."""
    raise NotImplementedError('Nothing to grab')
72
73
class FilePatchBinary(FilePatchBase):
  """Content of a new binary file.

  Attributes:
    data: the value handed to the constructor, returned unchanged by get().
    svn_properties: the value handed to the constructor, or an empty list
        when that value is falsy.
  """
  is_binary = True

  def __init__(self, filename, data, svn_properties):
    super(FilePatchBinary, self).__init__(filename)
    self.data = data
    self.svn_properties = svn_properties if svn_properties else []

  def get(self):
    """Returns the stored data."""
    return self.data
85
86
class FilePatchDiff(FilePatchBase):
  """Patch for a single file.

  Attributes:
    diff_header: the header part of the diff, as split by _split_header().
    diff_hunks: everything after the header.
    svn_properties: the value handed to the constructor, or an empty list
        when that value is falsy.
    is_git_diff: True when the header contains a 'diff --git' line.
    patchlevel: number of leading path components dropped by mangle(); set
        to 1 when the 'diff --git' line uses the a/ and b/ prefixes.
  """

  def __init__(self, filename, diff, svn_properties):
    """Splits |diff| into header and hunks, then sanity checks the header.

    Raises:
      UnsupportedPatchFormat: via _fail() when |diff| is empty or when the
          header is inconsistent with |filename|.
    """
    super(FilePatchDiff, self).__init__(filename)
    if not diff:
      self._fail('File doesn\'t have a diff.')
    self.diff_header, self.diff_hunks = self._split_header(diff)
    self.svn_properties = svn_properties or []
    self.is_git_diff = self._is_git_diff_header(self.diff_header)
    self.patchlevel = 0
    if self.is_git_diff:
      self._verify_git_header()
      assert not svn_properties
    else:
      self._verify_svn_header()

  def get(self):
    """Returns the header immediately followed by the hunks."""
    return self.diff_header + self.diff_hunks

  def set_relpath(self, relpath):
    """Offsets the file name by |relpath| and updates the header to match."""
    old_filename = self.filename
    super(FilePatchDiff, self).set_relpath(relpath)
    # Update the header too.
    # NOTE(review): this is a plain substring replacement, so every occurrence
    # of the old name in the header is rewritten, not only the path fields.
    self.diff_header = self.diff_header.replace(old_filename, self.filename)

  def _split_header(self, diff):
    """Splits a diff in two: the header and the hunks.

    The header runs up to and including the '--- ' line and the '+++ ' line
    that must follow it. When there is no '--- ' line, the whole diff is
    treated as header and the hunks are empty.

    Returns:
      A (header, hunks) tuple of strings.

    Raises:
      UnsupportedPatchFormat: via _fail() when '--- ' is not followed by
          '+++ ', or when the first hunk line doesn't start with '@@ '.
    """
    header = []
    hunks = diff.splitlines(True)
    while hunks:
      header.append(hunks.pop(0))
      if header[-1].startswith('--- '):
        break
    # Some diff may not have a ---/+++ set like a git rename with no change or
    # a svn diff with only property change. In that case the loop above
    # consumed every line and hunks is now empty.

    if hunks:
      if not hunks[0].startswith('+++ '):
        self._fail('Inconsistent header')
      header.append(hunks.pop(0))
      if hunks:
        if not hunks[0].startswith('@@ '):
          self._fail('Inconsistent hunk header')

    # Mangle any \\ in the header to /.
    header_lines = ('Index:', 'diff', 'copy', 'rename', '+++', '---')
    basename = os.path.basename(self.filename)
    for i, line in enumerate(header):
      # NOTE(review): lines keep their line ending here, so the endswith()
      # test can only match a final line that has no trailing newline.
      if line.split(' ', 1)[0] in header_lines or line.endswith(basename):
        header[i] = line.replace('\\', '/')
    return ''.join(header), ''.join(hunks)

  @staticmethod
  def _is_git_diff_header(diff_header):
    """Returns True if the diff for a single files was generated with git."""
    # Delete: http://codereview.chromium.org/download/issue6368055_22_29.diff
    # Rename partial change:
    # http://codereview.chromium.org/download/issue6250123_3013_6010.diff
    # Rename no change:
    # http://codereview.chromium.org/download/issue6287022_3001_4010.diff
    return any(l.startswith('diff --git') for l in diff_header.splitlines())

  def mangle(self, string):
    """Mangle a file path.

    Converts backslashes to '/' and drops the first self.patchlevel path
    components.
    """
    return '/'.join(string.replace('\\', '/').split('/')[self.patchlevel:])

  def _verify_git_header(self):
    """Sanity checks the header.

    Expects the following format:

    <garbage>
    diff --git (|a/)<filename> (|b/)<filename>
    <similarity>
    <filemode changes>
    <index>
    <copy|rename from>
    <copy|rename to>
    --- <filename>
    +++ <filename>

    Everything is optional except the diff --git line.

    Raises:
      UnsupportedPatchFormat: via _fail() on any inconsistency.
    """
    lines = self.diff_header.splitlines()

    # Verify the diff --git line.
    old = None
    new = None
    while lines:
      match = re.match(r'^diff \-\-git (.*?) (.*)$', lines.pop(0))
      if not match:
        continue
      old = match.group(1).replace('\\', '/')
      new = match.group(2).replace('\\', '/')
      if old.startswith('a/') and new.startswith('b/'):
        self.patchlevel = 1
        old = old[2:]
        new = new[2:]
      # The rename is about the new file so the old file can be anything.
      if new not in (self.filename, 'dev/null'):
        self._fail('Unexpected git diff output name %s.' % new)
      if old == 'dev/null' and new == 'dev/null':
        self._fail('Unexpected /dev/null git diff.')
      break

    if not old or not new:
      self._fail('Unexpected git diff; couldn\'t find git header.')

    # Handle these:
    #   rename from <>
    #   rename to <>
    #   copy from <>
    #   copy to <>
    while lines:
      if lines[0].startswith('--- '):
        break
      match = re.match(r'^(rename|copy) from (.+)$', lines.pop(0))
      if not match:
        continue
      # Remember the operation now: the next re.match() may return None and
      # the error message still has to name it.
      operation = match.group(1)
      if old != match.group(2):
        self._fail('Unexpected git diff input name for %s.' % operation)
      if not lines:
        self._fail('Missing git diff output name for %s.' % operation)
      match = re.match(r'^(rename|copy) to (.+)$', lines.pop(0))
      if not match:
        self._fail('Missing git diff output name for %s.' % operation)
      if new != match.group(2):
        self._fail('Unexpected git diff output name for %s.' % match.group(1))

    # Handle ---/+++
    while lines:
      match = re.match(r'^--- (.*)$', lines.pop(0))
      if not match:
        continue
      if old != self.mangle(match.group(1)) and match.group(1) != '/dev/null':
        self._fail('Unexpected git diff: %s != %s.' % (old, match.group(1)))
      if not lines:
        self._fail('Missing git diff output name.')
      match = re.match(r'^\+\+\+ (.*)$', lines.pop(0))
      if not match:
        self._fail('Unexpected git diff: --- not following +++.')
      if new != self.mangle(match.group(1)) and '/dev/null' != match.group(1):
        self._fail('Unexpected git diff: %s != %s.' % (new, match.group(1)))
      assert not lines, '_split_header() is broken'
      break

  def _verify_svn_header(self):
    """Sanity checks the header.

    A svn diff can contain only property changes, in that case there will be no
    proper header. To make things worse, this property change header is
    localized.

    Raises:
      UnsupportedPatchFormat: via _fail() on any inconsistency.
    """
    lines = self.diff_header.splitlines()
    while lines:
      match = re.match(r'^--- ([^\t]+).*$', lines.pop(0))
      if not match:
        continue
      # For copy and renames, it's possible that the -- line doesn't match +++,
      # so don't check match.group(1) to match self.filename or '/dev/null', it
      # can be anything else.
      # TODO(maruel): Handle rename/copy explicitly.
      # if match.group(1) not in (self.filename, '/dev/null'):
      #   self.source_file = match.group(1)
      if not lines:
        self._fail('Nothing after header.')
      match = re.match(r'^\+\+\+ ([^\t]+).*$', lines.pop(0))
      if not match:
        self._fail('Unexpected diff: --- not following +++.')
      if match.group(1) not in (self.filename, '/dev/null'):
        self._fail('Unexpected diff: %s.' % match.group(1))
      assert not lines, '_split_header() is broken'
      break
    else:
      # Only reached when no '--- ' line was found at all.
      # Cheap check to make sure the file name is at least mentioned in the
      # 'diff' header. That's the only remaining invariant.
      if self.filename not in self.diff_header:
        self._fail('Diff seems corrupted.')
maruel@chromium.orgb3727a32011-04-04 19:31:44 +0000269
270
class PatchSet(object):
  """A list of FilePatch* objects.

  Iterating over a PatchSet yields the patches in list order.
  """

  def __init__(self, patches):
    self.patches = patches

  def set_relpath(self, relpath):
    """Used to offset the patch into a subdirectory.

    Calls set_relpath(relpath) on every patch in the set.
    """
    for entry in self.patches:
      entry.set_relpath(relpath)

  def __iter__(self):
    for entry in self.patches:
      yield entry

  @property
  def filenames(self):
    """The filename attribute of every patch, in list order."""
    return [entry.filename for entry in self.patches]