blob: c627f48c85b79a8ab1c65da9b30e654d012d4db1 [file] [log] [blame]
maruel@chromium.orgb3727a32011-04-04 19:31:44 +00001# coding=utf8
2# Copyright (c) 2011 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5"""Utility functions to handle patches."""
6
maruel@chromium.orgcd619402011-04-09 00:08:00 +00007import posixpath
8import os
maruel@chromium.orgb3727a32011-04-04 19:31:44 +00009import re
10
11
class UnsupportedPatchFormat(Exception):
  """Raised when the patch for a file can't be processed.

  Attributes:
    filename: name of the file whose patch was rejected.
    status: optional explanation; omitted from the message when falsy.
  """

  def __init__(self, filename, status):
    # Forward both values to Exception so they also end up in self.args.
    super(UnsupportedPatchFormat, self).__init__(filename, status)
    self.status = status
    self.filename = filename

  def __str__(self):
    """Returns the message, followed by the status on its own line if set."""
    message = 'Can\'t process patch for file %s.' % self.filename
    if not self.status:
      return message
    return message + '\n%s' % self.status
23
24
class FilePatchBase(object):
  """Defines a single file being modified.

  '/' is always used instead of os.sep for consistency.

  Attributes:
    filename: path of the file, with every backslash converted to '/'.
    is_delete: class-level flag, False here; subclasses may override it.
    is_binary: class-level flag, False here; subclasses may override it.
  """
  is_delete = False
  is_binary = False

  def __init__(self, filename):
    self.filename = None
    self._set_filename(filename)

  def _set_filename(self, filename):
    """Normalizes |filename|, stores it and rejects unsupported names.

    Raises:
      UnsupportedPatchFormat: the name contains a blacklisted sequence, starts
          with '/', or starts with a reserved device name (CON, COM<digit>).
    """
    # Stored before validating so that _fail() can report the offending name.
    self.filename = filename.replace('\\', '/')
    # Blacklist a few characters for simplicity.
    for i in ('%', '$', '..', '\'', '"'):
      if i in self.filename:
        self._fail('Can\'t use \'%s\' in filename.' % i)
    if self.filename.startswith('/'):
      self._fail('Filename can\'t start with \'%s\'.' % '/')
    # Only reject an actual device name (CON, CON.txt, COM1/foo, ...). A plain
    # startswith() check would also refuse legitimate names such as
    # CONTRIBUTING.md or COMMON/foo.c, so require that the reserved name is not
    # followed by another identifier character.
    device = re.match(r'(CON|COM\d)(?![A-Za-z0-9_-])', self.filename)
    if device:
      self._fail('Filename can\'t start with \'%s\'.' % device.group(1)[:3])

  def get(self):
    """Returns the patch content; the base class has none to return."""
    raise NotImplementedError('Nothing to grab')

  def set_relpath(self, relpath):
    """Prefixes the file name with |relpath|; a falsy |relpath| is a no-op.

    Raises:
      UnsupportedPatchFormat: |relpath| starts with '/' or the resulting file
          name is rejected by _set_filename().
    """
    if not relpath:
      return
    relpath = relpath.replace('\\', '/')
    if relpath[0] == '/':
      self._fail('Relative path starts with %s' % relpath[0])
    self._set_filename(posixpath.join(relpath, self.filename))

  def _fail(self, msg):
    """Raises UnsupportedPatchFormat for the current file name with |msg|."""
    raise UnsupportedPatchFormat(self.filename, msg)
60
maruel@chromium.orgb3727a32011-04-04 19:31:44 +000061
class FilePatchDelete(FilePatchBase):
  """Deletes a file.

  There is no content to apply, so get() always raises.
  """
  is_delete = True

  def __init__(self, filename, is_binary):
    # Shadows the class-level is_binary default with the caller's value.
    self.is_binary = is_binary
    super(FilePatchDelete, self).__init__(filename)

  def get(self):
    """Always raises NotImplementedError; a deletion carries no data."""
    raise NotImplementedError('Nothing to grab')
72
73
class FilePatchBinary(FilePatchBase):
  """Content of a new binary file.

  Attributes:
    data: the file content, returned as-is by get().
    svn_properties: the properties passed in, or an empty list when falsy.
  """
  is_binary = True

  def __init__(self, filename, data, svn_properties):
    super(FilePatchBinary, self).__init__(filename)
    if not svn_properties:
      svn_properties = []
    self.svn_properties = svn_properties
    self.data = data

  def get(self):
    """Returns the binary content given to the constructor."""
    return self.data
85
86
class FilePatchDiff(FilePatchBase):
  """Patch for a single file.

  Attributes:
    diff_header: every line up to and including the ---/+++ pair, if any.
    diff_hunks: the remaining lines of the diff.
    svn_properties: list of properties; ('svn:executable', '*') is appended
        when a git header creates the file with the owner execute bit set.
    is_git_diff: True when the header contains a 'diff --git' line.
    patchlevel: 1 when the git header uses the a/ and b/ prefixes, else 0.
  """

  def __init__(self, filename, diff, svn_properties):
    """Splits |diff| into header and hunks, then sanity checks the header.

    Raises:
      UnsupportedPatchFormat: |diff| is empty or its header is inconsistent
          with |filename|.
    """
    super(FilePatchDiff, self).__init__(filename)
    if not diff:
      self._fail('File doesn\'t have a diff.')
    self.diff_header, self.diff_hunks = self._split_header(diff)
    # Work on a copy: _verify_git_header() may append to this list and the
    # caller's list must not be modified behind its back.
    self.svn_properties = list(svn_properties or [])
    self.is_git_diff = self._is_git_diff_header(self.diff_header)
    self.patchlevel = 0
    if self.is_git_diff:
      self._verify_git_header()
    else:
      self._verify_svn_header()

  def get(self):
    """Returns the whole diff, header followed by hunks."""
    return self.diff_header + self.diff_hunks

  def set_relpath(self, relpath):
    """Offsets the file name by |relpath| and rewrites it in the header."""
    old_filename = self.filename
    super(FilePatchDiff, self).set_relpath(relpath)
    # Update the header too.
    self.diff_header = self.diff_header.replace(old_filename, self.filename)

  def _split_header(self, diff):
    """Splits a diff in two: the header and the hunks.

    Returns:
      A (header, hunks) tuple of strings.

    Raises:
      UnsupportedPatchFormat: '--- ' isn't followed by '+++ ', or '+++ ' isn't
          followed by a '@@ ' hunk header.
    """
    lines = diff.splitlines(True)
    # The header runs up to and including the first '--- ' line. Some diff may
    # not have a ---/+++ set like a git rename with no change or a svn diff
    # with only property change; everything is header in that case.
    split = len(lines)
    for index, line in enumerate(lines):
      if line.startswith('--- '):
        split = index + 1
        break
    header = lines[:split]
    hunks = lines[split:]

    if hunks:
      if not hunks[0].startswith('+++ '):
        self._fail('Inconsistent header')
      header.append(hunks.pop(0))
      if hunks:
        if not hunks[0].startswith('@@ '):
          self._fail('Inconsistent hunk header')

    # Mangle any \\ in the header to /.
    header_lines = ('Index:', 'diff', 'copy', 'rename', '+++', '---')
    basename = os.path.basename(self.filename)
    for index, line in enumerate(header):
      # Lines keep their line ending (splitlines(True)), so strip it before
      # looking for the file name at the end of the line.
      if (line.split(' ', 1)[0] in header_lines or
          line.rstrip('\r\n').endswith(basename)):
        header[index] = line.replace('\\', '/')
    return ''.join(header), ''.join(hunks)

  @staticmethod
  def _is_git_diff_header(diff_header):
    """Returns True if the diff for a single file was generated with git."""
    # Delete: http://codereview.chromium.org/download/issue6368055_22_29.diff
    # Rename partial change:
    # http://codereview.chromium.org/download/issue6250123_3013_6010.diff
    # Rename no change:
    # http://codereview.chromium.org/download/issue6287022_3001_4010.diff
    return any(l.startswith('diff --git') for l in diff_header.splitlines())

  def mangle(self, string):
    """Mangle a file path.

    Converts backslashes to '/' and drops the first |self.patchlevel| path
    components.
    """
    return '/'.join(string.replace('\\', '/').split('/')[self.patchlevel:])

  def _verify_git_header(self):
    """Sanity checks the header.

    Expects the following format:

    <garbage>
    diff --git (|a/)<filename> (|b/)<filename>
    <similarity>
    <filemode changes>
    <index>
    <copy|rename from>
    <copy|rename to>
    --- <filename>
    +++ <filename>

    Everything is optional except the diff --git line.

    Raises:
      UnsupportedPatchFormat: the header doesn't match the file name or is
          malformed.
    """
    lines = self.diff_header.splitlines()

    # Verify the diff --git line.
    old = None
    new = None
    while lines:
      match = re.match(r'^diff \-\-git (.*?) (.*)$', lines.pop(0))
      if not match:
        continue
      old = match.group(1).replace('\\', '/')
      new = match.group(2).replace('\\', '/')
      if old.startswith('a/') and new.startswith('b/'):
        self.patchlevel = 1
        old = old[2:]
        new = new[2:]
      # The rename is about the new file so the old file can be anything.
      if new not in (self.filename, 'dev/null'):
        self._fail('Unexpected git diff output name %s.' % new)
      if old == 'dev/null' and new == 'dev/null':
        self._fail('Unexpected /dev/null git diff.')
      break

    if not old or not new:
      self._fail('Unexpected git diff; couldn\'t find git header.')

    # Handle these:
    #   new file mode \d{6}
    #   rename from <>
    #   rename to <>
    #   copy from <>
    #   copy to <>
    while lines:
      if lines[0].startswith('--- '):
        break
      line = lines.pop(0)
      match = re.match(r'^(rename|copy) from (.+)$', line)
      if match:
        # Remember the action now: |match| is rebound below and is None when
        # the following line isn't the expected 'to' line.
        action = match.group(1)
        if old != match.group(2):
          self._fail('Unexpected git diff input name for %s.' % action)
        if not lines:
          self._fail('Missing git diff output name for %s.' % action)
        match = re.match(r'^(rename|copy) to (.+)$', lines.pop(0))
        if not match:
          self._fail('Missing git diff output name for %s.' % action)
        if new != match.group(2):
          self._fail('Unexpected git diff output name for %s.' % action)
        continue

      match = re.match(r'^new file mode (\d{6})$', line)
      if match:
        mode = match.group(1)
        # Only look at owner ACL for executable. In a mode like 100755 the
        # owner digit is at index 3 and the execute permission is bit 1.
        if int(mode[3]) & 1:
          self.svn_properties.append(('svn:executable', '*'))

    # Handle ---/+++
    while lines:
      match = re.match(r'^--- (.*)$', lines.pop(0))
      if not match:
        continue
      if old != self.mangle(match.group(1)) and match.group(1) != '/dev/null':
        self._fail('Unexpected git diff: %s != %s.' % (old, match.group(1)))
      if not lines:
        self._fail('Missing git diff output name.')
      match = re.match(r'^\+\+\+ (.*)$', lines.pop(0))
      if not match:
        self._fail('Unexpected git diff: --- not following +++.')
      if new != self.mangle(match.group(1)) and '/dev/null' != match.group(1):
        self._fail('Unexpected git diff: %s != %s.' % (new, match.group(1)))
      assert not lines, '_split_header() is broken'
      break

  def _verify_svn_header(self):
    """Sanity checks the header.

    A svn diff can contain only property changes, in that case there will be no
    proper header. To make things worse, this property change header is
    localized.

    Raises:
      UnsupportedPatchFormat: the +++ line is missing or names another file, or
          there is no ---/+++ pair and the file name isn't in the header.
    """
    lines = self.diff_header.splitlines()
    while lines:
      match = re.match(r'^--- ([^\t]+).*$', lines.pop(0))
      if not match:
        continue
      # For copy and renames, it's possible that the -- line doesn't match +++,
      # so don't check match.group(1) to match self.filename or '/dev/null', it
      # can be anything else.
      # TODO(maruel): Handle rename/copy explicitly.
      # if match.group(1) not in (self.filename, '/dev/null'):
      #   self.source_file = match.group(1)
      if not lines:
        self._fail('Nothing after header.')
      match = re.match(r'^\+\+\+ ([^\t]+).*$', lines.pop(0))
      if not match:
        self._fail('Unexpected diff: --- not following +++.')
      if match.group(1) not in (self.filename, '/dev/null'):
        self._fail('Unexpected diff: %s.' % match.group(1))
      assert not lines, '_split_header() is broken'
      break
    else:
      # Only reached when no '--- ' line was found at all.
      # Cheap check to make sure the file name is at least mentioned in the
      # 'diff' header. That's the only remaining invariant.
      if self.filename not in self.diff_header:
        self._fail('Diff seems corrupted.')
maruel@chromium.orgb3727a32011-04-04 19:31:44 +0000277
278
class PatchSet(object):
  """A list of FilePatch* objects.

  The container passed to the constructor is stored as-is in |patches|.
  """

  def __init__(self, patches):
    self.patches = patches
    # Every item must derive from FilePatchBase.
    for item in self.patches:
      assert isinstance(item, FilePatchBase)

  def set_relpath(self, relpath):
    """Used to offset the patch into a subdirectory."""
    for item in self.patches:
      item.set_relpath(relpath)

  def __iter__(self):
    """Yields each patch, in the order of |patches|."""
    for item in self.patches:
      yield item

  @property
  def filenames(self):
    """The file name of each patch, in the order of |patches|."""
    names = []
    for item in self.patches:
      names.append(item.filename)
    return names