blob: a7eef3f24fc2f1468dc25ff35c79a999b2c83155 [file] [log] [blame]
maruel@chromium.orgb3727a32011-04-04 19:31:44 +00001# coding=utf8
2# Copyright (c) 2011 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5"""Utility functions to handle patches."""
6
maruel@chromium.orgcd619402011-04-09 00:08:00 +00007import posixpath
8import os
maruel@chromium.orgb3727a32011-04-04 19:31:44 +00009import re
10
11
class UnsupportedPatchFormat(Exception):
    """Raised when the patch for a given file cannot be processed.

    Attributes:
      filename: name of the file whose patch was rejected.
      status: explanation of the failure; left out of str() when falsy.
    """

    def __init__(self, filename, status):
        # Both values are also forwarded to Exception so they show up in .args.
        super(UnsupportedPatchFormat, self).__init__(filename, status)
        self.status = status
        self.filename = filename

    def __str__(self):
        # First line always names the file; the status, if any, follows on
        # its own line.
        parts = ['Can\'t process patch for file %s.' % self.filename]
        if self.status:
            parts.append('%s' % self.status)
        return '\n'.join(parts)
23
24
class FilePatchBase(object):
    """Base description of one file touched by a patch.

    The stored filename always uses '/' as the separator, never os.sep.
    """
    is_delete = False
    is_binary = False
    is_new = False

    def __init__(self, filename):
        self.filename = None
        self._set_filename(filename)

    def _set_filename(self, filename):
        """Stores filename with backslashes turned into '/', then vets it.

        The name is stored before the checks run so that _fail() can report
        it. The first offending entry, in the order listed below, is the one
        mentioned in the error.
        """
        normalized = filename.replace('\\', '/')
        self.filename = normalized
        # Substrings that are refused anywhere in the name, for simplicity.
        forbidden_anywhere = ('%', '$', '..', '\'', '"')
        # Prefixes that are refused. This is a plain prefix match, so any
        # name that merely begins with CON or COM is refused as well.
        forbidden_prefixes = ('/', 'CON', 'COM')
        for token in forbidden_anywhere:
            if token in normalized:
                self._fail('Can\'t use \'%s\' in filename.' % token)
        for prefix in forbidden_prefixes:
            if normalized.startswith(prefix):
                self._fail('Filename can\'t start with \'%s\'.' % prefix)

    def get(self):  # pragma: no coverage
        """Returns the patch content; the base class has none to offer."""
        raise NotImplementedError('Nothing to grab')

    def set_relpath(self, relpath):
        """Prefixes the filename with relpath; a falsy relpath is a no-op.

        Raises:
          UnsupportedPatchFormat: if relpath starts with '/' or the resulting
              filename is refused by _set_filename().
        """
        if relpath:
            base = relpath.replace('\\', '/')
            if base.startswith('/'):
                self._fail('Relative path starts with %s' % base[0])
            self._set_filename(posixpath.join(base, self.filename))

    def _fail(self, msg):
        """Raises UnsupportedPatchFormat for the current filename."""
        raise UnsupportedPatchFormat(self.filename, msg)
61
maruel@chromium.orgb3727a32011-04-04 19:31:44 +000062
class FilePatchDelete(FilePatchBase):
    """Patch entry recording that a file is removed."""
    is_delete = True

    def __init__(self, filename, is_binary):
        """Records the deleted file's name and whether it is binary."""
        super(FilePatchDelete, self).__init__(filename)
        self.is_binary = is_binary

    def get(self):
        # A deletion carries no content to hand back.
        raise NotImplementedError('Nothing to grab')
73
74
class FilePatchBinary(FilePatchBase):
    """Holds the full content of a binary file."""
    is_binary = True

    def __init__(self, filename, data, svn_properties, is_new):
        """Stores the binary payload and its metadata.

        A falsy svn_properties is replaced by a fresh empty list.
        """
        super(FilePatchBinary, self).__init__(filename)
        self.is_new = is_new
        self.svn_properties = svn_properties if svn_properties else []
        self.data = data

    def get(self):
        """Returns the data given to the constructor, untouched."""
        return self.data
87
88
class FilePatchDiff(FilePatchBase):
    """Patch for a single file, expressed as a textual diff.

    The diff text is split into a header (everything up to and including the
    '+++ ' line) and the hunks that follow it. The header is then sanity
    checked as a git header when it contains a 'diff --git' line, and as a svn
    header otherwise.
    """

    def __init__(self, filename, diff, svn_properties):
        """Splits and validates the diff.

        Args:
          filename: path of the file being patched; backslashes become '/'.
          diff: complete diff text for this file. Must not be empty.
          svn_properties: list of svn properties, or None. ('svn:executable',
              '*') is appended when the git header declares an executable new
              file.

        Raises:
          UnsupportedPatchFormat: if the diff is empty or its header fails
              validation.
        """
        super(FilePatchDiff, self).__init__(filename)
        if not diff:
            self._fail('File doesn\'t have a diff.')
        self.diff_header, self.diff_hunks = self._split_header(diff)
        # Copy the list: header verification may append to it and that must
        # not leak into the list owned by the caller.
        self.svn_properties = list(svn_properties or [])
        self.is_git_diff = self._is_git_diff_header(self.diff_header)
        # Number of leading path components mangle() strips. Raised to 1 when
        # the git header uses the a/ and b/ prefixes.
        self.patchlevel = 0
        if self.is_git_diff:
            self._verify_git_header()
        else:
            self._verify_svn_header()

    def get(self):
        """Returns the whole diff text: the header followed by the hunks."""
        return self.diff_header + self.diff_hunks

    def set_relpath(self, relpath):
        """Offsets the filename by relpath and rewrites the header to match.

        Every occurrence of the previous filename in the header is replaced by
        the new filename.
        """
        old_filename = self.filename
        super(FilePatchDiff, self).set_relpath(relpath)
        # Update the header too.
        self.diff_header = self.diff_header.replace(old_filename, self.filename)

    def _split_header(self, diff):
        """Splits a diff in two: the header and the hunks.

        Returns:
          A (header, hunks) tuple of strings. Line endings are preserved.

        Raises:
          UnsupportedPatchFormat: if '--- ' is not followed by '+++ ', or if
              the first line after the header is not a '@@ ' hunk marker.
        """
        header = []
        hunks = diff.splitlines(True)
        # Consume lines up to and including the '--- ' line. Some diffs have
        # no ---/+++ pair at all, like a git rename with no change or a svn
        # diff with only property changes; then everything is header.
        while hunks:
            header.append(hunks.pop(0))
            if header[-1].startswith('--- '):
                break

        if hunks:
            if not hunks[0].startswith('+++ '):
                self._fail('Inconsistent header')
            header.append(hunks.pop(0))
            if hunks:
                if not hunks[0].startswith('@@ '):
                    self._fail('Inconsistent hunk header')

        # Mangle any \\ in the header to /.
        header_lines = ('Index:', 'diff', 'copy', 'rename', '+++', '---')
        basename = os.path.basename(self.filename)
        for i, line in enumerate(header):
            # Header lines keep their line ending, so strip it before testing
            # whether the line ends with the file's basename.
            if (line.split(' ', 1)[0] in header_lines or
                    line.rstrip('\r\n').endswith(basename)):
                header[i] = line.replace('\\', '/')
        return ''.join(header), ''.join(hunks)

    @staticmethod
    def _is_git_diff_header(diff_header):
        """Returns True if the diff for a single file was generated with git."""
        # Delete: http://codereview.chromium.org/download/issue6368055_22_29.diff
        # Rename partial change:
        # http://codereview.chromium.org/download/issue6250123_3013_6010.diff
        # Rename no change:
        # http://codereview.chromium.org/download/issue6287022_3001_4010.diff
        return any(l.startswith('diff --git') for l in diff_header.splitlines())

    def mangle(self, string):
        """Mangles a file path.

        Backslashes become '/', then the first self.patchlevel path components
        are dropped.
        """
        return '/'.join(string.replace('\\', '/').split('/')[self.patchlevel:])

    def _verify_git_header(self):
        """Sanity checks the header.

        Expects the following format:

        <garbage>
        diff --git (|a/)<filename> (|b/)<filename>
        <similarity>
        <filemode changes>
        <index>
        <copy|rename from>
        <copy|rename to>
        --- <filename>
        +++ <filename>

        Everything is optional except the diff --git line.

        Raises:
          UnsupportedPatchFormat: on any inconsistency.
        """
        lines = self.diff_header.splitlines()

        # Verify the diff --git line.
        old = None
        new = None
        while lines:
            match = re.match(r'^diff \-\-git (.*?) (.*)$', lines.pop(0))
            if not match:
                continue
            old = match.group(1).replace('\\', '/')
            new = match.group(2).replace('\\', '/')
            if old.startswith('a/') and new.startswith('b/'):
                self.patchlevel = 1
                old = old[2:]
                new = new[2:]
            # The rename is about the new file so the old file can be anything.
            if new not in (self.filename, 'dev/null'):
                self._fail('Unexpected git diff output name %s.' % new)
            if old == 'dev/null' and new == 'dev/null':
                self._fail('Unexpected /dev/null git diff.')
            break

        if not old or not new:
            self._fail('Unexpected git diff; couldn\'t find git header.')

        last_line = ''

        while lines:
            line = lines.pop(0)
            # TODO(maruel): old should be replace with self.source_file
            # TODO(maruel): new == self.filename and remove new
            self._verify_git_header_process_line(
                lines, line, last_line, old, new)
            last_line = line

        # Cheap check to make sure the file name is at least mentioned in the
        # 'diff' header. That the only remaining invariant.
        if self.filename not in self.diff_header:
            self._fail('Diff seems corrupted.')

    def _verify_git_header_process_line(self, lines, line, last_line, old, new):
        """Processes a single line of the git header.

        Args:
          lines: header lines that remain after line.
          line: the header line being checked.
          last_line: the header line processed just before, or ''.
          old: source name taken from the 'diff --git' line.
          new: destination name taken from the 'diff --git' line.

        Returns nothing; problems are reported by raising
        UnsupportedPatchFormat.
        """
        # Handle these:
        #  rename from <>
        #  copy from <>
        match = re.match(r'^(rename|copy) from (.+)$', line)
        if match:
            if old != match.group(2):
                self._fail('Unexpected git diff input name for line %s.' % line)
            if not lines or not lines[0].startswith('%s to ' % match.group(1)):
                self._fail(
                    'Confused %s from/to git diff for line %s.' %
                    (match.group(1), line))
            return

        # Handle these:
        #  rename to <>
        #  copy to <>
        match = re.match(r'^(rename|copy) to (.+)$', line)
        if match:
            if new != match.group(2):
                self._fail('Unexpected git diff output name for line %s.' % line)
            if not last_line.startswith('%s from ' % match.group(1)):
                self._fail(
                    'Confused %s from/to git diff for line %s.' %
                    (match.group(1), line))
            return

        # Handle "new file mode \d{6}"
        match = re.match(r'^new file mode (\d{6})$', line)
        if match:
            mode = match.group(1)
            # Only look at owner ACL for executable. In a six digit mode such
            # as 100755 the owner digit is at index 3 (index 4 is the group).
            if int(mode[3]) & 1:
                self.svn_properties.append(('svn:executable', '*'))
            return

        # Handle "--- "
        match = re.match(r'^--- (.*)$', line)
        if match:
            if last_line[:3] in ('---', '+++'):
                self._fail('--- and +++ are reversed')
            self.is_new = match.group(1) == '/dev/null'
            # TODO(maruel): Use self.source_file.
            if (old != self.mangle(match.group(1)) and
                    match.group(1) != '/dev/null'):
                self._fail(
                    'Unexpected git diff: %s != %s.' % (old, match.group(1)))
            if not lines or not lines[0].startswith('+++'):
                self._fail('Missing git diff output name.')
            return

        # Handle "+++ "
        match = re.match(r'^\+\+\+ (.*)$', line)
        if match:
            if not last_line.startswith('---'):
                self._fail('Unexpected git diff: --- not following +++.')
            # TODO(maruel): new == self.filename.
            if (new != self.mangle(match.group(1)) and
                    '/dev/null' != match.group(1)):
                # TODO(maruel): Can +++ be /dev/null? If so, assert
                # self.is_delete == True.
                self._fail(
                    'Unexpected git diff: %s != %s.' % (new, match.group(1)))
            if lines:
                self._fail('Crap after +++')
            # We're done.
            return

    def _verify_svn_header(self):
        """Sanity checks the header.

        A svn diff can contain only property changes, in that case there will
        be no proper header. To make things worse, this property change header
        is localized.

        Raises:
          UnsupportedPatchFormat: on any inconsistency.
        """
        lines = self.diff_header.splitlines()
        last_line = ''

        while lines:
            line = lines.pop(0)
            self._verify_svn_header_process_line(lines, line, last_line)
            last_line = line

        # Cheap check to make sure the file name is at least mentioned in the
        # 'diff' header. That the only remaining invariant.
        if self.filename not in self.diff_header:
            self._fail('Diff seems corrupted.')

    def _verify_svn_header_process_line(self, lines, line, last_line):
        """Processes a single line of the svn header.

        Args:
          lines: header lines that remain after line.
          line: the header line being checked.
          last_line: the header line processed just before, or ''.

        Returns nothing; problems are reported by raising
        UnsupportedPatchFormat.
        """
        match = re.match(r'^--- ([^\t]+).*$', line)
        if match:
            if last_line[:3] in ('---', '+++'):
                self._fail('--- and +++ are reversed')
            self.is_new = match.group(1) == '/dev/null'
            # For copy and renames, it's possible that the -- line doesn't
            # match +++, so don't check match.group(1) to match self.filename
            # or '/dev/null', it can be anything else.
            # TODO(maruel): Handle rename/copy explicitly.
            # if (self.mangle(match.group(1)) != self.filename and
            #     match.group(1) != '/dev/null'):
            #   self.source_file = match.group(1)
            if not lines or not lines[0].startswith('+++'):
                self._fail('Nothing after header.')
            return

        match = re.match(r'^\+\+\+ ([^\t]+).*$', line)
        if match:
            if not last_line.startswith('---'):
                self._fail('Unexpected diff: --- not following +++.')
            if (self.mangle(match.group(1)) != self.filename and
                    match.group(1) != '/dev/null'):
                # TODO(maruel): Can +++ be /dev/null? If so, assert
                # self.is_delete == True.
                self._fail('Unexpected diff: %s.' % match.group(1))
            if lines:
                self._fail('Crap after +++')
            # We're done.
            return
maruel@chromium.orgb3727a32011-04-04 19:31:44 +0000335
336
class PatchSet(object):
    """Container for a list of FilePatch* objects."""

    def __init__(self, patches):
        self.patches = patches
        # Every entry is expected to derive from FilePatchBase.
        for entry in self.patches:
            assert isinstance(entry, FilePatchBase)

    def set_relpath(self, relpath):
        """Used to offset every patch into a subdirectory."""
        for entry in self.patches:
            entry.set_relpath(relpath)

    def __iter__(self):
        # Yields the patches one by one, in list order.
        for entry in self.patches:
            yield entry

    @property
    def filenames(self):
        """Filenames of all the patches, in list order."""
        return [entry.filename for entry in self.patches]