# coding=utf8
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Utility functions to handle patches."""
6
import os
import posixpath
import re
10
11
class UnsupportedPatchFormat(Exception):
  """Raised when a patch for a file cannot be processed.

  Attributes:
    filename: name of the file whose patch was rejected.
    status: explanation of the failure; omitted from str() when falsy.
  """

  def __init__(self, filename, status):
    super(UnsupportedPatchFormat, self).__init__(filename, status)
    self.filename = filename
    self.status = status

  def __str__(self):
    """Returns the file name, followed by the status on its own line if set."""
    out = 'Can\'t process patch for file %s.' % self.filename
    if self.status:
      out += '\n%s' % self.status
    return out
23
24
class FilePatchBase(object):
  """Defines a single file being modified.

  '/' is always used instead of os.sep for consistency.

  This class is abstract: __init__ asserts that a subclass is being
  instantiated.
  """
  is_delete = False
  is_binary = False
  is_new = False

  def __init__(self, filename):
    """Stores the sanitized file name.

    Raises:
      UnsupportedPatchFormat: if _process_filename() rejects filename.
    """
    assert self.__class__ is not FilePatchBase
    self.filename = self._process_filename(filename)
    # Set when the file is copied or moved.
    self.source_filename = None

  @property
  def filename_utf8(self):
    """self.filename encoded as UTF-8."""
    return self.filename.encode('utf-8')

  @property
  def source_filename_utf8(self):
    """self.source_filename encoded as UTF-8, or None when it is not set."""
    if self.source_filename is None:
      return None
    return self.source_filename.encode('utf-8')

  @staticmethod
  def _process_filename(filename):
    """Normalizes path separators to '/' and rejects unsupported names.

    Returns:
      filename with every backslash replaced by '/'.

    Raises:
      UnsupportedPatchFormat: if the name contains a blacklisted sequence or
          starts with a forbidden prefix.
    """
    filename = filename.replace('\\', '/')
    # Blacklist a few characters for simplicity.
    for forbidden in ('%', '$', '..', '\'', '"'):
      if forbidden in filename:
        raise UnsupportedPatchFormat(
            filename, 'Can\'t use \'%s\' in filename.' % forbidden)
    # NOTE(review): this is a plain prefix test, so ordinary names such as
    # 'CONTRIBUTING' or 'COMMON.txt' are rejected as well. Left unchanged on
    # purpose; confirm whether only reserved device names should be refused.
    for prefix in ('/', 'CON', 'COM'):
      if filename.startswith(prefix):
        raise UnsupportedPatchFormat(
            filename, 'Filename can\'t start with \'%s\'.' % prefix)
    return filename

  def set_relpath(self, relpath):
    """Prefixes filename, and source_filename if set, with relpath.

    Does nothing when relpath is empty. Backslashes in relpath are converted
    to '/' first and the joined names are validated again.

    Raises:
      UnsupportedPatchFormat: if relpath is absolute or a resulting name is
          rejected by _process_filename().
    """
    if not relpath:
      return
    relpath = relpath.replace('\\', '/')
    if relpath.startswith('/'):
      self._fail('Relative path starts with %s' % relpath[0])
    self.filename = self._process_filename(
        posixpath.join(relpath, self.filename))
    if self.source_filename:
      self.source_filename = self._process_filename(
          posixpath.join(relpath, self.source_filename))

  def _fail(self, msg):
    """Shortcut function to raise UnsupportedPatchFormat."""
    raise UnsupportedPatchFormat(self.filename, msg)

  def __str__(self):
    """Returns a status-like line: four flag columns then the file name(s).

    The columns are B(inary), D(elete), N(ew) and R (a source file name is
    set); an unset flag is rendered as a space.
    """
    # Use a status-like board.
    flags = (
        (self.is_binary, 'B'),
        (self.is_delete, 'D'),
        (self.is_new, 'N'),
        (self.source_filename, 'R'),
    )
    out = ''.join(letter if is_set else ' ' for is_set, letter in flags)
    out += ' '
    if self.source_filename:
      out += '%s->' % self.source_filename_utf8
    return out + self.filename_utf8
maruel@chromium.org5e975632011-09-29 18:07:06 +0000102
maruel@chromium.orgb3727a32011-04-04 19:31:44 +0000103
class FilePatchDelete(FilePatchBase):
  """Deletes a file."""
  is_delete = True

  def __init__(self, filename, is_binary):
    """Records the deleted file's name and whether it is binary."""
    super(FilePatchDelete, self).__init__(filename)
    self.is_binary = is_binary
111
maruel@chromium.orgb3727a32011-04-04 19:31:44 +0000112
class FilePatchBinary(FilePatchBase):
  """Content of a new binary file."""
  is_binary = True

  def __init__(self, filename, data, svn_properties, is_new):
    """Stores the file content and metadata.

    Args:
      filename: name of the file, validated by the base class.
      data: the file content, returned unchanged by get().
      svn_properties: properties to attach; a falsy value becomes [].
      is_new: stored as self.is_new.
    """
    super(FilePatchBinary, self).__init__(filename)
    self.data = data
    self.svn_properties = svn_properties or []
    self.is_new = is_new

  def get(self):
    """Returns the data passed to the constructor."""
    return self.data
125
126
class Hunk(object):
  """Parsed hunk data container.

  Attributes:
    start_src: start line of the hunk on the source side.
    lines_src: number of lines on the source side.
    start_dst: start line of the hunk on the destination side.
    lines_dst: number of lines on the destination side.
    variation: lines_dst - lines_src, i.e. the net line count change.
    text: list of the hunk's body lines; starts empty.
  """

  def __init__(self, start_src, lines_src, start_dst, lines_dst):
    self.start_src = start_src
    self.lines_src = lines_src
    self.start_dst = start_dst
    self.lines_dst = lines_dst
    self.variation = self.lines_dst - self.lines_src
    self.text = []

  def __repr__(self):
    return '%s<(%d, %d) to (%d, %d)>' % (
        self.__class__.__name__,
        self.start_src, self.lines_src, self.start_dst, self.lines_dst)
142
maruel@chromium.orgcf602552012-01-10 19:49:31 +0000143
class FilePatchDiff(FilePatchBase):
  """Patch for a single file.

  The diff is split into a header and hunks, the header is sanity checked
  (git or svn flavor) and the hunk headers are parsed into Hunk objects.
  """

  def __init__(self, filename, diff, svn_properties):
    """Parses and verifies diff.

    Args:
      filename: name of the patched file.
      diff: text of the diff for this single file; must not be empty.
      svn_properties: properties to attach; a falsy value becomes [].

    Raises:
      UnsupportedPatchFormat: if any sanity check on the diff fails.
    """
    super(FilePatchDiff, self).__init__(filename)
    if not diff:
      self._fail('File doesn\'t have a diff.')
    self.diff_header, self.diff_hunks = self._split_header(diff)
    self.svn_properties = svn_properties or []
    self.is_git_diff = self._is_git_diff_header(self.diff_header)
    # Number of leading path components mangle() strips from header paths.
    self.patchlevel = 0
    if self.is_git_diff:
      self._verify_git_header()
    else:
      self._verify_svn_header()
    self.hunks = self._split_hunks()
    if self.source_filename and not self.is_new:
      self._fail('If source_filename is set, is_new must be also be set')

  def get(self, for_git):
    """Returns the diff text (header followed by hunks).

    When for_git is false and the file was copied or moved, every occurrence
    of the source file name in the header is replaced by the file name.
    """
    if for_git or not self.source_filename:
      return self.diff_header + self.diff_hunks
    # patch is stupid. It patches the source_filename instead so get rid of
    # any source_filename reference if needed.
    return (
        self.diff_header.replace(
            self.source_filename_utf8, self.filename_utf8) +
        self.diff_hunks)

  def set_relpath(self, relpath):
    """Offsets the file names by relpath and rewrites the header to match."""
    old_filename = self.filename_utf8
    old_source_filename = self.source_filename_utf8 or self.filename_utf8
    super(FilePatchDiff, self).set_relpath(relpath)
    # Update the header too.
    filename = self.filename_utf8
    source_filename = self.source_filename_utf8 or self.filename_utf8
    lines = self.diff_header.splitlines(True)
    for i, line in enumerate(lines):
      if line.startswith('diff --git'):
        lines[i] = line.replace(
            'a/' + old_source_filename, source_filename).replace(
                'b/' + old_filename, filename)
      elif re.match(r'^\w+ from .+$', line) or line.startswith('---'):
        lines[i] = line.replace(old_source_filename, source_filename)
      elif re.match(r'^\w+ to .+$', line) or line.startswith('+++'):
        lines[i] = line.replace(old_filename, filename)
    self.diff_header = ''.join(lines)

  def _split_header(self, diff):
    """Splits a diff in two: the header and the hunks.

    The header runs up to and including the first '--- ' line plus the '+++ '
    line that must follow it; the rest is the hunks.

    Returns:
      A (header, hunks) tuple of strings.

    Raises:
      UnsupportedPatchFormat: if '--- ' is not followed by '+++ ', or the
          hunks do not start with '@@ '.
    """
    lines = diff.splitlines(True)
    # Some diff may not have a ---/+++ set like a git rename with no change or
    # a svn diff with only property change. Everything is header in that case.
    split_at = len(lines)
    for index, line in enumerate(lines):
      if line.startswith('--- '):
        split_at = index + 1
        break
    header = lines[:split_at]
    hunks = lines[split_at:]

    if hunks:
      if not hunks[0].startswith('+++ '):
        self._fail('Inconsistent header')
      header.append(hunks.pop(0))
    if hunks:
      if not hunks[0].startswith('@@ '):
        self._fail('Inconsistent hunk header')

    # Mangle any \\ in the header to /.
    header_lines = ('Index:', 'diff', 'copy', 'rename', '+++', '---')
    basename = os.path.basename(self.filename_utf8)
    for i, line in enumerate(header):
      # NOTE(review): lines keep their line ending here, so the endswith()
      # test can only match a final line without a trailing newline.
      if line.split(' ', 1)[0] in header_lines or line.endswith(basename):
        header[i] = line.replace('\\', '/')
    return ''.join(header), ''.join(hunks)

  @staticmethod
  def _is_git_diff_header(diff_header):
    """Returns True if the diff for a single files was generated with git."""
    # Delete: http://codereview.chromium.org/download/issue6368055_22_29.diff
    # Rename partial change:
    # http://codereview.chromium.org/download/issue6250123_3013_6010.diff
    # Rename no change:
    # http://codereview.chromium.org/download/issue6287022_3001_4010.diff
    return any(l.startswith('diff --git') for l in diff_header.splitlines())

  def _parse_hunk_range(self, text):
    """Parses one side of a hunk header: 'START' or 'START,COUNT'.

    Returns:
      A (start, count) tuple of ints; count is 1 when it is omitted.

    Raises:
      UnsupportedPatchFormat: if text has more than one comma or a part is
          not a number.
    """
    parts = text.split(',')
    if len(parts) > 2:
      self._fail('Hunk header is malformed')
    try:
      values = [int(part) for part in parts]
    except ValueError:
      # E.g. '1,' or ',': report it like any other unsupported patch.
      self._fail('Hunk header is malformed')
    if len(values) == 1:
      values.append(1)
    return values[0], values[1]

  def _split_hunks(self):
    """Splits the hunks and does verification.

    Sets is_new / is_delete when a single hunk has an all-zero source or
    destination range, and converts the hunks' start lines to 0-based.

    Returns:
      The list of Hunk objects, in file order.

    Raises:
      UnsupportedPatchFormat: if a hunk header is unparsable, out of order or
          inconsistent with the hunk's content.
    """
    hunks = []
    for line in self.diff_hunks.splitlines(True):
      if line.startswith('@@'):
        match = re.match(r'^@@ -([\d,]+) \+([\d,]+) @@.*$', line)
        # File add will result in "-0,0 +1" but file deletion will result in
        # "-1,N +0,0" where N is the number of lines deleted. That's from diff
        # and svn diff. git diff doesn't exhibit this behavior.
        # svn diff for a single line file rewrite "@@ -1 +1 @@". Fun.
        # "@@ -1 +1,N @@" is also valid where N is the length of the new file.
        if not match:
          self._fail('Hunk header is unparsable')
        start_src, lines_src = self._parse_hunk_range(match.group(1))
        start_dst, lines_dst = self._parse_hunk_range(match.group(2))
        new_hunk = Hunk(start_src, lines_src, start_dst, lines_dst)
        if hunks:
          if new_hunk.start_src <= hunks[-1].start_src:
            self._fail('Hunks source lines are not ordered')
          if new_hunk.start_dst <= hunks[-1].start_dst:
            self._fail('Hunks destination lines are not ordered')
        hunks.append(new_hunk)
        continue
      hunks[-1].text.append(line)

    if len(hunks) == 1:
      if hunks[0].start_src == 0 and hunks[0].lines_src == 0:
        self.is_new = True
      if hunks[0].start_dst == 0 and hunks[0].lines_dst == 0:
        self.is_delete = True

    if self.is_new and self.is_delete:
      self._fail('Hunk header is all 0')

    if not self.is_new and not self.is_delete:
      for hunk in hunks:
        # Net change in line count according to the hunk's content.
        variation = (
            sum(1 for i in hunk.text if i.startswith('+')) -
            sum(1 for i in hunk.text if i.startswith('-')))
        if variation != hunk.variation:
          self._fail(
              'Hunk header is incorrect: %d vs %d; %r' % (
                variation, hunk.variation, hunk))
        if not hunk.start_src:
          self._fail(
              'Hunk header start line is incorrect: %d' % hunk.start_src)
        if not hunk.start_dst:
          self._fail(
              'Hunk header start line is incorrect: %d' % hunk.start_dst)
        hunk.start_src -= 1
        hunk.start_dst -= 1
    if self.is_new and hunks:
      hunks[0].start_dst -= 1
    if self.is_delete and hunks:
      hunks[0].start_src -= 1
    return hunks

  def mangle(self, string):
    """Mangle a file path.

    Converts backslashes to '/' and drops the first self.patchlevel path
    components.
    """
    return '/'.join(string.replace('\\', '/').split('/')[self.patchlevel:])

  def _verify_git_header(self):
    """Sanity checks the header.

    Expects the following format:

    <garbage>
    diff --git (|a/)<filename> (|b/)<filename>
    <similarity>
    <filemode changes>
    <index>
    <copy|rename from>
    <copy|rename to>
    --- <filename>
    +++ <filename>

    Everything is optional except the diff --git line.

    Sets patchlevel, and source_filename / is_new for a copy or rename.

    Raises:
      UnsupportedPatchFormat: if the header is inconsistent.
    """
    lines = self.diff_header.splitlines()

    # Verify the diff --git line.
    old = None
    new = None
    while lines:
      match = re.match(r'^diff \-\-git (.*?) (.*)$', lines.pop(0))
      if not match:
        continue
      if match.group(1).startswith('a/') and match.group(2).startswith('b/'):
        self.patchlevel = 1
      old = self.mangle(match.group(1))
      new = self.mangle(match.group(2))

      # The rename is about the new file so the old file can be anything.
      if new not in (self.filename_utf8, 'dev/null'):
        self._fail('Unexpected git diff output name %s.' % new)
      if old == 'dev/null' and new == 'dev/null':
        self._fail('Unexpected /dev/null git diff.')
      break

    if not old or not new:
      self._fail('Unexpected git diff; couldn\'t find git header.')

    if old not in (self.filename_utf8, 'dev/null'):
      # Copy or rename.
      self.source_filename = old.decode('utf-8')
      self.is_new = True

    # Each remaining line is checked together with its neighbours: the
    # previous line and the not yet consumed lines.
    last_line = ''
    while lines:
      line = lines.pop(0)
      self._verify_git_header_process_line(lines, line, last_line)
      last_line = line

    # Cheap check to make sure the file name is at least mentioned in the
    # 'diff' header. That the only remaining invariant.
    if self.filename_utf8 not in self.diff_header:
      self._fail('Diff seems corrupted.')

  def _verify_git_header_process_line(self, lines, line, last_line):
    """Processes a single line of the header.

    Args:
      lines: the header lines that follow line.
      line: the header line to check.
      last_line: the header line that preceded line, '' for the first one.

    Returns nothing; may set is_new / is_delete and append to svn_properties.

    Format is described to
    http://www.kernel.org/pub/software/scm/git/docs/git-diff.html

    Raises:
      UnsupportedPatchFormat: if the line is inconsistent with its neighbours
          or with the file names.
    """
    old = self.source_filename_utf8 or self.filename_utf8

    match = re.match(r'^(rename|copy) from (.+)$', line)
    if match:
      if old != match.group(2):
        self._fail('Unexpected git diff input name for line %s.' % line)
      if not lines or not lines[0].startswith('%s to ' % match.group(1)):
        self._fail(
            'Confused %s from/to git diff for line %s.' %
                (match.group(1), line))
      return

    match = re.match(r'^(rename|copy) to (.+)$', line)
    if match:
      if self.filename_utf8 != match.group(2):
        self._fail('Unexpected git diff output name for line %s.' % line)
      if not last_line.startswith('%s from ' % match.group(1)):
        self._fail(
            'Confused %s from/to git diff for line %s.' %
                (match.group(1), line))
      return

    match = re.match(r'^deleted file mode (\d{6})$', line)
    if match:
      # It is necessary to parse it because there may be no hunk, like when the
      # file was empty.
      self.is_delete = True
      return

    match = re.match(r'^new(| file) mode (\d{6})$', line)
    if match:
      mode = match.group(2)
      # Only look at owner ACL for executable. mode is six octal digits such
      # as '100755'; the owner permission digit is at index 3. For '100644'
      # and '100755' this agrees with the group digit at index 4.
      # TODO(maruel): Add support to remove a property.
      if bool(int(mode[3]) & 1):
        self.svn_properties.append(('svn:executable', '*'))
      return

    match = re.match(r'^--- (.*)$', line)
    if match:
      if last_line[:3] in ('---', '+++'):
        self._fail('--- and +++ are reversed')
      if match.group(1) == '/dev/null':
        self.is_new = True
      elif self.mangle(match.group(1)) != old:
        # git patches are always well formatted, do not allow random filenames.
        self._fail('Unexpected git diff: %s != %s.' % (old, match.group(1)))
      if not lines or not lines[0].startswith('+++'):
        self._fail('Missing git diff output name.')
      return

    match = re.match(r'^\+\+\+ (.*)$', line)
    if match:
      if not last_line.startswith('---'):
        self._fail('Unexpected git diff: --- not following +++.')
      if '/dev/null' == match.group(1):
        self.is_delete = True
      elif self.filename_utf8 != self.mangle(match.group(1)):
        self._fail(
            'Unexpected git diff: %s != %s.' % (self.filename, match.group(1)))
      if lines:
        self._fail('Crap after +++')
      # We're done.
      return

  def _verify_svn_header(self):
    """Sanity checks the header.

    A svn diff can contain only property changes, in that case there will be no
    proper header. To make things worse, this property change header is
    localized.

    Raises:
      UnsupportedPatchFormat: if the header is inconsistent.
    """
    lines = self.diff_header.splitlines()
    last_line = ''

    while lines:
      line = lines.pop(0)
      self._verify_svn_header_process_line(lines, line, last_line)
      last_line = line

    # Cheap check to make sure the file name is at least mentioned in the
    # 'diff' header. That the only remaining invariant.
    if self.filename_utf8 not in self.diff_header:
      self._fail('Diff seems corrupted.')

  def _verify_svn_header_process_line(self, lines, line, last_line):
    """Processes a single line of the header.

    Args:
      lines: the header lines that follow line.
      line: the header line to check.
      last_line: the header line that preceded line, '' for the first one.

    Returns nothing; may set is_new, is_delete and source_filename.

    Raises:
      UnsupportedPatchFormat: if the ---/+++ lines are inconsistent.
    """
    # The file name ends at the first tab; anything after it is ignored.
    match = re.match(r'^--- ([^\t]+).*$', line)
    if match:
      if last_line[:3] in ('---', '+++'):
        self._fail('--- and +++ are reversed')
      if match.group(1) == '/dev/null':
        self.is_new = True
      elif self.mangle(match.group(1)) != self.filename_utf8:
        # guess the source filename.
        self.source_filename = match.group(1).decode('utf-8')
        self.is_new = True
      if not lines or not lines[0].startswith('+++'):
        self._fail('Nothing after header.')
      return

    match = re.match(r'^\+\+\+ ([^\t]+).*$', line)
    if match:
      if not last_line.startswith('---'):
        self._fail('Unexpected diff: --- not following +++.')
      if match.group(1) == '/dev/null':
        self.is_delete = True
      elif self.mangle(match.group(1)) != self.filename_utf8:
        self._fail('Unexpected diff: %s.' % match.group(1))
      if lines:
        self._fail('Crap after +++')
      # We're done.
      return
maruel@chromium.orgb3727a32011-04-04 19:31:44 +0000490
491
class PatchSet(object):
  """A list of FilePatch* objects."""

  def __init__(self, patches):
    """Stores patches sorted by order of application.

    Args:
      patches: iterable of FilePatchBase instances.
    """
    for p in patches:
      assert isinstance(p, FilePatchBase)

    def key(p):
      """Sort by ordering of application.

      File move are first.
      Deletes are last.
      """
      # An explicit rank puts copies/moves ahead of the other patches instead
      # of comparing a string against a tuple, which only Python 2 allows.
      # Moves are then ordered by source name, the others by file name.
      if p.source_filename:
        return (p.is_delete, 0, p.source_filename_utf8, p.filename_utf8)
      return (p.is_delete, 1, p.filename_utf8, p.filename_utf8)

    self.patches = sorted(patches, key=key)

  def set_relpath(self, relpath):
    """Used to offset the patch into a subdirectory."""
    for patch in self.patches:
      patch.set_relpath(relpath)

  def __iter__(self):
    """Yields the patches in order of application."""
    for patch in self.patches:
      yield patch

  def __getitem__(self, key):
    """Returns the patch(es) at index or slice key, in application order."""
    return self.patches[key]

  @property
  def filenames(self):
    """File names of the patches, in order of application."""
    return [p.filename for p in self.patches]