blob: 9083094e834dac7f3cd8fce4bb33669389f65048 [file] [log] [blame]
# coding=utf8
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Utility functions to handle patches."""
6
maruel@chromium.orgcd619402011-04-09 00:08:00 +00007import posixpath
8import os
maruel@chromium.orgb3727a32011-04-04 19:31:44 +00009import re
10
11
class UnsupportedPatchFormat(Exception):
  """Raised when the patch for a file cannot be processed.

  Attributes:
    filename: name of the file whose patch was rejected.
    status: optional detail message; appended to str() when truthy.
  """

  def __init__(self, filename, status):
    super(UnsupportedPatchFormat, self).__init__(filename, status)
    self.filename = filename
    self.status = status

  def __str__(self):
    # The first line always names the file; the detail goes on its own line.
    parts = ['Can\'t process patch for file %s.' % self.filename]
    if self.status:
      parts.append('\n%s' % self.status)
    return ''.join(parts)
23
24
class FilePatchBase(object):
  """Defines a single file being modified.

  '/' is always used instead of os.sep for consistency.

  This class is abstract: instantiating it directly trips an assertion.
  Subclasses override the is_delete / is_binary / is_new flags as needed.
  """
  is_delete = False
  is_binary = False
  is_new = False

  def __init__(self, filename):
    assert self.__class__ is not FilePatchBase
    self.filename = self._process_filename(filename)
    # Set when the file is copied or moved.
    self.source_filename = None

  @property
  def filename_utf8(self):
    """self.filename encoded as UTF-8."""
    return self.filename.encode('utf-8')

  @property
  def source_filename_utf8(self):
    """self.source_filename encoded as UTF-8, or None when it is not set."""
    if self.source_filename is not None:
      return self.source_filename.encode('utf-8')

  @staticmethod
  def _process_filename(filename):
    """Normalizes a file name to '/' separators and validates it.

    Returns the normalized name.

    Raises UnsupportedPatchFormat if the name contains a blacklisted sequence,
    is absolute, or starts with a Windows reserved device name.
    """
    filename = filename.replace('\\', '/')
    # Blacklist a few characters for simplicity.
    for i in ('%', '$', '..', '\'', '"'):
      if i in filename:
        raise UnsupportedPatchFormat(
            filename, 'Can\'t use \'%s\' in filename.' % i)
    if filename.startswith('/'):
      raise UnsupportedPatchFormat(
          filename, 'Filename can\'t start with \'/\'.')
    # Reject the Windows device names CON and COM<digit> (with or without an
    # extension) as the first path component. A plain prefix test would also
    # reject legitimate names such as CONTRIBUTING.md or COMMON/foo.cc.
    stem = filename.split('/', 1)[0].split('.', 1)[0]
    if stem == 'CON' or re.match(r'^COM\d$', stem):
      raise UnsupportedPatchFormat(
          filename, 'Filename can\'t start with \'%s\'.' % stem[:3])
    return filename

  def set_relpath(self, relpath):
    """Prefixes filename (and source_filename, if set) with relpath.

    Does nothing when relpath is empty. Raises UnsupportedPatchFormat if
    relpath is absolute or if the resulting name fails validation.
    """
    if not relpath:
      return
    relpath = relpath.replace('\\', '/')
    if relpath[0] == '/':
      self._fail('Relative path starts with %s' % relpath[0])
    self.filename = self._process_filename(
        posixpath.join(relpath, self.filename))
    if self.source_filename:
      self.source_filename = self._process_filename(
          posixpath.join(relpath, self.source_filename))

  def _fail(self, msg):
    """Shortcut function to raise UnsupportedPatchFormat."""
    raise UnsupportedPatchFormat(self.filename, msg)

  def __str__(self):
    # Use a status-like board: one column each for Binary, Delete, New and
    # Renamed/copied, followed by "source->" (when set) and the file name.
    out = ''
    if self.is_binary:
      out += 'B'
    else:
      out += ' '
    if self.is_delete:
      out += 'D'
    else:
      out += ' '
    if self.is_new:
      out += 'N'
    else:
      out += ' '
    if self.source_filename:
      out += 'R'
    else:
      out += ' '
    out += ' '
    if self.source_filename:
      out += '%s->' % self.source_filename_utf8
    return out + self.filename_utf8
maruel@chromium.org5e975632011-09-29 18:07:06 +0000102
maruel@chromium.orgb3727a32011-04-04 19:31:44 +0000103
class FilePatchDelete(FilePatchBase):
  """Patch that deletes a file; carries no content."""
  # Class-level override of the FilePatchBase default.
  is_delete = True

  def __init__(self, filename, is_binary):
    super(FilePatchDelete, self).__init__(filename)
    # Per-instance override: the deleted file may or may not be binary.
    self.is_binary = is_binary
111
maruel@chromium.orgb3727a32011-04-04 19:31:44 +0000112
class FilePatchBinary(FilePatchBase):
  """Holds the full content of a binary file."""
  is_binary = True

  def __init__(self, filename, data, svn_properties, is_new):
    super(FilePatchBinary, self).__init__(filename)
    self.is_new = is_new
    self.data = data
    # Any falsy value (None, empty list) is replaced by a fresh list.
    if svn_properties:
      self.svn_properties = svn_properties
    else:
      self.svn_properties = []

  def get(self):
    """Returns the data given at construction, unchanged."""
    return self.data
125
126
class Hunk(object):
  """Parsed hunk data container.

  Holds the start line and line count of both sides of a hunk header, the
  net line-count change (variation) and the hunk's text lines.
  """

  def __init__(self, start_src, lines_src, start_dst, lines_dst):
    self.start_src, self.lines_src = start_src, lines_src
    self.start_dst, self.lines_dst = start_dst, lines_dst
    # Net number of lines added (positive) or removed (negative).
    self.variation = lines_dst - lines_src
    # Filled in by whoever parses the hunk body.
    self.text = []

  def __repr__(self):
    fields = (
        self.__class__.__name__,
        self.start_src,
        self.lines_src,
        self.start_dst,
        self.lines_dst,
    )
    return '%s<(%d, %d) to (%d, %d)>' % fields
142
maruel@chromium.orgcf602552012-01-10 19:49:31 +0000143
class FilePatchDiff(FilePatchBase):
  """Patch for a single file.

  The diff is split into a header and hunks. The header is verified as either
  a git or a svn style header and the hunks are parsed into Hunk objects.
  Any inconsistency raises UnsupportedPatchFormat.
  """

  def __init__(self, filename, diff, svn_properties):
    """Parses and verifies diff for filename.

    Arguments:
      filename: name of the patched file.
      diff: text of the diff for this single file; must not be empty.
      svn_properties: list of (name, value) tuples, or a falsy value.
    """
    super(FilePatchDiff, self).__init__(filename)
    if not diff:
      self._fail('File doesn\'t have a diff.')
    self.diff_header, self.diff_hunks = self._split_header(diff)
    self.svn_properties = svn_properties or []
    self.is_git_diff = self._is_git_diff_header(self.diff_header)
    self.patchlevel = 0
    if self.is_git_diff:
      self._verify_git_header()
    else:
      self._verify_svn_header()
    self.hunks = self._split_hunks()
    if self.source_filename and not self.is_new:
      self._fail('If source_filename is set, is_new must be also be set')

  def get(self, for_git):
    """Returns the diff text (header followed by hunks).

    When for_git is false and the file was copied or moved, references to the
    source file name in the header are replaced with the destination name.
    """
    if for_git or not self.source_filename:
      return self.diff_header + self.diff_hunks
    else:
      # patch is stupid. It patches the source_filename instead so get rid of
      # any source_filename reference if needed.
      return (
          self.diff_header.replace(
              self.source_filename_utf8, self.filename_utf8) +
          self.diff_hunks)

  def set_relpath(self, relpath):
    """Offsets the file names by relpath and rewrites the header to match."""
    old_filename = self.filename_utf8
    old_source_filename = self.source_filename_utf8 or self.filename_utf8
    super(FilePatchDiff, self).set_relpath(relpath)
    # Update the header too.
    filename = self.filename_utf8
    source_filename = self.source_filename_utf8 or self.filename_utf8
    lines = self.diff_header.splitlines(True)
    for i, line in enumerate(lines):
      if line.startswith('diff --git'):
        # The 'a/' and 'b/' prefixes are replaced along with the old names.
        lines[i] = line.replace(
            'a/' + old_source_filename, source_filename).replace(
                'b/' + old_filename, filename)
      elif re.match(r'^\w+ from .+$', line) or line.startswith('---'):
        lines[i] = line.replace(old_source_filename, source_filename)
      elif re.match(r'^\w+ to .+$', line) or line.startswith('+++'):
        lines[i] = line.replace(old_filename, filename)
    self.diff_header = ''.join(lines)

  def _split_header(self, diff):
    """Splits a diff in two: the header and the hunks.

    Returns a (header, hunks) tuple of strings. Everything up to and including
    the '--- ' and '+++ ' lines is the header. Backslashes in recognized
    header lines are converted to '/'.
    """
    header = []
    hunks = diff.splitlines(True)
    # Some diff may not have a ---/+++ set like a git rename with no change or
    # a svn diff with only property change. In that case every line ends up
    # in the header and hunks is left empty.
    while hunks:
      header.append(hunks.pop(0))
      if header[-1].startswith('--- '):
        break

    if hunks:
      if not hunks[0].startswith('+++ '):
        self._fail('Inconsistent header')
      header.append(hunks.pop(0))
      if hunks:
        if not hunks[0].startswith('@@ '):
          self._fail('Inconsistent hunk header')

    # Mangle any \\ in the header to /.
    header_lines = ('Index:', 'diff', 'copy', 'rename', '+++', '---')
    basename = os.path.basename(self.filename_utf8)
    for i, line in enumerate(header):
      if line.split(' ', 1)[0] in header_lines or line.endswith(basename):
        header[i] = line.replace('\\', '/')
    return ''.join(header), ''.join(hunks)

  @staticmethod
  def _is_git_diff_header(diff_header):
    """Returns True if the diff for a single files was generated with git."""
    # Delete: http://codereview.chromium.org/download/issue6368055_22_29.diff
    # Rename partial change:
    # http://codereview.chromium.org/download/issue6250123_3013_6010.diff
    # Rename no change:
    # http://codereview.chromium.org/download/issue6287022_3001_4010.diff
    return any(l.startswith('diff --git') for l in diff_header.splitlines())

  def _parse_hunk_range(self, text):
    """Parses one side of a hunk header: 'START' or 'START,COUNT'.

    Returns a (start, count) tuple; count defaults to 1 when omitted. Fails
    with UnsupportedPatchFormat on anything else, including empty fields.
    """
    parts = text.split(',')
    # text only contains digits and commas, so a non-empty part is a number.
    if len(parts) > 2 or not all(parts):
      self._fail('Hunk header is malformed')
    start = int(parts[0])
    if len(parts) == 2:
      return start, int(parts[1])
    return start, 1

  def _split_hunks(self):
    """Splits the hunks and does verification.

    Returns the list of Hunk objects with start lines converted to 0-based
    offsets. Sets is_new / is_delete when a single hunk has an all-zero source
    or destination range.
    """
    hunks = []
    for line in self.diff_hunks.splitlines(True):
      if line.startswith('@@'):
        match = re.match(r'^@@ -([\d,]+) \+([\d,]+) @@.*$', line)
        # File add will result in "-0,0 +1" but file deletion will result in
        # "-1,N +0,0" where N is the number of lines deleted. That's from diff
        # and svn diff. git diff doesn't exhibit this behavior.
        # svn diff for a single line file rewrite "@@ -1 +1 @@". Fun.
        # "@@ -1 +1,N @@" is also valid where N is the length of the new file.
        if not match:
          self._fail('Hunk header is unparsable')
        start_src, lines_src = self._parse_hunk_range(match.group(1))
        start_dst, lines_dst = self._parse_hunk_range(match.group(2))
        new_hunk = Hunk(start_src, lines_src, start_dst, lines_dst)
        if hunks:
          if new_hunk.start_src <= hunks[-1].start_src:
            self._fail('Hunks source lines are not ordered')
          if new_hunk.start_dst <= hunks[-1].start_dst:
            self._fail('Hunks destination lines are not ordered')
        hunks.append(new_hunk)
        continue
      hunks[-1].text.append(line)

    if len(hunks) == 1:
      if hunks[0].start_src == 0 and hunks[0].lines_src == 0:
        self.is_new = True
      if hunks[0].start_dst == 0 and hunks[0].lines_dst == 0:
        self.is_delete = True

    if self.is_new and self.is_delete:
      self._fail('Hunk header is all 0')

    if not self.is_new and not self.is_delete:
      for hunk in hunks:
        # The net number of added lines must agree with the hunk header.
        variation = (
            len([1 for i in hunk.text if i.startswith('+')]) -
            len([1 for i in hunk.text if i.startswith('-')]))
        if variation != hunk.variation:
          self._fail(
              'Hunk header is incorrect: %d vs %d; %r' % (
                variation, hunk.variation, hunk))
        if not hunk.start_src:
          self._fail(
              'Hunk header start line is incorrect: %d' % hunk.start_src)
        if not hunk.start_dst:
          self._fail(
              'Hunk header start line is incorrect: %d' % hunk.start_dst)
        hunk.start_src -= 1
        hunk.start_dst -= 1
    if self.is_new and hunks:
      hunks[0].start_dst -= 1
    if self.is_delete and hunks:
      hunks[0].start_src -= 1
    return hunks

  def mangle(self, string):
    """Mangle a file path.

    Converts backslashes to '/' and strips the first self.patchlevel path
    components.
    """
    return '/'.join(string.replace('\\', '/').split('/')[self.patchlevel:])

  def _verify_git_header(self):
    """Sanity checks the header.

    Expects the following format:

    <garbage>
    diff --git (|a/)<filename> (|b/)<filename>
    <similarity>
    <filemode changes>
    <index>
    <copy|rename from>
    <copy|rename to>
    --- <filename>
    +++ <filename>

    Everything is optional except the diff --git line.
    """
    lines = self.diff_header.splitlines()

    # Verify the diff --git line.
    old = None
    new = None
    while lines:
      match = re.match(r'^diff \-\-git (.*?) (.*)$', lines.pop(0))
      if not match:
        continue
      if match.group(1).startswith('a/') and match.group(2).startswith('b/'):
        self.patchlevel = 1
      old = self.mangle(match.group(1))
      new = self.mangle(match.group(2))

      # The rename is about the new file so the old file can be anything.
      if new not in (self.filename_utf8, 'dev/null'):
        self._fail('Unexpected git diff output name %s.' % new)
      if old == 'dev/null' and new == 'dev/null':
        self._fail('Unexpected /dev/null git diff.')
      break

    if not old or not new:
      self._fail('Unexpected git diff; couldn\'t find git header.')

    if old not in (self.filename_utf8, 'dev/null'):
      # Copy or rename.
      self.source_filename = old.decode('utf-8')
      self.is_new = True

    last_line = ''

    while lines:
      line = lines.pop(0)
      self._verify_git_header_process_line(lines, line, last_line)
      last_line = line

    # Cheap check to make sure the file name is at least mentioned in the
    # 'diff' header. That the only remaining invariant.
    if self.filename_utf8 not in self.diff_header:
      self._fail('Diff seems corrupted.')

  def _verify_git_header_process_line(self, lines, line, last_line):
    """Processes a single line of the header.

    Arguments:
      lines: the header lines that follow line.
      line: the header line to check.
      last_line: the header line that preceded line, '' for the first one.

    Returns nothing; fails with UnsupportedPatchFormat on inconsistency and
    updates is_new, is_delete and svn_properties as a side effect.

    Format is described to
    http://www.kernel.org/pub/software/scm/git/docs/git-diff.html
    """
    match = re.match(r'^(rename|copy) from (.+)$', line)
    old = self.source_filename_utf8 or self.filename_utf8
    if match:
      if old != match.group(2):
        self._fail('Unexpected git diff input name for line %s.' % line)
      if not lines or not lines[0].startswith('%s to ' % match.group(1)):
        self._fail(
            'Confused %s from/to git diff for line %s.' %
                (match.group(1), line))
      return

    match = re.match(r'^(rename|copy) to (.+)$', line)
    if match:
      if self.filename_utf8 != match.group(2):
        self._fail('Unexpected git diff output name for line %s.' % line)
      if not last_line.startswith('%s from ' % match.group(1)):
        self._fail(
            'Confused %s from/to git diff for line %s.' %
                (match.group(1), line))
      return

    match = re.match(r'^deleted file mode (\d{6})$', line)
    if match:
      # It is necessary to parse it because there may be no hunk, like when the
      # file was empty.
      self.is_delete = True
      return

    match = re.match(r'^new(| file) mode (\d{6})$', line)
    if match:
      mode = match.group(2)
      # Only look at owner ACL for executable. The six octal digits are two
      # for the file type, one for the special bits, then owner, group and
      # other, so the owner digit is at index 3.
      if bool(int(mode[3]) & 1):
        self.svn_properties.append(('svn:executable', '.'))
      elif not self.source_filename and self.is_new:
        # It's a new file, not from a rename/copy, then there's no property to
        # delete.
        self.svn_properties.append(('svn:executable', None))
      return

    match = re.match(r'^--- (.*)$', line)
    if match:
      if last_line[:3] in ('---', '+++'):
        self._fail('--- and +++ are reversed')
      if match.group(1) == '/dev/null':
        self.is_new = True
      elif self.mangle(match.group(1)) != old:
        # git patches are always well formatted, do not allow random filenames.
        self._fail('Unexpected git diff: %s != %s.' % (old, match.group(1)))
      if not lines or not lines[0].startswith('+++'):
        self._fail('Missing git diff output name.')
      return

    match = re.match(r'^\+\+\+ (.*)$', line)
    if match:
      if not last_line.startswith('---'):
        self._fail('Unexpected git diff: --- not following +++.')
      if '/dev/null' == match.group(1):
        self.is_delete = True
      elif self.filename_utf8 != self.mangle(match.group(1)):
        self._fail(
            'Unexpected git diff: %s != %s.' % (self.filename, match.group(1)))
      if lines:
        self._fail('Crap after +++')
      # We're done.
      return

  def _verify_svn_header(self):
    """Sanity checks the header.

    A svn diff can contain only property changes, in that case there will be no
    proper header. To make things worse, this property change header is
    localized.
    """
    lines = self.diff_header.splitlines()
    last_line = ''

    while lines:
      line = lines.pop(0)
      self._verify_svn_header_process_line(lines, line, last_line)
      last_line = line

    # Cheap check to make sure the file name is at least mentioned in the
    # 'diff' header. That the only remaining invariant.
    if self.filename_utf8 not in self.diff_header:
      self._fail('Diff seems corrupted.')

  def _verify_svn_header_process_line(self, lines, line, last_line):
    """Processes a single line of the header.

    Arguments:
      lines: the header lines that follow line.
      line: the header line to check.
      last_line: the header line that preceded line, '' for the first one.

    Returns nothing; fails with UnsupportedPatchFormat on inconsistency and
    updates is_new, is_delete and source_filename as a side effect.
    """
    match = re.match(r'^--- ([^\t]+).*$', line)
    if match:
      if last_line[:3] in ('---', '+++'):
        self._fail('--- and +++ are reversed')
      if match.group(1) == '/dev/null':
        self.is_new = True
      elif self.mangle(match.group(1)) != self.filename_utf8:
        # guess the source filename.
        self.source_filename = match.group(1).decode('utf-8')
        self.is_new = True
      if not lines or not lines[0].startswith('+++'):
        self._fail('Nothing after header.')
      return

    match = re.match(r'^\+\+\+ ([^\t]+).*$', line)
    if match:
      if not last_line.startswith('---'):
        self._fail('Unexpected diff: --- not following +++.')
      if match.group(1) == '/dev/null':
        self.is_delete = True
      elif self.mangle(match.group(1)) != self.filename_utf8:
        self._fail('Unexpected diff: %s.' % match.group(1))
      if lines:
        self._fail('Crap after +++')
      # We're done.
      return
maruel@chromium.orgb3727a32011-04-04 19:31:44 +0000493
494
class PatchSet(object):
  """A list of FilePatch* objects.

  The patches are kept sorted in order of application; see __init__.
  """

  def __init__(self, patches):
    """Validates and sorts patches.

    Arguments:
      patches: iterable of FilePatchBase instances. It is materialized once,
               so a generator or other single-pass iterator is accepted.
    """
    # Validation and sorting each iterate the input. Without this copy a
    # single-pass iterator would be exhausted by the validation loop and the
    # set would silently end up empty.
    patches = list(patches)
    for p in patches:
      assert isinstance(p, FilePatchBase)

    def key(p):
      """Sort by ordering of application.

      File move are first.
      Deletes are last.
      """
      # The bool is necessary because None < 'string' but the reverse is needed.
      return (
          p.is_delete,
          # False is before True, so files *with* a source file will be first.
          not bool(p.source_filename),
          p.source_filename_utf8,
          p.filename_utf8)

    self.patches = sorted(patches, key=key)

  def set_relpath(self, relpath):
    """Used to offset the patch into a subdirectory."""
    for patch in self.patches:
      patch.set_relpath(relpath)

  def __iter__(self):
    for patch in self.patches:
      yield patch

  def __getitem__(self, key):
    return self.patches[key]

  @property
  def filenames(self):
    """List of the patches' file names, in order of application."""
    return [p.filename for p in self.patches]