# coding=utf8
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Utility functions to handle patches."""

import os
import posixpath
import re


class UnsupportedPatchFormat(Exception):
  """Raised when a patch for a file cannot be processed.

  Attributes:
    filename: name of the file the offending patch applies to.
    status: optional detail message; appended to str(self) on its own line
        when it is truthy.
  """

  def __init__(self, filename, status):
    super(UnsupportedPatchFormat, self).__init__(filename, status)
    self.filename = filename
    self.status = status

  def __str__(self):
    out = 'Can\'t process patch for file %s.' % self.filename
    if self.status:
      out += '\n%s' % self.status
    return out


class FilePatchBase(object):
  """Defines a single file being modified.

  '/' is always used instead of os.sep for consistency.

  This class is abstract: instantiating it directly trips an assert, only
  subclasses may be created.
  """
  is_delete = False
  is_binary = False
  is_new = False

  def __init__(self, filename):
    assert self.__class__ is not FilePatchBase
    self.filename = self._process_filename(filename)
    # Set when the file is copied or moved.
    self.source_filename = None

  @property
  def filename_utf8(self):
    """Returns self.filename encoded as UTF-8."""
    return self.filename.encode('utf-8')

  @property
  def source_filename_utf8(self):
    """Returns self.source_filename encoded as UTF-8, or None if unset."""
    if self.source_filename is not None:
      return self.source_filename.encode('utf-8')
    return None

  @staticmethod
  def _process_filename(filename):
    """Normalizes a file name to '/' separators and validates it.

    Raises UnsupportedPatchFormat if the name contains a blacklisted sequence
    or starts with a forbidden prefix.
    """
    filename = filename.replace('\\', '/')
    # Blacklist a few characters for simplicity.
    for i in ('%', '$', '..', '\'', '"'):
      if i in filename:
        raise UnsupportedPatchFormat(
            filename, 'Can\'t use \'%s\' in filename.' % i)
    # The loop is kept (instead of startswith(tuple)) because the error message
    # names the offending prefix.
    for i in ('/', 'CON', 'COM'):
      if filename.startswith(i):
        raise UnsupportedPatchFormat(
            filename, 'Filename can\'t start with \'%s\'.' % i)
    return filename

  def set_relpath(self, relpath):
    """Prefixes filename (and source_filename, if set) with relpath.

    An empty relpath is a no-op. Raises UnsupportedPatchFormat if relpath is
    absolute or if a resulting name fails validation.
    """
    if not relpath:
      return
    relpath = relpath.replace('\\', '/')
    if relpath[0] == '/':
      self._fail('Relative path starts with %s' % relpath[0])
    self.filename = self._process_filename(
        posixpath.join(relpath, self.filename))
    if self.source_filename:
      self.source_filename = self._process_filename(
          posixpath.join(relpath, self.source_filename))

  def _fail(self, msg):
    """Shortcut function to raise UnsupportedPatchFormat."""
    raise UnsupportedPatchFormat(self.filename, msg)

  def __str__(self):
    # Use a status-like board: one column per flag, left blank when unset.
    out = ''.join((
        'B' if self.is_binary else ' ',
        'D' if self.is_delete else ' ',
        'N' if self.is_new else ' ',
        'R' if self.source_filename else ' ',
    ))
    out += ' '
    if self.source_filename:
      out += '%s->' % self.source_filename_utf8
    return out + self.filename_utf8


class FilePatchDelete(FilePatchBase):
  """Deletes a file.

  is_delete is always True; is_binary is whatever the caller passes in.
  """
  is_delete = True

  def __init__(self, filename, is_binary):
    super(FilePatchDelete, self).__init__(filename)
    self.is_binary = is_binary


class FilePatchBinary(FilePatchBase):
  """Content of a new binary file.

  Attributes:
    data: the file content, returned unchanged by get().
    svn_properties: the properties passed in, or a new empty list when the
        argument is falsy.
    is_new: flag passed in by the caller.
  """
  is_binary = True

  def __init__(self, filename, data, svn_properties, is_new):
    super(FilePatchBinary, self).__init__(filename)
    self.data = data
    self.svn_properties = svn_properties or []
    self.is_new = is_new

  def get(self):
    """Returns the stored file content."""
    return self.data


class Hunk(object):
  """Parsed hunk data container.

  Attributes:
    start_src: start line on the source side of the hunk header.
    lines_src: line count on the source side of the hunk header.
    start_dst: start line on the destination side of the hunk header.
    lines_dst: line count on the destination side of the hunk header.
    variation: lines_dst - lines_src, computed once at construction.
    text: list of hunk body lines; starts empty and is filled by the parser.
  """

  def __init__(self, start_src, lines_src, start_dst, lines_dst):
    self.start_src = start_src
    self.lines_src = lines_src
    self.start_dst = start_dst
    self.lines_dst = lines_dst
    self.variation = self.lines_dst - self.lines_src
    self.text = []


class FilePatchDiff(FilePatchBase):
  """Patch for a single file.

  The diff text is split into a header and the hunks, the header is checked
  against the expected git or svn layout and the hunks are parsed into Hunk
  objects. Any inconsistency raises UnsupportedPatchFormat.

  NOTE(review): file names are compared in their UTF-8 encoded form and names
  read from the header are .decode('utf-8')'d, which presumably means the diff
  is expected to be a UTF-8 byte string (Python 2 str) -- confirm with callers.
  """

  def __init__(self, filename, diff, svn_properties):
    super(FilePatchDiff, self).__init__(filename)
    if not diff:
      self._fail('File doesn\'t have a diff.')
    self.diff_header, self.diff_hunks = self._split_header(diff)
    self.svn_properties = svn_properties or []
    self.is_git_diff = self._is_git_diff_header(self.diff_header)
    self.patchlevel = 0
    if self.is_git_diff:
      self._verify_git_header()
    else:
      self._verify_svn_header()
    self.hunks = self._split_hunks()
    if self.source_filename and not self.is_new:
      self._fail('If source_filename is set, is_new must be also be set')

  def get(self, for_git):
    """Returns the diff text (header followed by hunks).

    When for_git is false and the file was copied or moved, every occurrence
    of the source file name in the header is replaced by the file name.
    """
    if for_git or not self.source_filename:
      return self.diff_header + self.diff_hunks
    # patch is stupid. It patches the source_filename instead so get rid of
    # any source_filename reference if needed.
    return (
        self.diff_header.replace(
            self.source_filename_utf8, self.filename_utf8) +
        self.diff_hunks)

  def set_relpath(self, relpath):
    """Offsets the file names and rewrites the header lines accordingly."""
    old_filename = self.filename_utf8
    old_source_filename = self.source_filename_utf8 or self.filename_utf8
    super(FilePatchDiff, self).set_relpath(relpath)
    # Update the header too.
    filename = self.filename_utf8
    source_filename = self.source_filename_utf8 or self.filename_utf8
    lines = self.diff_header.splitlines(True)
    for i, line in enumerate(lines):
      if line.startswith('diff --git'):
        lines[i] = line.replace(
            'a/' + old_source_filename, source_filename).replace(
                'b/' + old_filename, filename)
      elif re.match(r'^\w+ from .+$', line) or line.startswith('---'):
        lines[i] = line.replace(old_source_filename, source_filename)
      elif re.match(r'^\w+ to .+$', line) or line.startswith('+++'):
        lines[i] = line.replace(old_filename, filename)
    self.diff_header = ''.join(lines)

  def _split_header(self, diff):
    """Splits a diff in two: the header and the hunks.

    Returns a (header, hunks) tuple of strings. The header runs up to and
    including the '--- ' / '+++ ' pair when there is one.
    """
    header = []
    hunks = diff.splitlines(True)
    # Some diff may not have a ---/+++ set like a git rename with no change or
    # a svn diff with only property change. In that case everything ends up in
    # the header and hunks is left empty.
    while hunks:
      header.append(hunks.pop(0))
      if header[-1].startswith('--- '):
        break

    if hunks:
      if not hunks[0].startswith('+++ '):
        self._fail('Inconsistent header')
      header.append(hunks.pop(0))
      if hunks:
        if not hunks[0].startswith('@@ '):
          self._fail('Inconsistent hunk header')

    # Mangle any \\ in the header to /.
    header_lines = ('Index:', 'diff', 'copy', 'rename', '+++', '---')
    basename = os.path.basename(self.filename_utf8)
    for i, line in enumerate(header):
      # NOTE(review): lines keep their terminator (splitlines(True)), so the
      # endswith() test can only match a last line that has no newline.
      if line.split(' ', 1)[0] in header_lines or line.endswith(basename):
        header[i] = line.replace('\\', '/')
    return ''.join(header), ''.join(hunks)

  @staticmethod
  def _is_git_diff_header(diff_header):
    """Returns True if the diff for a single files was generated with git."""
    # Delete: http://codereview.chromium.org/download/issue6368055_22_29.diff
    # Rename partial change:
    # http://codereview.chromium.org/download/issue6250123_3013_6010.diff
    # Rename no change:
    # http://codereview.chromium.org/download/issue6287022_3001_4010.diff
    return any(l.startswith('diff --git') for l in diff_header.splitlines())

  def _parse_hunk_range(self, text):
    """Parses one side of a hunk header, i.e. 'START' or 'START,COUNT'.

    Returns a (start, count) tuple of ints. Raises UnsupportedPatchFormat when
    text is not one or two comma separated decimal numbers.
    """
    parts = text.split(',')
    if len(parts) > 2 or not all(part.isdigit() for part in parts):
      self._fail('Hunk header is malformed')
    start = int(parts[0])
    if len(parts) == 2:
      return start, int(parts[1])
    # Unified diff leaves the count out when the range is exactly one line.
    return start, 1

  def _split_hunks(self):
    """Splits the hunks and does verification.

    Returns the list of Hunk objects parsed from self.diff_hunks. May set
    self.is_new or self.is_delete when the diff has a single hunk whose source
    or destination range is '0,0'. Start lines of the returned hunks are
    decremented by one.
    """
    hunks = []
    for line in self.diff_hunks.splitlines(True):
      if line.startswith('@@'):
        match = re.match(r'^@@ -([\d,]+) \+([\d,]+) @@.*$', line)
        # File add will result in "-0,0 +1" but file deletion will result in
        # "-1,N +0,0" where N is the number of lines deleted. That's from diff
        # and svn diff. git diff doesn't exhibit this behavior.
        # svn diff for a single line file rewrite "@@ -1 +1 @@". Fun.
        if not match:
          self._fail('Hunk header is unparsable')
        start_src, lines_src = self._parse_hunk_range(match.group(1))
        start_dst, lines_dst = self._parse_hunk_range(match.group(2))
        new_hunk = Hunk(start_src, lines_src, start_dst, lines_dst)
        if hunks:
          if new_hunk.start_src <= hunks[-1].start_src:
            self._fail('Hunks source lines are not ordered')
          if new_hunk.start_dst <= hunks[-1].start_dst:
            self._fail('Hunks destination lines are not ordered')
        hunks.append(new_hunk)
        continue
      hunks[-1].text.append(line)

    if len(hunks) == 1:
      if hunks[0].start_src == 0 and hunks[0].lines_src == 0:
        self.is_new = True
      if hunks[0].start_dst == 0 and hunks[0].lines_dst == 0:
        self.is_delete = True

    if self.is_new and self.is_delete:
      self._fail('Hunk header is all 0')

    if not self.is_new and not self.is_delete:
      for hunk in hunks:
        # The net number of lines added by the body must match the header.
        variation = (
            sum(1 for l in hunk.text if l.startswith('+')) -
            sum(1 for l in hunk.text if l.startswith('-')))
        if variation != hunk.variation:
          self._fail(
              'Hunk header is incorrect: %d vs %d' % (
                  variation, hunk.variation))
        if not hunk.start_src:
          self._fail(
              'Hunk header start line is incorrect: %d' % hunk.start_src)
        if not hunk.start_dst:
          self._fail(
              'Hunk header start line is incorrect: %d' % hunk.start_dst)
        hunk.start_src -= 1
        hunk.start_dst -= 1
    if self.is_new and hunks:
      hunks[0].start_dst -= 1
    if self.is_delete and hunks:
      hunks[0].start_src -= 1
    return hunks

  def mangle(self, string):
    """Mangle a file path.

    Converts '\\' to '/' and drops the first self.patchlevel path components.
    """
    return '/'.join(string.replace('\\', '/').split('/')[self.patchlevel:])

  def _verify_git_header(self):
    """Sanity checks the header.

    Expects the following format:

    <garbagge>
    diff --git (|a/)<filename> (|b/)<filename>
    <similarity>
    <filemode changes>
    <index>
    <copy|rename from>
    <copy|rename to>
    --- <filename>
    +++ <filename>

    Everything is optional except the diff --git line.
    """
    lines = self.diff_header.splitlines()

    # Verify the diff --git line.
    old = None
    new = None
    while lines:
      match = re.match(r'^diff \-\-git (.*?) (.*)$', lines.pop(0))
      if not match:
        continue
      if match.group(1).startswith('a/') and match.group(2).startswith('b/'):
        self.patchlevel = 1
      old = self.mangle(match.group(1))
      new = self.mangle(match.group(2))

      # The rename is about the new file so the old file can be anything.
      if new not in (self.filename_utf8, 'dev/null'):
        self._fail('Unexpected git diff output name %s.' % new)
      if old == 'dev/null' and new == 'dev/null':
        self._fail('Unexpected /dev/null git diff.')
      break

    if not old or not new:
      self._fail('Unexpected git diff; couldn\'t find git header.')

    if old not in (self.filename_utf8, 'dev/null'):
      # Copy or rename.
      self.source_filename = old.decode('utf-8')
      self.is_new = True

    last_line = ''

    while lines:
      line = lines.pop(0)
      self._verify_git_header_process_line(lines, line, last_line)
      last_line = line

    # Cheap check to make sure the file name is at least mentioned in the
    # 'diff' header. That the only remaining invariant.
    if self.filename_utf8 not in self.diff_header:
      self._fail('Diff seems corrupted.')

  def _verify_git_header_process_line(self, lines, line, last_line):
    """Processes a single line of the header.

    Arguments:
      lines: header lines that come after |line|.
      line: the header line to check.
      last_line: the header line checked just before, '' for the first one.

    Returns nothing; raises UnsupportedPatchFormat on an inconsistent line and
    may update is_new, is_delete and svn_properties.

    Format is described to
    http://www.kernel.org/pub/software/scm/git/docs/git-diff.html
    """
    old = self.source_filename_utf8 or self.filename_utf8

    match = re.match(r'^(rename|copy) from (.+)$', line)
    if match:
      if old != match.group(2):
        self._fail('Unexpected git diff input name for line %s.' % line)
      if not lines or not lines[0].startswith('%s to ' % match.group(1)):
        self._fail(
            'Confused %s from/to git diff for line %s.' %
                (match.group(1), line))
      return

    match = re.match(r'^(rename|copy) to (.+)$', line)
    if match:
      if self.filename_utf8 != match.group(2):
        self._fail('Unexpected git diff output name for line %s.' % line)
      if not last_line.startswith('%s from ' % match.group(1)):
        self._fail(
            'Confused %s from/to git diff for line %s.' %
                (match.group(1), line))
      return

    match = re.match(r'^deleted file mode (\d{6})$', line)
    if match:
      # It is necessary to parse it because there may be no hunk, like when the
      # file was empty.
      self.is_delete = True
      return

    match = re.match(r'^new(| file) mode (\d{6})$', line)
    if match:
      mode = match.group(2)
      # Only look at owner ACL for executable. In a six digit mode such as
      # 100755 the owner permissions are the digit at index 3.
      # TODO(maruel): Add support to remove a property.
      if bool(int(mode[3]) & 1):
        self.svn_properties.append(('svn:executable', '*'))
      return

    match = re.match(r'^--- (.*)$', line)
    if match:
      if last_line[:3] in ('---', '+++'):
        self._fail('--- and +++ are reversed')
      if match.group(1) == '/dev/null':
        self.is_new = True
      elif self.mangle(match.group(1)) != old:
        # git patches are always well formatted, do not allow random filenames.
        self._fail('Unexpected git diff: %s != %s.' % (old, match.group(1)))
      if not lines or not lines[0].startswith('+++'):
        self._fail('Missing git diff output name.')
      return

    match = re.match(r'^\+\+\+ (.*)$', line)
    if match:
      if not last_line.startswith('---'):
        self._fail('Unexpected git diff: --- not following +++.')
      if '/dev/null' == match.group(1):
        self.is_delete = True
      elif self.filename_utf8 != self.mangle(match.group(1)):
        self._fail(
            'Unexpected git diff: %s != %s.' % (self.filename, match.group(1)))
      if lines:
        self._fail('Crap after +++')
      # We're done.
      return

  def _verify_svn_header(self):
    """Sanity checks the header.

    A svn diff can contain only property changes, in that case there will be no
    proper header. To make things worse, this property change header is
    localized.
    """
    lines = self.diff_header.splitlines()
    last_line = ''

    while lines:
      line = lines.pop(0)
      self._verify_svn_header_process_line(lines, line, last_line)
      last_line = line

    # Cheap check to make sure the file name is at least mentioned in the
    # 'diff' header. That the only remaining invariant.
    if self.filename_utf8 not in self.diff_header:
      self._fail('Diff seems corrupted.')

  def _verify_svn_header_process_line(self, lines, line, last_line):
    """Processes a single line of the header.

    Arguments:
      lines: header lines that come after |line|.
      line: the header line to check.
      last_line: the header line checked just before, '' for the first one.

    Returns nothing; raises UnsupportedPatchFormat on an inconsistent line and
    may update is_new, is_delete and source_filename.
    """
    match = re.match(r'^--- ([^\t]+).*$', line)
    if match:
      if last_line[:3] in ('---', '+++'):
        self._fail('--- and +++ are reversed')
      if match.group(1) == '/dev/null':
        self.is_new = True
      elif self.mangle(match.group(1)) != self.filename_utf8:
        # guess the source filename.
        self.source_filename = match.group(1).decode('utf-8')
        self.is_new = True
      if not lines or not lines[0].startswith('+++'):
        self._fail('Nothing after header.')
      return

    match = re.match(r'^\+\+\+ ([^\t]+).*$', line)
    if match:
      if not last_line.startswith('---'):
        self._fail('Unexpected diff: --- not following +++.')
      if match.group(1) == '/dev/null':
        self.is_delete = True
      elif self.mangle(match.group(1)) != self.filename_utf8:
        self._fail('Unexpected diff: %s.' % match.group(1))
      if lines:
        self._fail('Crap after +++')
      # We're done.
      return


class PatchSet(object):
  """A list of FilePatch* objects.

  The patches are kept sorted in the order they should be applied, see the
  key() function in __init__.
  """

  def __init__(self, patches):
    for p in patches:
      assert isinstance(p, FilePatchBase)

    def key(p):
      """Sort by ordering of application.

      File move are first.
      Deletes are last.
      """
      # An explicit rank puts copies/moves ahead of the other patches that
      # share the same is_delete value. Mixing a string and a tuple in the same
      # key position, as a way to get that ordering, only works on Python 2.
      if p.source_filename:
        return (p.is_delete, 0, p.source_filename_utf8, p.filename_utf8)
      return (p.is_delete, 1, p.filename_utf8, p.filename_utf8)

    self.patches = sorted(patches, key=key)

  def set_relpath(self, relpath):
    """Used to offset the patch into a subdirectory."""
    for patch in self.patches:
      patch.set_relpath(relpath)

  def __iter__(self):
    return iter(self.patches)

  def __getitem__(self, key):
    return self.patches[key]

  @property
  def filenames(self):
    """Returns the file name of every patch, in application order."""
    return [p.filename for p in self.patches]