# coding=utf8
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Utility functions to handle patches."""
6
import os
import posixpath
import re
10
11
class UnsupportedPatchFormat(Exception):
  """Raised when a patch cannot be processed.

  Attributes:
    filename: name of the file whose patch was rejected.
    status: optional explanation; appended to the message when truthy.
  """

  def __init__(self, filename, status):
    super(UnsupportedPatchFormat, self).__init__(filename, status)
    self.filename = filename
    self.status = status

  def __str__(self):
    # The arguments are wrapped in 1-tuples so that a tuple-valued filename or
    # status is formatted as a whole instead of being unpacked by '%'.
    out = 'Can\'t process patch for file %s.' % (self.filename,)
    if self.status:
      out += '\n%s' % (self.status,)
    return out
23
24
class FilePatchBase(object):
  """Defines a single file being modified.

  '/' is always used instead of os.sep for consistency.

  This class is abstract: instantiating it directly trips an assertion, only
  subclasses may be constructed.
  """
  is_delete = False
  is_binary = False
  is_new = False

  def __init__(self, filename):
    assert self.__class__ is not FilePatchBase
    self.filename = self._process_filename(filename)
    # Set when the file is copied or moved.
    self.source_filename = None

  @property
  def filename_utf8(self):
    """Returns self.filename encoded as UTF-8."""
    return self.filename.encode('utf-8')

  @property
  def source_filename_utf8(self):
    """Returns self.source_filename encoded as UTF-8, or None if unset."""
    if self.source_filename is not None:
      return self.source_filename.encode('utf-8')
    return None

  @staticmethod
  def _process_filename(filename):
    """Normalizes path separators to '/' and rejects unsupported names.

    Raises:
      UnsupportedPatchFormat: if the name contains a blacklisted substring or
          starts with a blacklisted prefix.
    """
    filename = filename.replace('\\', '/')
    # Blacklist a few characters for simplicity.
    for i in ('$', '..', '\'', '"', '<', '>', ':', '|', '?', '*'):
      if i in filename:
        raise UnsupportedPatchFormat(
            filename, 'Can\'t use \'%s\' in filename.' % i)
    for i in ('/', 'CON', 'COM'):
      if filename.startswith(i):
        raise UnsupportedPatchFormat(
            filename, 'Filename can\'t start with \'%s\'.' % i)
    return filename

  def set_relpath(self, relpath):
    """Prefixes filename (and source_filename, when set) with relpath.

    An empty relpath is a no-op. The joined names go through the same
    validation as the constructor argument.

    Raises:
      UnsupportedPatchFormat: if relpath is absolute or a resulting name is
          rejected by _process_filename().
    """
    if not relpath:
      return
    relpath = relpath.replace('\\', '/')
    if relpath[0] == '/':
      self._fail('Relative path starts with %s' % relpath[0])
    self.filename = self._process_filename(
        posixpath.join(relpath, self.filename))
    if self.source_filename:
      self.source_filename = self._process_filename(
          posixpath.join(relpath, self.source_filename))

  def _fail(self, msg):
    """Shortcut function to raise UnsupportedPatchFormat."""
    raise UnsupportedPatchFormat(self.filename, msg)

  def __str__(self):
    # Use a status-like board: one column per flag, blank when unset.
    out = ''.join((
        'B' if self.is_binary else ' ',
        'D' if self.is_delete else ' ',
        'N' if self.is_new else ' ',
        'R' if self.source_filename else ' ',
    ))
    out += ' '
    if self.source_filename:
      out += '%s->' % self.source_filename_utf8
    return out + self.filename_utf8

  def dump(self):
    """Dumps itself in a verbose way to help diagnosing."""
    return str(self)
106
maruel@chromium.orgb3727a32011-04-04 19:31:44 +0000107
class FilePatchDelete(FilePatchBase):
  """Deletes a file."""
  is_delete = True

  def __init__(self, filename, is_binary):
    """
    Args:
      filename: path of the file being deleted.
      is_binary: stored as the is_binary flag of this patch.
    """
    super(FilePatchDelete, self).__init__(filename)
    self.is_binary = is_binary
115
maruel@chromium.orgb3727a32011-04-04 19:31:44 +0000116
class FilePatchBinary(FilePatchBase):
  """Content of a new binary file."""
  is_binary = True

  def __init__(self, filename, data, svn_properties, is_new):
    """
    Args:
      filename: path of the binary file.
      data: content of the file; returned unchanged by get().
      svn_properties: optional properties; a falsy value becomes [].
      is_new: stored as the is_new flag of this patch.
    """
    super(FilePatchBinary, self).__init__(filename)
    self.data = data
    self.svn_properties = svn_properties or []
    self.is_new = is_new

  def get(self):
    """Returns the file content."""
    return self.data

  def __str__(self):
    # Call the parent's __str__ explicitly. str(super(...)) would not dispatch
    # to it: str() looks the method up on the type of the super proxy, so it
    # yields the proxy's repr ('<super: ...>') instead of the status board.
    return (
        super(FilePatchBinary, self).__str__() +
        ' %d bytes' % len(self.data))
132
maruel@chromium.orgb3727a32011-04-04 19:31:44 +0000133
class Hunk(object):
  """Parsed hunk data container.

  Attributes:
    start_src: start line of the hunk in the source file.
    lines_src: number of source lines covered by the hunk.
    start_dst: start line of the hunk in the destination file.
    lines_dst: number of destination lines covered by the hunk.
    variation: lines_dst - lines_src, i.e. the net line count change.
    text: list that callers fill with the lines of the hunk body.
  """

  def __init__(self, start_src, lines_src, start_dst, lines_dst):
    self.start_src = start_src
    self.lines_src = lines_src
    self.start_dst = start_dst
    self.lines_dst = lines_dst
    self.variation = self.lines_dst - self.lines_src
    self.text = []

  def __repr__(self):
    return '%s<(%d, %d) to (%d, %d)>' % (
        self.__class__.__name__,
        self.start_src, self.lines_src, self.start_dst, self.lines_dst)
149
maruel@chromium.orgcf602552012-01-10 19:49:31 +0000150
class FilePatchDiff(FilePatchBase):
  """Patch for a single file.

  The diff text is split in a header and the hunks. The header is verified as
  either a git or a svn style header and the hunks are parsed into Hunk
  objects, stored in self.hunks.
  """

  def __init__(self, filename, diff, svn_properties):
    """
    Args:
      filename: path of the file being patched.
      diff: complete diff text for this file; must not be empty.
      svn_properties: optional list of (name, value) property tuples.

    Raises:
      UnsupportedPatchFormat: if the diff is empty or fails verification.
    """
    super(FilePatchDiff, self).__init__(filename)
    if not diff:
      self._fail('File doesn\'t have a diff.')
    self.diff_header, self.diff_hunks = self._split_header(diff)
    # Header verification may append to this list, so work on a copy instead
    # of mutating the caller's object.
    self.svn_properties = list(svn_properties) if svn_properties else []
    self.is_git_diff = self._is_git_diff_header(self.diff_header)
    self.patchlevel = 0
    if self.is_git_diff:
      self._verify_git_header()
    else:
      self._verify_svn_header()
    self.hunks = self._split_hunks()
    if self.source_filename and not self.is_new:
      self._fail('If source_filename is set, is_new must be also be set')

  def get(self, for_git):
    """Returns the diff text.

    When for_git is false and the patch has a source file, references to the
    source file name in the header are replaced by the destination name.
    """
    if for_git or not self.source_filename:
      return self.diff_header + self.diff_hunks
    # patch is stupid. It patches the source_filename instead so get rid of
    # any source_filename reference if needed.
    return (
        self.diff_header.replace(
            self.source_filename_utf8, self.filename_utf8) +
        self.diff_hunks)

  def set_relpath(self, relpath):
    """Offsets the file names and rewrites the header accordingly."""
    old_filename = self.filename_utf8
    old_source_filename = self.source_filename_utf8 or self.filename_utf8
    super(FilePatchDiff, self).set_relpath(relpath)
    # Update the header too.
    filename = self.filename_utf8
    source_filename = self.source_filename_utf8 or self.filename_utf8
    lines = self.diff_header.splitlines(True)
    for i, line in enumerate(lines):
      if line.startswith('diff --git'):
        # The 'a/' and 'b/' prefixes are part of the replaced text, so they
        # are not present anymore on the rewritten 'diff --git' line.
        lines[i] = line.replace(
            'a/' + old_source_filename, source_filename).replace(
                'b/' + old_filename, filename)
      elif re.match(r'^\w+ from .+$', line) or line.startswith('---'):
        lines[i] = line.replace(old_source_filename, source_filename)
      elif re.match(r'^\w+ to .+$', line) or line.startswith('+++'):
        lines[i] = line.replace(old_filename, filename)
    self.diff_header = ''.join(lines)

  def _split_header(self, diff):
    """Splits a diff in two: the header and the hunks.

    Returns:
      A (header, hunks) tuple of strings. The header runs up to and including
      the '+++ ' line following the first '--- ' line.

    Raises:
      UnsupportedPatchFormat: if '--- ' is not followed by '+++ ', or if the
          first hunk line doesn't start with '@@ '.
    """
    lines = diff.splitlines(True)
    # Some diff may not have a ---/+++ set like a git rename with no change or
    # a svn diff with only property change. In that case everything is header.
    split_at = len(lines)
    for index, line in enumerate(lines):
      if line.startswith('--- '):
        split_at = index + 1
        break
    header = lines[:split_at]
    hunks = lines[split_at:]

    if hunks:
      if not hunks[0].startswith('+++ '):
        self._fail('Inconsistent header')
      header.append(hunks.pop(0))
      if hunks and not hunks[0].startswith('@@ '):
        self._fail('Inconsistent hunk header')

    # Mangle any \\ in the header to /.
    header_lines = ('Index:', 'diff', 'copy', 'rename', '+++', '---')
    basename = os.path.basename(self.filename_utf8)
    for i, line in enumerate(header):
      if line.split(' ', 1)[0] in header_lines or line.endswith(basename):
        header[i] = line.replace('\\', '/')
    return ''.join(header), ''.join(hunks)

  @staticmethod
  def _is_git_diff_header(diff_header):
    """Returns True if the diff for a single files was generated with git."""
    # Delete: http://codereview.chromium.org/download/issue6368055_22_29.diff
    # Rename partial change:
    # http://codereview.chromium.org/download/issue6250123_3013_6010.diff
    # Rename no change:
    # http://codereview.chromium.org/download/issue6287022_3001_4010.diff
    return any(l.startswith('diff --git') for l in diff_header.splitlines())

  def _parse_hunk_range(self, text):
    """Parses one side of a hunk header: 'start' or 'start,lines'.

    Returns:
      A (start, lines) tuple of ints; lines is 1 when omitted.

    Raises:
      UnsupportedPatchFormat: if there is more than one comma or a number is
          missing, e.g. '1,' or ','.
    """
    parts = text.split(',')
    if len(parts) > 2:
      self._fail('Hunk header is malformed')
    try:
      values = [int(part) for part in parts]
    except ValueError:
      # The caller's regexp accepts any mix of digits and commas, so an empty
      # number can reach this point.
      self._fail('Hunk header is malformed')
    if len(values) == 1:
      values.append(1)
    return values[0], values[1]

  def _split_hunks(self):
    """Splits the hunks and does verification.

    May set self.is_new or self.is_delete based on the hunk header.

    Returns:
      The list of Hunk objects, with start lines converted to 0-based.

    Raises:
      UnsupportedPatchFormat: if a hunk header is invalid or inconsistent
          with the hunk content.
    """
    hunks = []
    for line in self.diff_hunks.splitlines(True):
      if line.startswith('@@'):
        match = re.match(r'^@@ -([\d,]+) \+([\d,]+) @@.*$', line)
        # File add will result in "-0,0 +1" but file deletion will result in
        # "-1,N +0,0" where N is the number of lines deleted. That's from diff
        # and svn diff. git diff doesn't exhibit this behavior.
        # svn diff for a single line file rewrite "@@ -1 +1 @@". Fun.
        # "@@ -1 +1,N @@" is also valid where N is the length of the new file.
        if not match:
          self._fail('Hunk header is unparsable')
        start_src, lines_src = self._parse_hunk_range(match.group(1))
        start_dst, lines_dst = self._parse_hunk_range(match.group(2))
        new_hunk = Hunk(start_src, lines_src, start_dst, lines_dst)
        if hunks:
          if new_hunk.start_src <= hunks[-1].start_src:
            self._fail('Hunks source lines are not ordered')
          if new_hunk.start_dst <= hunks[-1].start_dst:
            self._fail('Hunks destination lines are not ordered')
        hunks.append(new_hunk)
        continue
      # _split_header() checked that the hunk text starts with '@@ ', so there
      # is always a current hunk here.
      hunks[-1].text.append(line)

    if len(hunks) == 1:
      if hunks[0].start_src == 0 and hunks[0].lines_src == 0:
        self.is_new = True
      if hunks[0].start_dst == 0 and hunks[0].lines_dst == 0:
        self.is_delete = True

    if self.is_new and self.is_delete:
      self._fail('Hunk header is all 0')

    if not self.is_new and not self.is_delete:
      for hunk in hunks:
        variation = (
            sum(1 for i in hunk.text if i.startswith('+')) -
            sum(1 for i in hunk.text if i.startswith('-')))
        if variation != hunk.variation:
          self._fail(
              'Hunk header is incorrect: %d vs %d; %r' % (
                  variation, hunk.variation, hunk))
        if not hunk.start_src:
          self._fail(
              'Hunk header start line is incorrect: %d' % hunk.start_src)
        if not hunk.start_dst:
          self._fail(
              'Hunk header start line is incorrect: %d' % hunk.start_dst)
        hunk.start_src -= 1
        hunk.start_dst -= 1
    if self.is_new and hunks:
      hunks[0].start_dst -= 1
    if self.is_delete and hunks:
      hunks[0].start_src -= 1
    return hunks

  def mangle(self, string):
    """Mangle a file path.

    Converts '\\' to '/' and strips self.patchlevel leading path components.
    """
    return '/'.join(string.replace('\\', '/').split('/')[self.patchlevel:])

  def _verify_git_header(self):
    """Sanity checks the header.

    Expects the following format:

    <garbage>
    diff --git (|a/)<filename> (|b/)<filename>
    <similarity>
    <filemode changes>
    <index>
    <copy|rename from>
    <copy|rename to>
    --- <filename>
    +++ <filename>

    Everything is optional except the diff --git line.
    """
    lines = self.diff_header.splitlines()

    # Verify the diff --git line.
    old = None
    new = None
    while lines:
      match = re.match(r'^diff \-\-git (.*?) (.*)$', lines.pop(0))
      if not match:
        continue
      if match.group(1).startswith('a/') and match.group(2).startswith('b/'):
        self.patchlevel = 1
      old = self.mangle(match.group(1))
      new = self.mangle(match.group(2))

      # The rename is about the new file so the old file can be anything.
      if new not in (self.filename_utf8, 'dev/null'):
        self._fail('Unexpected git diff output name %s.' % new)
      if old == 'dev/null' and new == 'dev/null':
        self._fail('Unexpected /dev/null git diff.')
      break

    if not old or not new:
      self._fail('Unexpected git diff; couldn\'t find git header.')

    if old not in (self.filename_utf8, 'dev/null'):
      # Copy or rename.
      self.source_filename = old.decode('utf-8')
      self.is_new = True

    last_line = ''

    # 'lines' is consumed from the front so the per-line check can peek at the
    # next header line through lines[0].
    while lines:
      line = lines.pop(0)
      self._verify_git_header_process_line(lines, line, last_line)
      last_line = line

    # Cheap check to make sure the file name is at least mentioned in the
    # 'diff' header. That the only remaining invariant.
    if self.filename_utf8 not in self.diff_header:
      self._fail('Diff seems corrupted.')

  def _verify_git_header_process_line(self, lines, line, last_line):
    """Processes a single line of the header.

    Args:
      lines: header lines remaining after 'line'.
      line: header line to check.
      last_line: previously processed header line, '' for the first one.

    Returns nothing; unrecognized lines are ignored. May set is_new and
    is_delete and append to svn_properties.

    Raises:
      UnsupportedPatchFormat: if the line is inconsistent with the patch.

    Format is described to
    http://www.kernel.org/pub/software/scm/git/docs/git-diff.html
    """
    match = re.match(r'^(rename|copy) from (.+)$', line)
    old = self.source_filename_utf8 or self.filename_utf8
    if match:
      if old != match.group(2):
        self._fail('Unexpected git diff input name for line %s.' % line)
      if not lines or not lines[0].startswith('%s to ' % match.group(1)):
        self._fail(
            'Confused %s from/to git diff for line %s.' %
            (match.group(1), line))
      return

    match = re.match(r'^(rename|copy) to (.+)$', line)
    if match:
      if self.filename_utf8 != match.group(2):
        self._fail('Unexpected git diff output name for line %s.' % line)
      if not last_line.startswith('%s from ' % match.group(1)):
        self._fail(
            'Confused %s from/to git diff for line %s.' %
            (match.group(1), line))
      return

    match = re.match(r'^deleted file mode (\d{6})$', line)
    if match:
      # It is necessary to parse it because there may be no hunk, like when the
      # file was empty.
      self.is_delete = True
      return

    match = re.match(r'^new(| file) mode (\d{6})$', line)
    if match:
      mode = match.group(2)
      # Only look at owner ACL for executable. In a 6 digits mode like 100755,
      # the owner permission is the 4th digit, i.e. mode[3].
      if bool(int(mode[3]) & 1):
        self.svn_properties.append(('svn:executable', '.'))
      elif not self.source_filename and self.is_new:
        # It's a new file, not from a rename/copy, then there's no property to
        # delete.
        # NOTE(review): the property is still recorded with a None value here;
        # confirm how consumers interpret it.
        self.svn_properties.append(('svn:executable', None))
      return

    match = re.match(r'^--- (.*)$', line)
    if match:
      if last_line[:3] in ('---', '+++'):
        self._fail('--- and +++ are reversed')
      if match.group(1) == '/dev/null':
        self.is_new = True
      elif self.mangle(match.group(1)) != old:
        # git patches are always well formatted, do not allow random filenames.
        self._fail('Unexpected git diff: %s != %s.' % (old, match.group(1)))
      if not lines or not lines[0].startswith('+++'):
        self._fail('Missing git diff output name.')
      return

    match = re.match(r'^\+\+\+ (.*)$', line)
    if match:
      if not last_line.startswith('---'):
        self._fail('Unexpected git diff: --- not following +++.')
      if '/dev/null' == match.group(1):
        self.is_delete = True
      elif self.filename_utf8 != self.mangle(match.group(1)):
        self._fail(
            'Unexpected git diff: %s != %s.' % (self.filename, match.group(1)))
      if lines:
        self._fail('Crap after +++')
      # We're done.
      return

  def _verify_svn_header(self):
    """Sanity checks the header.

    A svn diff can contain only property changes, in that case there will be no
    proper header. To make things worse, this property change header is
    localized.
    """
    lines = self.diff_header.splitlines()
    last_line = ''

    while lines:
      line = lines.pop(0)
      self._verify_svn_header_process_line(lines, line, last_line)
      last_line = line

    # Cheap check to make sure the file name is at least mentioned in the
    # 'diff' header. That the only remaining invariant.
    if self.filename_utf8 not in self.diff_header:
      self._fail('Diff seems corrupted.')

  def _verify_svn_header_process_line(self, lines, line, last_line):
    """Processes a single line of the header.

    Args:
      lines: header lines remaining after 'line'.
      line: header line to check.
      last_line: previously processed header line, '' for the first one.

    Returns nothing; only '---' and '+++' lines are looked at. May set is_new,
    is_delete and source_filename.

    Raises:
      UnsupportedPatchFormat: if the line is inconsistent with the patch.
    """
    match = re.match(r'^--- ([^\t]+).*$', line)
    if match:
      if last_line[:3] in ('---', '+++'):
        self._fail('--- and +++ are reversed')
      if match.group(1) == '/dev/null':
        self.is_new = True
      elif self.mangle(match.group(1)) != self.filename_utf8:
        # guess the source filename.
        self.source_filename = match.group(1).decode('utf-8')
        self.is_new = True
      if not lines or not lines[0].startswith('+++'):
        self._fail('Nothing after header.')
      return

    match = re.match(r'^\+\+\+ ([^\t]+).*$', line)
    if match:
      if not last_line.startswith('---'):
        self._fail('Unexpected diff: --- not following +++.')
      if match.group(1) == '/dev/null':
        self.is_delete = True
      elif self.mangle(match.group(1)) != self.filename_utf8:
        self._fail('Unexpected diff: %s.' % match.group(1))
      if lines:
        self._fail('Crap after +++')
      # We're done.
      return

  def dump(self):
    """Dumps itself in a verbose way to help diagnosing."""
    return str(self) + '\n' + self.get(True)
504
maruel@chromium.orgb3727a32011-04-04 19:31:44 +0000505
class PatchSet(object):
  """A list of FilePatch* objects.

  The patches are kept sorted by order of application, see __init__().
  """

  def __init__(self, patches):
    """
    Args:
      patches: iterable of FilePatchBase instances; any iterable is accepted,
          including one-shot iterators.
    """
    # Materialize first: validating a one-shot iterator would otherwise
    # exhaust it and leave nothing to sort.
    patches = list(patches)
    for p in patches:
      assert isinstance(p, FilePatchBase)

    def key(p):
      """Sort by ordering of application.

      File move are first.
      Deletes are last.
      """
      # The bool is necessary because None < 'string' but the reverse is needed.
      return (
          p.is_delete,
          # False is before True, so files *with* a source file will be first.
          not bool(p.source_filename),
          p.source_filename_utf8,
          p.filename_utf8)

    self.patches = sorted(patches, key=key)

  def set_relpath(self, relpath):
    """Used to offset the patch into a subdirectory."""
    for patch in self.patches:
      patch.set_relpath(relpath)

  def __iter__(self):
    return iter(self.patches)

  def __getitem__(self, key):
    return self.patches[key]

  @property
  def filenames(self):
    """Returns the file name of every patch, in order of application."""
    return [p.filename for p in self.patches]