blob: 4a67cadfced14efa28c9e232399593b3dafaae02 [file] [log] [blame]
maruel@chromium.orgb3727a32011-04-04 19:31:44 +00001# coding=utf8
2# Copyright (c) 2011 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5"""Utility functions to handle patches."""
6
maruel@chromium.orgcd619402011-04-09 00:08:00 +00007import posixpath
8import os
maruel@chromium.orgb3727a32011-04-04 19:31:44 +00009import re
10
11
class UnsupportedPatchFormat(Exception):
  """Error raised when the patch for a file can't be processed.

  Attributes:
    filename: name of the file whose patch was rejected.
    status: optional detail message; appended to str() when truthy.
  """

  def __init__(self, filename, status):
    # Forward both values so they also end up in Exception.args.
    super(UnsupportedPatchFormat, self).__init__(filename, status)
    self.status = status
    self.filename = filename

  def __str__(self):
    """Returns the message, followed by the status on its own line if set."""
    message = 'Can\'t process patch for file %s.' % self.filename
    if not self.status:
      return message
    return message + '\n%s' % self.status
23
24
class FilePatchBase(object):
  """Defines a single file being modified.

  '/' is always used instead of os.sep for consistency.

  This class is not meant to be instantiated directly; __init__ asserts that
  a subclass is being constructed.
  """
  # Status flags. Subclasses and instances override them as needed.
  is_delete = False
  is_binary = False
  is_new = False

  def __init__(self, filename):
    assert self.__class__ is not FilePatchBase
    self.filename = self._process_filename(filename)
    # Set when the file is copied or moved.
    self.source_filename = None

  @staticmethod
  def _process_filename(filename):
    """Normalizes a file name to '/' separators and validates it.

    Args:
      filename: path of the file as found in the patch.

    Returns:
      The file name with every backslash replaced by '/'.

    Raises:
      UnsupportedPatchFormat: the name contains a blacklisted character
          sequence, starts with '/', or its first path component is a
          reserved Windows device name (CON or COM<digit>, optionally followed
          by an extension).
    """
    filename = filename.replace('\\', '/')
    # Blacklist a few characters for simplicity.
    for i in ('%', '$', '..', '\'', '"'):
      if i in filename:
        raise UnsupportedPatchFormat(
            filename, 'Can\'t use \'%s\' in filename.' % i)
    if filename.startswith('/'):
      raise UnsupportedPatchFormat(
          filename, 'Filename can\'t start with \'/\'.')
    # Only reject real device names. A plain prefix test would also refuse
    # legitimate names such as CONTRIBUTING.md or COMMON/util.h.
    device = filename.split('/', 1)[0].split('.', 1)[0]
    if device == 'CON' or re.match(r'COM\d\Z', device):
      raise UnsupportedPatchFormat(
          filename, 'Filename can\'t start with \'%s\'.' % device)
    return filename

  def set_relpath(self, relpath):
    """Offsets filename (and source_filename, if set) into relpath.

    Args:
      relpath: relative directory to prepend; a falsy value is a no-op.

    Raises:
      UnsupportedPatchFormat: relpath is absolute or the resulting path is
          rejected by _process_filename().
    """
    if not relpath:
      return
    relpath = relpath.replace('\\', '/')
    if relpath[0] == '/':
      self._fail('Relative path starts with %s' % relpath[0])
    self.filename = self._process_filename(
        posixpath.join(relpath, self.filename))
    if self.source_filename:
      self.source_filename = self._process_filename(
          posixpath.join(relpath, self.source_filename))

  def _fail(self, msg):
    """Shortcut function to raise UnsupportedPatchFormat."""
    raise UnsupportedPatchFormat(self.filename, msg)

  def __str__(self):
    """Returns a status-like line: four flag columns, then source->dest."""
    # Use a status-like board: one column per flag, blank when unset.
    flags = (
        (self.is_binary, 'B'),
        (self.is_delete, 'D'),
        (self.is_new, 'N'),
        (self.source_filename, 'R'),
    )
    out = ''.join(letter if is_set else ' ' for is_set, letter in flags)
    return out + ' %s->%s' % (self.source_filename, self.filename)
90
maruel@chromium.orgb3727a32011-04-04 19:31:44 +000091
class FilePatchDelete(FilePatchBase):
  """Patch entry that deletes a file."""
  # Every instance of this class is a deletion.
  is_delete = True

  def __init__(self, filename, is_binary):
    """Records the file to delete.

    Args:
      filename: path of the deleted file; validated by the base class.
      is_binary: stored as-is on the instance, overriding the class default.
    """
    super(FilePatchDelete, self).__init__(filename)
    self.is_binary = is_binary
99
maruel@chromium.orgb3727a32011-04-04 19:31:44 +0000100
class FilePatchBinary(FilePatchBase):
  """Content of a new binary file."""
  # Every instance of this class is flagged as binary.
  is_binary = True

  def __init__(self, filename, data, svn_properties, is_new):
    """Stores the binary payload and its metadata.

    Args:
      filename: path of the file; validated by the base class.
      data: content of the file, returned unchanged by get().
      svn_properties: properties to keep; a falsy value becomes [].
      is_new: stored as-is on the instance.
    """
    super(FilePatchBinary, self).__init__(filename)
    self.is_new = is_new
    self.svn_properties = svn_properties if svn_properties else []
    self.data = data

  def get(self):
    """Returns the data given to the constructor."""
    return self.data
113
114
class FilePatchDiff(FilePatchBase):
  """Patch for a single file.

  The diff is split into a header (everything up to and including the
  '+++' line) and the hunks. The header is sanity-checked as either a git or
  a svn diff, which also fills in is_new, is_delete, source_filename and,
  for git mode lines, svn_properties.
  """

  def __init__(self, filename, diff, svn_properties):
    """Parses and validates the diff.

    Args:
      filename: path of the patched file; validated by the base class.
      diff: full text of the diff for this file; must not be empty.
      svn_properties: optional sequence of (name, value) pairs.

    Raises:
      UnsupportedPatchFormat: the diff is empty or its header is
          inconsistent with filename.
    """
    super(FilePatchDiff, self).__init__(filename)
    if not diff:
      self._fail('File doesn\'t have a diff.')
    self.diff_header, self.diff_hunks = self._split_header(diff)
    # Copy the list: header verification may append to it and the caller's
    # object must not be modified behind its back.
    self.svn_properties = list(svn_properties or [])
    self.is_git_diff = self._is_git_diff_header(self.diff_header)
    self.patchlevel = 0
    if self.is_git_diff:
      self._verify_git_header()
    else:
      self._verify_svn_header()
    if self.source_filename and not self.is_new:
      self._fail('If source_filename is set, is_new must be also be set')

  def get(self, for_git):
    """Returns the diff text (header followed by hunks).

    Args:
      for_git: when false and the file was copied or moved, references to
          source_filename in the header are replaced with filename.
    """
    if for_git or not self.source_filename:
      return self.diff_header + self.diff_hunks
    else:
      # patch is stupid. It patches the source_filename instead so get rid of
      # any source_filename reference if needed.
      return (
          self.diff_header.replace(self.source_filename, self.filename) +
          self.diff_hunks)

  def set_relpath(self, relpath):
    """Offsets the file names into relpath and rewrites the diff header."""
    old_filename = self.filename
    old_source_filename = self.source_filename or self.filename
    super(FilePatchDiff, self).set_relpath(relpath)
    # Update the header too.
    source_filename = self.source_filename or self.filename
    lines = self.diff_header.splitlines(True)
    for i, line in enumerate(lines):
      if line.startswith('diff --git'):
        lines[i] = line.replace(
            'a/' + old_source_filename, source_filename).replace(
                'b/' + old_filename, self.filename)
      elif re.match(r'^\w+ from .+$', line) or line.startswith('---'):
        lines[i] = line.replace(old_source_filename, source_filename)
      elif re.match(r'^\w+ to .+$', line) or line.startswith('+++'):
        lines[i] = line.replace(old_filename, self.filename)
    self.diff_header = ''.join(lines)

  def _split_header(self, diff):
    """Splits a diff in two: the header and the hunks.

    Returns:
      A (header, hunks) tuple of strings. Backslashes are converted to '/' in
      header lines that are expected to carry a file name.

    Raises:
      UnsupportedPatchFormat: '---' is not followed by '+++', or '+++' is
          followed by something that is not a '@@ ' hunk header.
    """
    lines = diff.splitlines(True)
    # The header runs up to and including the first '--- ' line. Some diff may
    # not have a ---/+++ set like a git rename with no change or a svn diff
    # with only property change; then everything is header.
    split_at = len(lines)
    for i, line in enumerate(lines):
      if line.startswith('--- '):
        split_at = i + 1
        break
    header = lines[:split_at]
    hunks = lines[split_at:]

    if hunks:
      if not hunks[0].startswith('+++ '):
        self._fail('Inconsistent header')
      header.append(hunks.pop(0))
      if hunks:
        if not hunks[0].startswith('@@ '):
          self._fail('Inconsistent hunk header')

    # Mangle any \\ in the header to /.
    header_lines = ('Index:', 'diff', 'copy', 'rename', '+++', '---')
    basename = os.path.basename(self.filename)
    for i, line in enumerate(header):
      # Lines keep their line ending (splitlines(True)), so strip it before
      # testing whether the line ends with the file name.
      if (line.split(' ', 1)[0] in header_lines or
          line.rstrip('\r\n').endswith(basename)):
        header[i] = line.replace('\\', '/')
    return ''.join(header), ''.join(hunks)

  @staticmethod
  def _is_git_diff_header(diff_header):
    """Returns True if the diff for a single files was generated with git."""
    # Delete: http://codereview.chromium.org/download/issue6368055_22_29.diff
    # Rename partial change:
    # http://codereview.chromium.org/download/issue6250123_3013_6010.diff
    # Rename no change:
    # http://codereview.chromium.org/download/issue6287022_3001_4010.diff
    return any(l.startswith('diff --git') for l in diff_header.splitlines())

  def mangle(self, string):
    """Mangle a file path.

    Converts backslashes to '/' and strips the first self.patchlevel path
    components.
    """
    return '/'.join(string.replace('\\', '/').split('/')[self.patchlevel:])

  def _verify_git_header(self):
    """Sanity checks the header.

    Expects the following format:

    <garbagge>
    diff --git (|a/)<filename> (|b/)<filename>
    <similarity>
    <filemode changes>
    <index>
    <copy|rename from>
    <copy|rename to>
    --- <filename>
    +++ <filename>

    Everything is optional except the diff --git line.

    Sets patchlevel, and source_filename/is_new for a copy or rename.

    Raises:
      UnsupportedPatchFormat: the header is missing or inconsistent.
    """
    lines = self.diff_header.splitlines()

    # Verify the diff --git line.
    old = None
    new = None
    while lines:
      match = re.match(r'^diff \-\-git (.*?) (.*)$', lines.pop(0))
      if not match:
        continue
      if match.group(1).startswith('a/') and match.group(2).startswith('b/'):
        self.patchlevel = 1
      old = self.mangle(match.group(1))
      new = self.mangle(match.group(2))

      # The rename is about the new file so the old file can be anything.
      if new not in (self.filename, 'dev/null'):
        self._fail('Unexpected git diff output name %s.' % new)
      if old == 'dev/null' and new == 'dev/null':
        self._fail('Unexpected /dev/null git diff.')
      break

    if not old or not new:
      self._fail('Unexpected git diff; couldn\'t find git header.')

    if old not in (self.filename, 'dev/null'):
      # Copy or rename.
      self.source_filename = old
      self.is_new = True

    last_line = ''

    while lines:
      line = lines.pop(0)
      self._verify_git_header_process_line(lines, line, last_line)
      last_line = line

    # Cheap check to make sure the file name is at least mentioned in the
    # 'diff' header. That the only remaining invariant.
    if self.filename not in self.diff_header:
      self._fail('Diff seems corrupted.')

  def _verify_git_header_process_line(self, lines, line, last_line):
    """Processes a single line of the header.

    Args:
      lines: remaining header lines, not yet processed.
      line: the line to process.
      last_line: the previously processed line, '' for the first one.

    Returns nothing; state is recorded on self and any inconsistency raises
    UnsupportedPatchFormat.

    Format is described to
    http://www.kernel.org/pub/software/scm/git/docs/git-diff.html
    """
    match = re.match(r'^(rename|copy) from (.+)$', line)
    old = self.source_filename or self.filename
    if match:
      if old != match.group(2):
        self._fail('Unexpected git diff input name for line %s.' % line)
      if not lines or not lines[0].startswith('%s to ' % match.group(1)):
        self._fail(
            'Confused %s from/to git diff for line %s.' %
                (match.group(1), line))
      return

    match = re.match(r'^(rename|copy) to (.+)$', line)
    if match:
      if self.filename != match.group(2):
        self._fail('Unexpected git diff output name for line %s.' % line)
      if not last_line.startswith('%s from ' % match.group(1)):
        self._fail(
            'Confused %s from/to git diff for line %s.' %
                (match.group(1), line))
      return

    match = re.match(r'^deleted file mode (\d{6})$', line)
    if match:
      # It is necessary to parse it because there may be no hunk, like when the
      # file was empty.
      self.is_delete = True
      return

    match = re.match(r'^new(| file) mode (\d{6})$', line)
    if match:
      mode = match.group(2)
      # Only look at owner ACL for executable. The mode is six octal digits:
      # three for the file type, then owner, group and other permissions, so
      # the owner digit is at index 3.
      # TODO(maruel): Add support to remove a property.
      if bool(int(mode[3]) & 1):
        self.svn_properties.append(('svn:executable', '*'))
      return

    match = re.match(r'^--- (.*)$', line)
    if match:
      if last_line[:3] in ('---', '+++'):
        self._fail('--- and +++ are reversed')
      if match.group(1) == '/dev/null':
        self.is_new = True
      elif self.mangle(match.group(1)) != old:
        # git patches are always well formatted, do not allow random filenames.
        self._fail('Unexpected git diff: %s != %s.' % (old, match.group(1)))
      if not lines or not lines[0].startswith('+++'):
        self._fail('Missing git diff output name.')
      return

    match = re.match(r'^\+\+\+ (.*)$', line)
    if match:
      if not last_line.startswith('---'):
        self._fail('Unexpected git diff: --- not following +++.')
      if '/dev/null' == match.group(1):
        self.is_delete = True
      elif self.filename != self.mangle(match.group(1)):
        self._fail(
            'Unexpected git diff: %s != %s.' % (self.filename, match.group(1)))
      if lines:
        self._fail('Crap after +++')
      # We're done.
      return

  def _verify_svn_header(self):
    """Sanity checks the header.

    A svn diff can contain only property changes, in that case there will be no
    proper header. To make things worse, this property change header is
    localized.

    Raises:
      UnsupportedPatchFormat: the header is inconsistent or never mentions
          the file name.
    """
    lines = self.diff_header.splitlines()
    last_line = ''

    while lines:
      line = lines.pop(0)
      self._verify_svn_header_process_line(lines, line, last_line)
      last_line = line

    # Cheap check to make sure the file name is at least mentioned in the
    # 'diff' header. That the only remaining invariant.
    if self.filename not in self.diff_header:
      self._fail('Diff seems corrupted.')

  def _verify_svn_header_process_line(self, lines, line, last_line):
    """Processes a single line of the header.

    Args:
      lines: remaining header lines, not yet processed.
      line: the line to process.
      last_line: the previously processed line, '' for the first one.

    Returns nothing; state is recorded on self and any inconsistency raises
    UnsupportedPatchFormat.
    """
    match = re.match(r'^--- ([^\t]+).*$', line)
    if match:
      if last_line[:3] in ('---', '+++'):
        self._fail('--- and +++ are reversed')
      if match.group(1) == '/dev/null':
        self.is_new = True
      elif self.mangle(match.group(1)) != self.filename:
        # guess the source filename.
        self.source_filename = match.group(1)
        self.is_new = True
      if not lines or not lines[0].startswith('+++'):
        self._fail('Nothing after header.')
      return

    match = re.match(r'^\+\+\+ ([^\t]+).*$', line)
    if match:
      if not last_line.startswith('---'):
        self._fail('Unexpected diff: --- not following +++.')
      if match.group(1) == '/dev/null':
        self.is_delete = True
      elif self.mangle(match.group(1)) != self.filename:
        self._fail('Unexpected diff: %s.' % match.group(1))
      if lines:
        self._fail('Crap after +++')
      # We're done.
      return
maruel@chromium.orgb3727a32011-04-04 19:31:44 +0000386
387
class PatchSet(object):
  """A list of FilePatch* objects.

  The patches are kept in self.patches, sorted in order of application.
  """

  def __init__(self, patches):
    """Validates and sorts the patches.

    Args:
      patches: iterable of FilePatchBase instances.
    """
    # Materialize first so a one-shot iterable isn't exhausted by the type
    # check before it gets sorted.
    patches = list(patches)
    for p in patches:
      assert isinstance(p, FilePatchBase)
    self.patches = sorted(patches, key=self._sort_key)

  @staticmethod
  def _sort_key(p):
    """Sort by ordering of application.

    File move are first.
    Deletes are last.

    Moves are ordered by source file name then file name; other patches by
    file name. An explicit rank is used rather than comparing strings with
    tuples, which is not supported on every python version.
    """
    return (
        p.is_delete,
        not p.source_filename,
        p.source_filename or p.filename,
        p.filename)

  def set_relpath(self, relpath):
    """Used to offset the patch into a subdirectory."""
    for patch in self.patches:
      patch.set_relpath(relpath)

  def __iter__(self):
    """Yields the patches in order of application."""
    for patch in self.patches:
      yield patch

  def __getitem__(self, key):
    """Returns the patch (or slice of patches) at position key."""
    return self.patches[key]

  @property
  def filenames(self):
    """The file name of every patch, in order of application."""
    return [p.filename for p in self.patches]