blob: 4c96de3561339925048cd4398b3636191634b599 [file] [log] [blame]
maruel@chromium.orgb3727a32011-04-04 19:31:44 +00001# coding=utf8
2# Copyright (c) 2011 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5"""Utility functions to handle patches."""
6
maruel@chromium.orgcd619402011-04-09 00:08:00 +00007import posixpath
8import os
maruel@chromium.orgb3727a32011-04-04 19:31:44 +00009import re
10
11
class UnsupportedPatchFormat(Exception):
  """Raised when a patch for a file cannot be processed.

  Attributes:
    filename: name of the file whose patch was rejected.
    status: optional explanation; omitted from the message when falsy.
  """

  def __init__(self, filename, status):
    super(UnsupportedPatchFormat, self).__init__(filename, status)
    self.status = status
    self.filename = filename

  def __str__(self):
    # First line names the file; the explanation, if any, goes on its own line.
    parts = ['Can\'t process patch for file %s.' % self.filename]
    if self.status:
      parts.append('%s' % self.status)
    return '\n'.join(parts)
23
24
class FilePatchBase(object):
  """Base class describing one file touched by a patch.

  File names always use '/' as separator, never os.sep, for consistency.
  This class is not meant to be instantiated directly; use a subclass.
  """
  is_delete = False
  is_binary = False
  is_new = False

  def __init__(self, filename):
    # Only subclasses may be instantiated.
    assert self.__class__ is not FilePatchBase
    self.filename = self._process_filename(filename)
    # Only set when the file is copied or moved.
    self.source_filename = None

  @staticmethod
  def _process_filename(filename):
    """Returns filename with '/' separators, rejecting unsupported names.

    Raises UnsupportedPatchFormat for the first forbidden substring or
    forbidden prefix found, in the order they are listed below.
    """
    filename = filename.replace('\\', '/')
    # Blacklist a few characters for simplicity.
    forbidden = [c for c in ('%', '$', '..', '\'', '"') if c in filename]
    if forbidden:
      raise UnsupportedPatchFormat(
          filename, 'Can\'t use \'%s\' in filename.' % forbidden[0])
    bad_prefixes = [s for s in ('/', 'CON', 'COM') if filename.startswith(s)]
    if bad_prefixes:
      raise UnsupportedPatchFormat(
          filename, 'Filename can\'t start with \'%s\'.' % bad_prefixes[0])
    return filename

  def set_relpath(self, relpath):
    """Prefixes filename, and source_filename if set, with relpath.

    Does nothing when relpath is empty. Fails when relpath is absolute.
    """
    if not relpath:
      return
    relpath = relpath.replace('\\', '/')
    if relpath.startswith('/'):
      self._fail('Relative path starts with %s' % relpath[0])
    joined = posixpath.join(relpath, self.filename)
    self.filename = self._process_filename(joined)
    if self.source_filename:
      joined_source = posixpath.join(relpath, self.source_filename)
      self.source_filename = self._process_filename(joined_source)

  def _fail(self, msg):
    """Raises UnsupportedPatchFormat for this file with the given message."""
    raise UnsupportedPatchFormat(self.filename, msg)

  def __str__(self):
    # Status-like board: one column per flag, blank when the flag is off.
    columns = (
        (self.is_binary, 'B'),
        (self.is_delete, 'D'),
        (self.is_new, 'N'),
        (self.source_filename, 'R'),
    )
    board = ''.join(letter if flag else ' ' for flag, letter in columns)
    return board + ' %s->%s' % (self.source_filename, self.filename)
90
maruel@chromium.orgb3727a32011-04-04 19:31:44 +000091
class FilePatchDelete(FilePatchBase):
  """Represents the removal of a file."""
  is_delete = True

  def __init__(self, filename, is_binary):
    """Records the file name and whether the deleted file is binary."""
    super(FilePatchDelete, self).__init__(filename)
    # Overrides the class-level default on this instance.
    self.is_binary = is_binary
99
maruel@chromium.orgb3727a32011-04-04 19:31:44 +0000100
class FilePatchBinary(FilePatchBase):
  """Holds the complete content of a binary file."""
  is_binary = True

  def __init__(self, filename, data, svn_properties, is_new):
    """Stores the content, the svn properties and the is_new flag."""
    super(FilePatchBinary, self).__init__(filename)
    self.is_new = is_new
    self.data = data
    # A falsy value (None, empty list) is replaced by a fresh empty list.
    if not svn_properties:
      svn_properties = []
    self.svn_properties = svn_properties

  def get(self):
    """Returns the data given to the constructor."""
    return self.data
113
114
class FilePatchDiff(FilePatchBase):
  """Patch for a single file.

  The diff is split into a header (everything up to and including the
  '---'/'+++' pair) and the hunks. The header is then sanity checked as either
  a git or a svn header; any inconsistency raises UnsupportedPatchFormat.
  """

  def __init__(self, filename, diff, svn_properties):
    """Parses and verifies diff.

    Args:
      filename: name of the patched file.
      diff: text of the diff for this single file; must not be empty.
      svn_properties: optional list of (name, value) tuples.

    Raises:
      UnsupportedPatchFormat: the diff is empty or its header is inconsistent.
    """
    super(FilePatchDiff, self).__init__(filename)
    if not diff:
      self._fail('File doesn\'t have a diff.')
    self.diff_header, self.diff_hunks = self._split_header(diff)
    # Copy the list: header verification may append ('svn:executable', '*') and
    # must not modify the list owned by the caller.
    self.svn_properties = list(svn_properties or [])
    self.is_git_diff = self._is_git_diff_header(self.diff_header)
    self.patchlevel = 0
    if self.is_git_diff:
      self._verify_git_header()
    else:
      self._verify_svn_header()
    if self.source_filename and not self.is_new:
      self._fail('If source_filename is set, is_new must be also be set')

  def get(self, for_git):
    """Returns the full diff text, header followed by hunks.

    When for_git is false and the file was copied or moved, every reference to
    source_filename in the header is replaced by filename.
    """
    if for_git or not self.source_filename:
      return self.diff_header + self.diff_hunks
    else:
      # patch is stupid. It patches the source_filename instead so get rid of
      # any source_filename reference if needed.
      return (
          self.diff_header.replace(self.source_filename, self.filename) +
          self.diff_hunks)

  def set_relpath(self, relpath):
    """Offsets the file names and rewrites the diff header accordingly."""
    old_filename = self.filename
    old_source_filename = self.source_filename or self.filename
    super(FilePatchDiff, self).set_relpath(relpath)
    # Update the header too.
    source_filename = self.source_filename or self.filename
    lines = self.diff_header.splitlines(True)
    for i, line in enumerate(lines):
      if line.startswith('diff --git'):
        # Note that the 'a/' and 'b/' prefixes are dropped by the replacement.
        lines[i] = line.replace(
            'a/' + old_source_filename, source_filename).replace(
                'b/' + old_filename, self.filename)
      elif re.match(r'^\w+ from .+$', line) or line.startswith('---'):
        lines[i] = line.replace(old_source_filename, source_filename)
      elif re.match(r'^\w+ to .+$', line) or line.startswith('+++'):
        lines[i] = line.replace(old_filename, self.filename)
    self.diff_header = ''.join(lines)

  def _split_header(self, diff):
    """Splits a diff in two: the header and the hunks.

    Returns:
      A (header, hunks) tuple of strings.
    """
    header = []
    hunks = diff.splitlines(True)
    # Consume lines up to and including the first '--- ' line. Some diff may
    # not have a ---/+++ set like a git rename with no change or a svn diff
    # with only property change; in that case everything ends up in the header
    # and hunks is left empty.
    while hunks:
      header.append(hunks.pop(0))
      if header[-1].startswith('--- '):
        break

    if hunks:
      if not hunks[0].startswith('+++ '):
        self._fail('Inconsistent header')
      header.append(hunks.pop(0))
      if hunks:
        if not hunks[0].startswith('@@ '):
          self._fail('Inconsistent hunk header')

    # Mangle any \\ in the header to /.
    header_lines = ('Index:', 'diff', 'copy', 'rename', '+++', '---')
    basename = os.path.basename(self.filename)
    for i, line in enumerate(header):
      # NOTE(review): lines still carry their line ending here, so the
      # endswith() test can only match a last line without a trailing newline;
      # confirm whether that is intended.
      if line.split(' ', 1)[0] in header_lines or line.endswith(basename):
        header[i] = line.replace('\\', '/')
    return ''.join(header), ''.join(hunks)

  @staticmethod
  def _is_git_diff_header(diff_header):
    """Returns True if the diff for a single files was generated with git."""
    # Delete: http://codereview.chromium.org/download/issue6368055_22_29.diff
    # Rename partial change:
    # http://codereview.chromium.org/download/issue6250123_3013_6010.diff
    # Rename no change:
    # http://codereview.chromium.org/download/issue6287022_3001_4010.diff
    return any(l.startswith('diff --git') for l in diff_header.splitlines())

  def mangle(self, string):
    """Mangle a file path.

    Converts backslashes to '/' and strips the first self.patchlevel path
    components.
    """
    return '/'.join(string.replace('\\', '/').split('/')[self.patchlevel:])

  def _verify_git_header(self):
    """Sanity checks the header.

    Expects the following format:

    <garbage>
    diff --git (|a/)<filename> (|b/)<filename>
    <similarity>
    <filemode changes>
    <index>
    <copy|rename from>
    <copy|rename to>
    --- <filename>
    +++ <filename>

    Everything is optional except the diff --git line.

    Sets patchlevel, and source_filename/is_new for a copy or a rename.
    """
    lines = self.diff_header.splitlines()

    # Verify the diff --git line.
    old = None
    new = None
    while lines:
      match = re.match(r'^diff \-\-git (.*?) (.*)$', lines.pop(0))
      if not match:
        continue
      if match.group(1).startswith('a/') and match.group(2).startswith('b/'):
        self.patchlevel = 1
      old = self.mangle(match.group(1))
      new = self.mangle(match.group(2))

      # The rename is about the new file so the old file can be anything.
      if new not in (self.filename, 'dev/null'):
        self._fail('Unexpected git diff output name %s.' % new)
      if old == 'dev/null' and new == 'dev/null':
        self._fail('Unexpected /dev/null git diff.')
      break

    if not old or not new:
      self._fail('Unexpected git diff; couldn\'t find git header.')

    if old not in (self.filename, 'dev/null'):
      # Copy or rename.
      self.source_filename = old
      self.is_new = True

    last_line = ''

    # Check every remaining header line, giving each one access to its
    # predecessor and to the lines that follow it.
    while lines:
      line = lines.pop(0)
      self._verify_git_header_process_line(lines, line, last_line)
      last_line = line

    # Cheap check to make sure the file name is at least mentioned in the
    # 'diff' header. That the only remaining invariant.
    if self.filename not in self.diff_header:
      self._fail('Diff seems corrupted.')

  def _verify_git_header_process_line(self, lines, line, last_line):
    """Processes a single line of the header.

    Args:
      lines: header lines that follow line.
      line: the header line to check.
      last_line: the header line that preceded line, '' for the first one.

    Returns nothing; an inconsistency raises UnsupportedPatchFormat. May set
    is_new or is_delete and may append to svn_properties.

    Format is described to
    http://www.kernel.org/pub/software/scm/git/docs/git-diff.html
    """
    match = re.match(r'^(rename|copy) from (.+)$', line)
    old = self.source_filename or self.filename
    if match:
      if old != match.group(2):
        self._fail('Unexpected git diff input name for line %s.' % line)
      if not lines or not lines[0].startswith('%s to ' % match.group(1)):
        self._fail(
            'Confused %s from/to git diff for line %s.' %
                (match.group(1), line))
      return

    match = re.match(r'^(rename|copy) to (.+)$', line)
    if match:
      if self.filename != match.group(2):
        self._fail('Unexpected git diff output name for line %s.' % line)
      if not last_line.startswith('%s from ' % match.group(1)):
        self._fail(
            'Confused %s from/to git diff for line %s.' %
                (match.group(1), line))
      return

    # Ignore "deleted file mode 100644" since it's not needed.
    match = re.match(r'^new(| file) mode (\d{6})$', line)
    if match:
      mode = match.group(2)
      # The mode is six digits, e.g. '100755': the last three are the owner,
      # group and other permissions. Only look at owner ACL for executable,
      # which is the digit at index 3.
      # TODO(maruel): Add support to remove a property.
      if int(mode[3]) & 1:
        self.svn_properties.append(('svn:executable', '*'))

    match = re.match(r'^--- (.*)$', line)
    if match:
      if last_line[:3] in ('---', '+++'):
        self._fail('--- and +++ are reversed')
      if match.group(1) == '/dev/null':
        self.is_new = True
      elif self.mangle(match.group(1)) != old:
        # git patches are always well formatted, do not allow random filenames.
        self._fail('Unexpected git diff: %s != %s.' % (old, match.group(1)))
      if not lines or not lines[0].startswith('+++'):
        self._fail('Missing git diff output name.')
      return

    match = re.match(r'^\+\+\+ (.*)$', line)
    if match:
      if not last_line.startswith('---'):
        self._fail('Unexpected git diff: --- not following +++.')
      if '/dev/null' == match.group(1):
        self.is_delete = True
      elif self.filename != self.mangle(match.group(1)):
        self._fail(
            'Unexpected git diff: %s != %s.' % (self.filename, match.group(1)))
      if lines:
        self._fail('Crap after +++')
      # We're done.
      return

  def _verify_svn_header(self):
    """Sanity checks the header.

    A svn diff can contain only property changes, in that case there will be no
    proper header. To make things worse, this property change header is
    localized.
    """
    lines = self.diff_header.splitlines()
    last_line = ''

    while lines:
      line = lines.pop(0)
      self._verify_svn_header_process_line(lines, line, last_line)
      last_line = line

    # Cheap check to make sure the file name is at least mentioned in the
    # 'diff' header. That the only remaining invariant.
    if self.filename not in self.diff_header:
      self._fail('Diff seems corrupted.')

  def _verify_svn_header_process_line(self, lines, line, last_line):
    """Processes a single line of the header.

    Args:
      lines: header lines that follow line.
      line: the header line to check.
      last_line: the header line that preceded line, '' for the first one.

    Returns nothing; an inconsistency raises UnsupportedPatchFormat. May set
    is_new, is_delete and source_filename.
    """
    # The file name ends at the first tab; whatever follows is ignored.
    match = re.match(r'^--- ([^\t]+).*$', line)
    if match:
      if last_line[:3] in ('---', '+++'):
        self._fail('--- and +++ are reversed')
      if match.group(1) == '/dev/null':
        self.is_new = True
      elif self.mangle(match.group(1)) != self.filename:
        # guess the source filename.
        self.source_filename = match.group(1)
        self.is_new = True
      if not lines or not lines[0].startswith('+++'):
        self._fail('Nothing after header.')
      return

    match = re.match(r'^\+\+\+ ([^\t]+).*$', line)
    if match:
      if not last_line.startswith('---'):
        self._fail('Unexpected diff: --- not following +++.')
      if match.group(1) == '/dev/null':
        self.is_delete = True
      elif self.mangle(match.group(1)) != self.filename:
        self._fail('Unexpected diff: %s.' % match.group(1))
      if lines:
        self._fail('Crap after +++')
      # We're done.
      return
maruel@chromium.orgb3727a32011-04-04 19:31:44 +0000379
380
class PatchSet(object):
  """A list of FilePatch* objects, kept in the order they should be applied."""

  def __init__(self, patches):
    """Validates and sorts patches.

    Args:
      patches: iterable of FilePatchBase instances.
    """
    # Materialize once so an iterator is not exhausted by the validation loop
    # before it gets sorted.
    patches = list(patches)
    for p in patches:
      assert isinstance(p, FilePatchBase)

    def key(p):
      """Sort by ordering of application.

      File move are first.
      Deletes are last.
      """
      # 'not p.source_filename' is False for a move or a copy, so those sort
      # before the other patches. Every element of the key has the same type
      # for all patches, so the ordering does not depend on comparing a string
      # with a tuple, which raises TypeError on Python 3.
      return (
          p.is_delete,
          not p.source_filename,
          p.source_filename or p.filename,
          p.filename)

    self.patches = sorted(patches, key=key)

  def set_relpath(self, relpath):
    """Used to offset the patch into a subdirectory."""
    for patch in self.patches:
      patch.set_relpath(relpath)

  def __iter__(self):
    for patch in self.patches:
      yield patch

  def __getitem__(self, key):
    return self.patches[key]

  @property
  def filenames(self):
    """The file name of every patch, in application order."""
    return [p.filename for p in self.patches]