blob: 8bdeddb1e67e56c89dffed7f0aa0a7e106d546d4 [file] [log] [blame]
maruel@chromium.orgb3727a32011-04-04 19:31:44 +00001# coding=utf8
2# Copyright (c) 2011 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5"""Utility functions to handle patches."""
6
maruel@chromium.orgcd619402011-04-09 00:08:00 +00007import posixpath
8import os
maruel@chromium.orgb3727a32011-04-04 19:31:44 +00009import re
10
11
class UnsupportedPatchFormat(Exception):
  """Raised when the patch for a given file can't be processed.

  Attributes:
    filename: name of the file whose patch was rejected.
    status: optional detail message; omitted from str() when false.
  """

  def __init__(self, filename, status):
    # Forward both values so they also show up in Exception.args.
    super(UnsupportedPatchFormat, self).__init__(filename, status)
    self.filename = filename
    self.status = status

  def __str__(self):
    """Returns the generic message, plus the status on a second line."""
    message = 'Can\'t process patch for file %s.' % self.filename
    if not self.status:
      return message
    return message + '\n%s' % self.status
23
24
class FilePatchBase(object):
  """Base class describing a single file touched by a patch.

  Filenames always use '/' as the separator, never os.sep, so they are
  consistent across platforms.
  """
  is_delete = False
  is_binary = False
  is_new = False

  def __init__(self, filename):
    self.filename = self._process_filename(filename)

  @staticmethod
  def _process_filename(filename):
    """Normalizes a filename to '/' separators and validates it.

    Raises:
      UnsupportedPatchFormat: if the name contains a blacklisted character
          sequence or starts with a forbidden prefix.
    """
    normalized = filename.replace('\\', '/')
    # A few character sequences are blacklisted for simplicity; the first
    # one found, in this order, is the one reported.
    forbidden = next(
        (seq for seq in ('%', '$', '..', '\'', '"') if seq in normalized),
        None)
    if forbidden is not None:
      raise UnsupportedPatchFormat(
          normalized, 'Can\'t use \'%s\' in filename.' % forbidden)
    bad_prefix = next(
        (pre for pre in ('/', 'CON', 'COM') if normalized.startswith(pre)),
        None)
    if bad_prefix is not None:
      raise UnsupportedPatchFormat(
          normalized, 'Filename can\'t start with \'%s\'.' % bad_prefix)
    return normalized

  def get(self):  # pragma: no coverage
    """Subclasses return their payload; the base class has none."""
    raise NotImplementedError('Nothing to grab')

  def set_relpath(self, relpath):
    """Prefixes the filename with relpath; a false relpath is a no-op.

    Raises:
      UnsupportedPatchFormat: if relpath is absolute or the joined path fails
          filename validation.
    """
    if not relpath:
      return
    prefix = relpath.replace('\\', '/')
    if prefix.startswith('/'):
      self._fail('Relative path starts with %s' % prefix[0])
    joined = posixpath.join(prefix, self.filename)
    self.filename = self._process_filename(joined)

  def _fail(self, msg):
    """Raises UnsupportedPatchFormat for this file with the given message."""
    raise UnsupportedPatchFormat(self.filename, msg)
66
maruel@chromium.orgb3727a32011-04-04 19:31:44 +000067
class FilePatchDelete(FilePatchBase):
  """Patch entry that removes a file; it carries no content."""
  is_delete = True

  def __init__(self, filename, is_binary):
    """Stores the filename (validated by the base class) and binary flag."""
    super(FilePatchDelete, self).__init__(filename)
    self.is_binary = is_binary

  def get(self):
    """Always raises: a deletion has no data to hand out."""
    raise NotImplementedError('Nothing to grab')
78
79
class FilePatchBinary(FilePatchBase):
  """Holds the complete content of a binary file."""
  is_binary = True

  def __init__(self, filename, data, svn_properties, is_new):
    """Stores the file content and its metadata.

    A false svn_properties (e.g. None) is replaced by a new empty list.
    """
    super(FilePatchBinary, self).__init__(filename)
    self.data = data
    self.svn_properties = svn_properties if svn_properties else []
    self.is_new = is_new

  def get(self):
    """Returns the data given to the constructor, unchanged."""
    return self.data
92
93
class FilePatchDiff(FilePatchBase):
  """Patch for a single file, expressed as a textual diff.

  The diff is split into a header (everything up to and including the
  '---'/'+++' pair, when present) and the hunks that follow. The header is
  then sanity checked as either a git header or a svn header.
  """

  def __init__(self, filename, diff, svn_properties):
    """Splits and validates the diff.

    Args:
      filename: path of the patched file; validated by FilePatchBase.
      diff: non-empty text of the diff for this file.
      svn_properties: list of properties, or a false value for none. Note that
          a list passed in is kept by reference and may be appended to.

    Raises:
      UnsupportedPatchFormat: if diff is empty or its header is inconsistent.
    """
    super(FilePatchDiff, self).__init__(filename)
    if not diff:
      self._fail('File doesn\'t have a diff.')
    self.diff_header, self.diff_hunks = self._split_header(diff)
    self.svn_properties = svn_properties or []
    self.is_git_diff = self._is_git_diff_header(self.diff_header)
    # Number of leading path components to drop from header paths, see
    # mangle(). Set to 1 when a git header uses the a/ and b/ prefixes.
    self.patchlevel = 0
    if self.is_git_diff:
      self._verify_git_header()
    else:
      self._verify_svn_header()

  def get(self):
    """Returns the whole diff: header followed by hunks."""
    return self.diff_header + self.diff_hunks

  def set_relpath(self, relpath):
    """Offsets the filename by relpath and rewrites the header to match."""
    old_filename = self.filename
    super(FilePatchDiff, self).set_relpath(relpath)
    # Update the header too.
    self.diff_header = self.diff_header.replace(old_filename, self.filename)

  def _split_header(self, diff):
    """Splits a diff in two: the header and the hunks.

    Returns:
      A (header, hunks) tuple of strings. Backslashes are converted to '/' on
      header lines that start with a known keyword or end with the file's
      basename.

    Raises:
      UnsupportedPatchFormat: if '---' is not followed by '+++', or '+++' is
          followed by something other than a '@@ ' hunk header.
    """
    header = []
    hunks = diff.splitlines(True)
    # Move lines into the header up to and including the '--- ' line. Some
    # diffs have no ---/+++ set at all, like a git rename with no change or a
    # svn diff with only property changes; then every line lands in the header.
    while hunks:
      header.append(hunks.pop(0))
      if header[-1].startswith('--- '):
        break

    if hunks:
      if not hunks[0].startswith('+++ '):
        self._fail('Inconsistent header')
      header.append(hunks.pop(0))
      if hunks and not hunks[0].startswith('@@ '):
        self._fail('Inconsistent hunk header')

    # Mangle any \\ in the header to /.
    header_lines = ('Index:', 'diff', 'copy', 'rename', '+++', '---')
    basename = os.path.basename(self.filename)
    for i, line in enumerate(header):
      # Lines still carry their line ending (splitlines(True)), so strip it
      # before checking whether the line ends with the file's basename.
      if (line.split(' ', 1)[0] in header_lines or
          line.rstrip('\r\n').endswith(basename)):
        header[i] = line.replace('\\', '/')
    return ''.join(header), ''.join(hunks)

  @staticmethod
  def _is_git_diff_header(diff_header):
    """Returns True if the diff for a single files was generated with git."""
    # Delete: http://codereview.chromium.org/download/issue6368055_22_29.diff
    # Rename partial change:
    # http://codereview.chromium.org/download/issue6250123_3013_6010.diff
    # Rename no change:
    # http://codereview.chromium.org/download/issue6287022_3001_4010.diff
    return any(l.startswith('diff --git') for l in diff_header.splitlines())

  def mangle(self, string):
    """Normalizes a path to '/' and drops the first patchlevel components."""
    return '/'.join(string.replace('\\', '/').split('/')[self.patchlevel:])

  def _verify_git_header(self):
    """Sanity checks the header.

    Expects the following format:

    <garbage>
    diff --git (|a/)<filename> (|b/)<filename>
    <similarity>
    <filemode changes>
    <index>
    <copy|rename from>
    <copy|rename to>
    --- <filename>
    +++ <filename>

    Everything is optional except the diff --git line.

    Raises:
      UnsupportedPatchFormat: on any inconsistency.
    """
    lines = self.diff_header.splitlines()

    # Verify the diff --git line; anything before it is skipped.
    old = None
    new = None
    while lines:
      match = re.match(r'^diff \-\-git (.*?) (.*)$', lines.pop(0))
      if not match:
        continue
      old = match.group(1).replace('\\', '/')
      new = match.group(2).replace('\\', '/')
      if old.startswith('a/') and new.startswith('b/'):
        self.patchlevel = 1
        old = old[2:]
        new = new[2:]
      # The rename is about the new file so the old file can be anything.
      if new not in (self.filename, 'dev/null'):
        self._fail('Unexpected git diff output name %s.' % new)
      if old == 'dev/null' and new == 'dev/null':
        self._fail('Unexpected /dev/null git diff.')
      break

    if not old or not new:
      self._fail('Unexpected git diff; couldn\'t find git header.')

    last_line = ''

    while lines:
      line = lines.pop(0)
      # TODO(maruel): old should be replace with self.source_file
      # TODO(maruel): new == self.filename and remove new
      self._verify_git_header_process_line(lines, line, last_line, old, new)
      last_line = line

    # Cheap check to make sure the file name is at least mentioned in the
    # 'diff' header. That the only remaining invariant.
    if self.filename not in self.diff_header:
      self._fail('Diff seems corrupted.')

  def _verify_git_header_process_line(self, lines, line, last_line, old, new):
    """Processes a single line of the git header.

    Format is described to
    http://www.kernel.org/pub/software/scm/git/docs/git-diff.html

    Args:
      lines: header lines remaining after line.
      line: the header line being checked.
      last_line: the header line processed just before, or '' for the first.
      old: source path taken from the 'diff --git' line.
      new: destination path taken from the 'diff --git' line.

    Returns nothing. May set self.is_new and append to self.svn_properties.

    Raises:
      UnsupportedPatchFormat: if the line is inconsistent with its neighbors
          or with the 'diff --git' line.
    """
    match = re.match(r'^(rename|copy) from (.+)$', line)
    if match:
      if old != match.group(2):
        self._fail('Unexpected git diff input name for line %s.' % line)
      if not lines or not lines[0].startswith('%s to ' % match.group(1)):
        self._fail(
            'Confused %s from/to git diff for line %s.' %
                (match.group(1), line))
      return

    match = re.match(r'^(rename|copy) to (.+)$', line)
    if match:
      if new != match.group(2):
        self._fail('Unexpected git diff output name for line %s.' % line)
      if not last_line.startswith('%s from ' % match.group(1)):
        self._fail(
            'Confused %s from/to git diff for line %s.' %
                (match.group(1), line))
      return

    match = re.match(r'^new(| file) mode (\d{6})$', line)
    if match:
      mode = match.group(2)
      # Only look at owner ACL for executable. The mode is six octal digits,
      # e.g. '100755': three for the file type, then owner, group and other,
      # so the owner digit is at index 3.
      # TODO(maruel): Add support to remove a property.
      if int(mode[3]) & 1:
        self.svn_properties.append(('svn:executable', '*'))

    match = re.match(r'^--- (.*)$', line)
    if match:
      if last_line[:3] in ('---', '+++'):
        self._fail('--- and +++ are reversed')
      self.is_new = match.group(1) == '/dev/null'
      # TODO(maruel): Use self.source_file.
      if old != self.mangle(match.group(1)) and match.group(1) != '/dev/null':
        self._fail('Unexpected git diff: %s != %s.' % (old, match.group(1)))
      if not lines or not lines[0].startswith('+++'):
        self._fail('Missing git diff output name.')
      return

    match = re.match(r'^\+\+\+ (.*)$', line)
    if match:
      if not last_line.startswith('---'):
        self._fail('Unexpected git diff: --- not following +++.')
      # TODO(maruel): new == self.filename.
      if new != self.mangle(match.group(1)) and '/dev/null' != match.group(1):
        # TODO(maruel): Can +++ be /dev/null? If so, assert self.is_delete ==
        # True.
        self._fail('Unexpected git diff: %s != %s.' % (new, match.group(1)))
      if lines:
        self._fail('Crap after +++')
      # We're done.
      return

  def _verify_svn_header(self):
    """Sanity checks the header.

    A svn diff can contain only property changes, in that case there will be no
    proper header. To make things worse, this property change header is
    localized.

    Raises:
      UnsupportedPatchFormat: on any inconsistency.
    """
    lines = self.diff_header.splitlines()
    last_line = ''

    while lines:
      line = lines.pop(0)
      self._verify_svn_header_process_line(lines, line, last_line)
      last_line = line

    # Cheap check to make sure the file name is at least mentioned in the
    # 'diff' header. That the only remaining invariant.
    if self.filename not in self.diff_header:
      self._fail('Diff seems corrupted.')

  def _verify_svn_header_process_line(self, lines, line, last_line):
    """Processes a single line of the svn header.

    Args:
      lines: header lines remaining after line.
      line: the header line being checked.
      last_line: the header line processed just before, or '' for the first.

    Returns nothing. May set self.is_new.

    Raises:
      UnsupportedPatchFormat: if the ---/+++ pair is malformed.
    """
    match = re.match(r'^--- ([^\t]+).*$', line)
    if match:
      if last_line[:3] in ('---', '+++'):
        self._fail('--- and +++ are reversed')
      self.is_new = match.group(1) == '/dev/null'
      # For copy and renames, it's possible that the -- line doesn't match
      # +++, so don't check match.group(1) to match self.filename or
      # '/dev/null', it can be anything else.
      # TODO(maruel): Handle rename/copy explicitly.
      # if (self.mangle(match.group(1)) != self.filename and
      #     match.group(1) != '/dev/null'):
      #   self.source_file = match.group(1)
      if not lines or not lines[0].startswith('+++'):
        self._fail('Nothing after header.')
      return

    match = re.match(r'^\+\+\+ ([^\t]+).*$', line)
    if match:
      if not last_line.startswith('---'):
        self._fail('Unexpected diff: --- not following +++.')
      if (self.mangle(match.group(1)) != self.filename and
          match.group(1) != '/dev/null'):
        # TODO(maruel): Can +++ be /dev/null? If so, assert self.is_delete ==
        # True.
        self._fail('Unexpected diff: %s.' % match.group(1))
      if lines:
        self._fail('Crap after +++')
      # We're done.
      return
maruel@chromium.orgb3727a32011-04-04 19:31:44 +0000335
336
class PatchSet(object):
  """An iterable collection of FilePatch* objects."""

  def __init__(self, patches):
    """Stores patches and asserts that each one is a FilePatchBase."""
    self.patches = patches
    for item in self.patches:
      assert isinstance(item, FilePatchBase)

  def set_relpath(self, relpath):
    """Offsets every patch into the subdirectory relpath."""
    for entry in self.patches:
      entry.set_relpath(relpath)

  def __iter__(self):
    """Yields the patches in their stored order."""
    for entry in self.patches:
      yield entry

  @property
  def filenames(self):
    """The filename of every patch, in stored order."""
    names = []
    for entry in self.patches:
      names.append(entry.filename)
    return names