blob: 77bb15991065f4fefd363335e26e59cad19dffa6 [file] [log] [blame]
maruel@chromium.orgb3727a32011-04-04 19:31:44 +00001# coding=utf8
2# Copyright (c) 2011 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5"""Utility functions to handle patches."""
6
maruel@chromium.orgcd619402011-04-09 00:08:00 +00007import posixpath
8import os
maruel@chromium.orgb3727a32011-04-04 19:31:44 +00009import re
10
11
class UnsupportedPatchFormat(Exception):
  """Raised when the patch for a file cannot be processed.

  Attributes:
    filename: name of the file whose patch was rejected.
    status: optional explanation; omitted from the message when falsy.
  """

  def __init__(self, filename, status):
    self.filename = filename
    self.status = status
    super(UnsupportedPatchFormat, self).__init__(filename, status)

  def __str__(self):
    """Returns the headline, followed by the status on its own line if any."""
    message = 'Can\'t process patch for file %s.' % self.filename
    if not self.status:
      return message
    return message + '\n%s' % self.status
23
24
class FilePatchBase(object):
  """Describes one file touched by a patch.

  File names are always stored with '/' as separator, never os.sep, so they
  look the same on every platform.
  """
  is_delete = False
  is_binary = False
  is_new = False

  def __init__(self, filename):
    self.filename = None
    self._set_filename(filename)

  def _set_filename(self, filename):
    """Stores |filename| with '/' separators, then validates it.

    The normalized name is assigned before the checks run, so a rejection
    raised through _fail() reports the normalized name.
    """
    self.filename = filename.replace('\\', '/')
    # A few substrings are rejected outright to keep things simple.
    for forbidden in ('%', '$', '..', '\'', '"'):
      if forbidden in self.filename:
        self._fail('Can\'t use \'%s\' in filename.' % forbidden)
    for prefix in ('/', 'CON', 'COM'):
      if self.filename.startswith(prefix):
        self._fail('Filename can\'t start with \'%s\'.' % prefix)

  def get(self):  # pragma: no coverage
    """Subclasses return their content here; the base has none."""
    raise NotImplementedError('Nothing to grab')

  def set_relpath(self, relpath):
    """Prefixes the file name with |relpath|; a falsy value is a no-op.

    Backslashes in |relpath| are converted to '/' first. A path starting
    with '/' is rejected through _fail().
    """
    if relpath:
      prefix = relpath.replace('\\', '/')
      if prefix.startswith('/'):
        self._fail('Relative path starts with %s' % prefix[0])
      self._set_filename(posixpath.join(prefix, self.filename))

  def _fail(self, msg):
    """Raises UnsupportedPatchFormat for the current file name."""
    raise UnsupportedPatchFormat(self.filename, msg)
61
maruel@chromium.orgb3727a32011-04-04 19:31:44 +000062
class FilePatchDelete(FilePatchBase):
  """Represents the removal of a file; there is no content to retrieve."""
  is_delete = True

  def __init__(self, filename, is_binary):
    """Records the file name and whether the removed file is binary."""
    super(FilePatchDelete, self).__init__(filename)
    self.is_binary = is_binary

  def get(self):
    """Always raises: a deletion carries no data."""
    raise NotImplementedError('Nothing to grab')
73
74
class FilePatchBinary(FilePatchBase):
  """Holds the complete content of a binary file carried by a patch."""
  is_binary = True

  def __init__(self, filename, data, svn_properties, is_new):
    """Stores the content and metadata as given.

    A falsy |svn_properties| is replaced by a fresh empty list.
    """
    super(FilePatchBinary, self).__init__(filename)
    self.is_new = is_new
    self.svn_properties = svn_properties or []
    self.data = data

  def get(self):
    """Returns the data handed to the constructor, unchanged."""
    return self.data
87
88
class FilePatchDiff(FilePatchBase):
  """Patch for a single file, expressed as a textual diff.

  The diff is split into a header (everything up to and including the
  '---'/'+++' pair) and the hunks that follow. The header is then sanity
  checked as either a git or a svn style header.

  Attributes:
    diff_header: header part of the diff, with '\\' turned into '/' on the
        lines that carry file names.
    diff_hunks: remainder of the diff, starting at the first '@@ ' line.
    svn_properties: list of properties; presumably (name, value) pairs like
        the ('svn:executable', '*') entry appended by the git header check.
    is_git_diff: True when the header contains a 'diff --git' line.
    patchlevel: number of leading path components dropped by mangle(); set to
        1 when the git header uses the a/ and b/ prefixes.
  """

  def __init__(self, filename, diff, svn_properties):
    """Parses and validates |diff|.

    Raises:
      UnsupportedPatchFormat (through _fail()) when |diff| is empty or when
      its header doesn't pass the sanity checks.
    """
    super(FilePatchDiff, self).__init__(filename)
    if not diff:
      self._fail('File doesn\'t have a diff.')
    self.diff_header, self.diff_hunks = self._split_header(diff)
    # Copy the properties: the git header check may append to this list and
    # that must not leak into the list owned by the caller.
    self.svn_properties = list(svn_properties or [])
    self.is_git_diff = self._is_git_diff_header(self.diff_header)
    self.patchlevel = 0
    if self.is_git_diff:
      self._verify_git_header()
    else:
      self._verify_svn_header()

  def get(self):
    """Returns the whole diff: the header followed by the hunks."""
    return self.diff_header + self.diff_hunks

  def set_relpath(self, relpath):
    """Moves the patch under |relpath| and rewrites the header to match."""
    old_filename = self.filename
    super(FilePatchDiff, self).set_relpath(relpath)
    # Update the header too.
    # NOTE(review): this is a plain substring replacement, so any other
    # occurrence of the old file name text in the header is rewritten as well.
    self.diff_header = self.diff_header.replace(old_filename, self.filename)

  def _split_header(self, diff):
    """Splits a diff in two: the header and the hunks.

    Returns:
      A (header, hunks) tuple of strings. Line endings are preserved.
    """
    header = []
    hunks = diff.splitlines(True)
    while hunks:
      header.append(hunks.pop(0))
      if header[-1].startswith('--- '):
        break
    # Running out of lines without meeting '--- ' is fine: some diffs have no
    # ---/+++ pair, like a git rename with no change or a svn diff with only
    # property changes. In that case everything is header.

    if hunks:
      if not hunks[0].startswith('+++ '):
        self._fail('Inconsistent header')
      header.append(hunks.pop(0))
      if hunks:
        if not hunks[0].startswith('@@ '):
          self._fail('Inconsistent hunk header')

    # Mangle any \\ in the header to /.
    header_lines = ('Index:', 'diff', 'copy', 'rename', '+++', '---')
    basename = os.path.basename(self.filename)
    for i, line in enumerate(header):
      # The lines still carry their line ending, so it has to be dropped
      # before looking for the file name at the end of the line.
      if (line.split(' ', 1)[0] in header_lines or
          line.rstrip('\r\n').endswith(basename)):
        header[i] = line.replace('\\', '/')
    return ''.join(header), ''.join(hunks)

  @staticmethod
  def _is_git_diff_header(diff_header):
    """Returns True if the diff for a single file was generated with git."""
    # Delete: http://codereview.chromium.org/download/issue6368055_22_29.diff
    # Rename partial change:
    # http://codereview.chromium.org/download/issue6250123_3013_6010.diff
    # Rename no change:
    # http://codereview.chromium.org/download/issue6287022_3001_4010.diff
    return any(l.startswith('diff --git') for l in diff_header.splitlines())

  def mangle(self, string):
    """Mangles a file path.

    Converts '\\' to '/' and drops the first self.patchlevel path components.
    """
    return '/'.join(string.replace('\\', '/').split('/')[self.patchlevel:])

  def _verify_git_header(self):
    """Sanity checks the header.

    Expects the following format:

    <garbage>
    diff --git (|a/)<filename> (|b/)<filename>
    <similarity>
    <filemode changes>
    <index>
    <copy|rename from>
    <copy|rename to>
    --- <filename>
    +++ <filename>

    Everything is optional except the diff --git line.

    Raises:
      UnsupportedPatchFormat (through _fail()) on any inconsistency.
    """
    lines = self.diff_header.splitlines()

    # Verify the diff --git line.
    old = None
    new = None
    while lines:
      match = re.match(r'^diff \-\-git (.*?) (.*)$', lines.pop(0))
      if not match:
        continue
      old = match.group(1).replace('\\', '/')
      new = match.group(2).replace('\\', '/')
      if old.startswith('a/') and new.startswith('b/'):
        self.patchlevel = 1
        old = old[2:]
        new = new[2:]
      # The rename is about the new file so the old file can be anything.
      if new not in (self.filename, 'dev/null'):
        self._fail('Unexpected git diff output name %s.' % new)
      if old == 'dev/null' and new == 'dev/null':
        self._fail('Unexpected /dev/null git diff.')
      break

    if not old or not new:
      self._fail('Unexpected git diff; couldn\'t find git header.')

    last_line = ''

    while lines:
      line = lines.pop(0)
      # TODO(maruel): old should be replace with self.source_file
      # TODO(maruel): new == self.filename and remove new
      self._verify_git_header_process_line(lines, line, last_line, old, new)
      last_line = line

    # Cheap check to make sure the file name is at least mentioned in the
    # 'diff' header. That the only remaining invariant.
    if self.filename not in self.diff_header:
      self._fail('Diff seems corrupted.')

  def _verify_git_header_process_line(self, lines, line, last_line, old, new):
    """Processes a single line of the git header.

    Format is described to
    http://www.kernel.org/pub/software/scm/git/docs/git-diff.html

    Args:
      lines: header lines that come after |line|; only peeked at.
      line: the line being checked.
      last_line: the line processed just before, '' for the first one.
      old: source file name taken from the 'diff --git' line.
      new: destination file name taken from the 'diff --git' line.

    Returns nothing. Problems are reported by raising through _fail(). May
    set self.is_new and append to self.svn_properties.
    """
    match = re.match(r'^(rename|copy) from (.+)$', line)
    if match:
      if old != match.group(2):
        self._fail('Unexpected git diff input name for line %s.' % line)
      if not lines or not lines[0].startswith('%s to ' % match.group(1)):
        self._fail(
            'Confused %s from/to git diff for line %s.' %
                (match.group(1), line))
      return

    match = re.match(r'^(rename|copy) to (.+)$', line)
    if match:
      if new != match.group(2):
        self._fail('Unexpected git diff output name for line %s.' % line)
      if not last_line.startswith('%s from ' % match.group(1)):
        self._fail(
            'Confused %s from/to git diff for line %s.' %
                (match.group(1), line))
      return

    match = re.match(r'^new(| file) mode (\d{6})$', line)
    if match:
      mode = match.group(2)
      # Only look at owner ACL for executable. A git mode such as 100755 is
      # three digits of file type followed by the owner, group and other
      # permission digits, so the owner digit is at index 3.
      # TODO(maruel): Add support to remove a property.
      if int(mode[3]) & 1:
        self.svn_properties.append(('svn:executable', '*'))
      # A mode line can't match any of the patterns below.
      return

    match = re.match(r'^--- (.*)$', line)
    if match:
      if last_line[:3] in ('---', '+++'):
        self._fail('--- and +++ are reversed')
      self.is_new = match.group(1) == '/dev/null'
      # TODO(maruel): Use self.source_file.
      if old != self.mangle(match.group(1)) and match.group(1) != '/dev/null':
        self._fail('Unexpected git diff: %s != %s.' % (old, match.group(1)))
      if not lines or not lines[0].startswith('+++'):
        self._fail('Missing git diff output name.')
      return

    match = re.match(r'^\+\+\+ (.*)$', line)
    if match:
      if not last_line.startswith('---'):
        self._fail('Unexpected git diff: --- not following +++.')
      # TODO(maruel): new == self.filename.
      if new != self.mangle(match.group(1)) and '/dev/null' != match.group(1):
        # TODO(maruel): Can +++ be /dev/null? If so, assert self.is_delete ==
        # True.
        self._fail('Unexpected git diff: %s != %s.' % (new, match.group(1)))
      if lines:
        self._fail('Crap after +++')
      # We're done.
      return

  def _verify_svn_header(self):
    """Sanity checks the header.

    A svn diff can contain only property changes, in that case there will be no
    proper header. To make things worse, this property change header is
    localized.

    Raises:
      UnsupportedPatchFormat (through _fail()) on any inconsistency.
    """
    lines = self.diff_header.splitlines()
    last_line = ''

    while lines:
      line = lines.pop(0)
      self._verify_svn_header_process_line(lines, line, last_line)
      last_line = line

    # Cheap check to make sure the file name is at least mentioned in the
    # 'diff' header. That the only remaining invariant.
    if self.filename not in self.diff_header:
      self._fail('Diff seems corrupted.')

  def _verify_svn_header_process_line(self, lines, line, last_line):
    """Processes a single line of the svn header.

    Args:
      lines: header lines that come after |line|; only peeked at.
      line: the line being checked.
      last_line: the line processed just before, '' for the first one.

    Returns nothing. Problems are reported by raising through _fail(). May
    set self.is_new.
    """
    match = re.match(r'^--- ([^\t]+).*$', line)
    if match:
      if last_line[:3] in ('---', '+++'):
        self._fail('--- and +++ are reversed')
      self.is_new = match.group(1) == '/dev/null'
      # For copy and renames, it's possible that the -- line doesn't match
      # +++, so don't check match.group(1) to match self.filename or
      # '/dev/null', it can be anything else.
      # TODO(maruel): Handle rename/copy explicitly.
      # if (self.mangle(match.group(1)) != self.filename and
      #     match.group(1) != '/dev/null'):
      #   self.source_file = match.group(1)
      if not lines or not lines[0].startswith('+++'):
        self._fail('Nothing after header.')
      return

    match = re.match(r'^\+\+\+ ([^\t]+).*$', line)
    if match:
      if not last_line.startswith('---'):
        self._fail('Unexpected diff: --- not following +++.')
      if (self.mangle(match.group(1)) != self.filename and
          match.group(1) != '/dev/null'):
        # TODO(maruel): Can +++ be /dev/null? If so, assert self.is_delete ==
        # True.
        self._fail('Unexpected diff: %s.' % match.group(1))
      if lines:
        self._fail('Crap after +++')
      # We're done.
      return
maruel@chromium.orgb3727a32011-04-04 19:31:44 +0000330
331
class PatchSet(object):
  """An ordered collection of FilePatch* objects."""

  def __init__(self, patches):
    """Keeps |patches| as given; every item must be a FilePatchBase."""
    self.patches = patches
    for item in self.patches:
      assert isinstance(item, FilePatchBase)

  @property
  def filenames(self):
    """File name of every patch, in the same order as the patches."""
    names = []
    for item in self.patches:
      names.append(item.filename)
    return names

  def __iter__(self):
    """Yields the patches one by one, in order."""
    for item in self.patches:
      yield item

  def set_relpath(self, relpath):
    """Used to offset every patch into the subdirectory |relpath|."""
    for item in self.patches:
      item.set_relpath(relpath)