# coding=utf8
# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Utility functions to handle patches."""

import os
import posixpath
import re


class UnsupportedPatchFormat(Exception):
  """Raised when a patch for a file cannot be processed.

  Attributes:
    filename: name of the file the offending patch applies to.
    status: human-readable explanation; omitted from the message when falsy.
  """

  def __init__(self, filename, status):
    # Forward both values so they also show up in Exception.args.
    super(UnsupportedPatchFormat, self).__init__(filename, status)
    self.filename = filename
    self.status = status

  def __str__(self):
    """Returns the message, with the status on a second line when set."""
    out = 'Can\'t process patch for file %s.' % self.filename
    if self.status:
      out += '\n%s' % self.status
    return out


class FilePatchBase(object):
  """Defines a single file being modified.

  '/' is always used instead of os.sep for consistency.

  Attributes:
    filename: sanitized path of the file, using '/' as separator.
    source_filename: path of the original file when the file is copied or
        moved, None otherwise.
  """
  is_delete = False
  is_binary = False
  is_new = False

  def __init__(self, filename):
    self.filename = self._process_filename(filename)
    # Set when the file is copied or moved.
    self.source_filename = None

  @staticmethod
  def _process_filename(filename):
    """Normalizes path separators to '/' and rejects unsupported names.

    Returns:
      The filename with every backslash replaced by '/'.

    Raises:
      UnsupportedPatchFormat: the name contains a blacklisted sequence or
          starts with a blacklisted prefix.
    """
    filename = filename.replace('\\', '/')
    # Blacklist a few characters for simplicity.
    for i in ('%', '$', '..', '\'', '"'):
      if i in filename:
        raise UnsupportedPatchFormat(
            filename, 'Can\'t use \'%s\' in filename.' % i)
    # This is a plain prefix test: any name beginning with these letters is
    # rejected, not only an exact match.
    for i in ('/', 'CON', 'COM'):
      if filename.startswith(i):
        raise UnsupportedPatchFormat(
            filename, 'Filename can\'t start with \'%s\'.' % i)
    return filename

  def get(self):  # pragma: no coverage
    """Returns the patch content; must be overridden by subclasses."""
    raise NotImplementedError('Nothing to grab')

  def set_relpath(self, relpath):
    """Prefixes filename, and source_filename when set, with relpath.

    A falsy relpath is a no-op. Backslashes in relpath are converted to '/'.

    Raises:
      UnsupportedPatchFormat: relpath starts with '/' or the joined path is
          rejected by _process_filename().
    """
    if not relpath:
      return
    relpath = relpath.replace('\\', '/')
    if relpath[0] == '/':
      self._fail('Relative path starts with %s' % relpath[0])
    self.filename = self._process_filename(
        posixpath.join(relpath, self.filename))
    if self.source_filename:
      self.source_filename = self._process_filename(
          posixpath.join(relpath, self.source_filename))

  def _fail(self, msg):
    """Shortcut function to raise UnsupportedPatchFormat."""
    raise UnsupportedPatchFormat(self.filename, msg)


class FilePatchDelete(FilePatchBase):
  """Deletes a file.

  Attributes:
    is_binary: whether the deleted file is binary, as given by the caller.
  """
  is_delete = True

  def __init__(self, filename, is_binary):
    super(FilePatchDelete, self).__init__(filename)
    self.is_binary = is_binary

  def get(self):
    """A deletion carries no content; always raises NotImplementedError."""
    raise NotImplementedError('Nothing to grab')


class FilePatchBinary(FilePatchBase):
  """Content of a new binary file.

  Attributes:
    data: the file content, returned as-is by get().
    svn_properties: the properties given by the caller, or an empty list
        when a falsy value was passed.
    is_new: whether the file is newly added, as given by the caller.
  """
  is_binary = True

  def __init__(self, filename, data, svn_properties, is_new):
    super(FilePatchBinary, self).__init__(filename)
    self.data = data
    self.svn_properties = svn_properties or []
    self.is_new = is_new

  def get(self):
    """Returns the binary content."""
    return self.data


class FilePatchDiff(FilePatchBase):
  """Patch for a single file.

  Attributes:
    diff_header: every line up to and including the '+++ ' line, or the
        whole diff when it has no '--- ' line.
    diff_hunks: the remaining lines of the diff.
    svn_properties: list of (name, value) properties; header verification may
        append ('svn:executable', '*') to it.
    is_git_diff: True when the header contains a 'diff --git' line.
    patchlevel: number of leading path components stripped by mangle().
  """

  def __init__(self, filename, diff, svn_properties):
    super(FilePatchDiff, self).__init__(filename)
    if not diff:
      self._fail('File doesn\'t have a diff.')
    self.diff_header, self.diff_hunks = self._split_header(diff)
    # Copy the list: header verification appends to it and must not modify
    # the object owned by the caller.
    self.svn_properties = list(svn_properties or [])
    self.is_git_diff = self._is_git_diff_header(self.diff_header)
    self.patchlevel = 0
    if self.is_git_diff:
      self._verify_git_header()
    else:
      self._verify_svn_header()

  def get(self):
    """Returns the complete diff: header followed by hunks."""
    return self.diff_header + self.diff_hunks

  def set_relpath(self, relpath):
    """Offsets the file names and rewrites the diff header accordingly."""
    old_filename = self.filename
    old_source_filename = self.source_filename or self.filename
    super(FilePatchDiff, self).set_relpath(relpath)
    # Update the header too.
    source_filename = self.source_filename or self.filename
    lines = self.diff_header.splitlines(True)
    for i, line in enumerate(lines):
      if line.startswith('diff --git'):
        # NOTE(review): the 'a/' and 'b/' prefixes are consumed by this
        # replacement while the ---/+++ lines below keep theirs; confirm this
        # asymmetry is intended before changing it.
        lines[i] = line.replace(
            'a/' + old_source_filename, source_filename).replace(
                'b/' + old_filename, self.filename)
      elif re.match(r'^\w+ from .+$', line) or line.startswith('---'):
        lines[i] = line.replace(old_source_filename, source_filename)
      elif re.match(r'^\w+ to .+$', line) or line.startswith('+++'):
        lines[i] = line.replace(old_filename, self.filename)
    self.diff_header = ''.join(lines)

  def _split_header(self, diff):
    """Splits a diff in two: the header and the hunks.

    Returns:
      A (header, hunks) tuple of strings.

    Raises:
      UnsupportedPatchFormat: the '--- ' line is followed by something other
          than a '+++ ' line, or the '+++ ' line is followed by something
          other than a '@@ ' hunk header.
    """
    lines = diff.splitlines(True)
    # The header stops right after the first '--- ' line. Some diff may not
    # have a ---/+++ set like a git rename with no change or a svn diff with
    # only property change; everything is header in that case.
    split = len(lines)
    for index, line in enumerate(lines):
      if line.startswith('--- '):
        split = index + 1
        break
    header = lines[:split]
    hunks = lines[split:]

    if hunks:
      if not hunks[0].startswith('+++ '):
        self._fail('Inconsistent header')
      header.append(hunks.pop(0))
      if hunks and not hunks[0].startswith('@@ '):
        self._fail('Inconsistent hunk header')

    # Mangle any \\ in the header to /.
    header_lines = ('Index:', 'diff', 'copy', 'rename', '+++', '---')
    basename = os.path.basename(self.filename)
    for i, line in enumerate(header):
      # Lines still carry their terminator, so strip it before looking for a
      # trailing file name; otherwise the check could never match.
      if (line.split(' ', 1)[0] in header_lines or
          line.rstrip('\r\n').endswith(basename)):
        header[i] = line.replace('\\', '/')
    return ''.join(header), ''.join(hunks)

  @staticmethod
  def _is_git_diff_header(diff_header):
    """Returns True if the diff for a single files was generated with git."""
    # Delete: http://codereview.chromium.org/download/issue6368055_22_29.diff
    # Rename partial change:
    # http://codereview.chromium.org/download/issue6250123_3013_6010.diff
    # Rename no change:
    # http://codereview.chromium.org/download/issue6287022_3001_4010.diff
    return any(l.startswith('diff --git') for l in diff_header.splitlines())

  def mangle(self, string):
    """Mangle a file path.

    Converts backslashes to '/' and drops the first self.patchlevel path
    components.
    """
    return '/'.join(string.replace('\\', '/').split('/')[self.patchlevel:])

  def _verify_git_header(self):
    """Sanity checks the header.

    Expects the following format:

    <garbage>
    diff --git (|a/)<filename> (|b/)<filename>
    <similarity>
    <filemode changes>
    <index>
    <copy|rename from>
    <copy|rename to>
    --- <filename>
    +++ <filename>

    Everything is optional except the diff --git line.

    Sets patchlevel, source_filename, is_new and is_delete as a side effect.

    Raises:
      UnsupportedPatchFormat: the header is inconsistent.
    """
    lines = self.diff_header.splitlines()

    # Verify the diff --git line.
    old = None
    new = None
    while lines:
      match = re.match(r'^diff \-\-git (.*?) (.*)$', lines.pop(0))
      if not match:
        continue
      if match.group(1).startswith('a/') and match.group(2).startswith('b/'):
        self.patchlevel = 1
      old = self.mangle(match.group(1))
      new = self.mangle(match.group(2))

      # The rename is about the new file so the old file can be anything.
      if new not in (self.filename, 'dev/null'):
        self._fail('Unexpected git diff output name %s.' % new)
      if old == 'dev/null' and new == 'dev/null':
        self._fail('Unexpected /dev/null git diff.')
      break

    if not old or not new:
      self._fail('Unexpected git diff; couldn\'t find git header.')

    if old not in (self.filename, 'dev/null'):
      # Copy or rename.
      self.source_filename = old
      self.is_new = True

    last_line = ''

    while lines:
      line = lines.pop(0)
      self._verify_git_header_process_line(lines, line, last_line)
      last_line = line

    # Cheap check to make sure the file name is at least mentioned in the
    # 'diff' header. That's the only remaining invariant.
    if self.filename not in self.diff_header:
      self._fail('Diff seems corrupted.')

  def _verify_git_header_process_line(self, lines, line, last_line):
    """Processes a single line of the header.

    Always returns None; any problem is reported by raising
    UnsupportedPatchFormat.

    Format is described to
    http://www.kernel.org/pub/software/scm/git/docs/git-diff.html

    Args:
      lines: header lines not processed yet; only peeked at.
      line: the line being processed.
      last_line: the previously processed line, '' for the first one.
    """
    match = re.match(r'^(rename|copy) from (.+)$', line)
    old = self.source_filename or self.filename
    if match:
      if old != match.group(2):
        self._fail('Unexpected git diff input name for line %s.' % line)
      if not lines or not lines[0].startswith('%s to ' % match.group(1)):
        self._fail(
            'Confused %s from/to git diff for line %s.' %
                (match.group(1), line))
      return

    match = re.match(r'^(rename|copy) to (.+)$', line)
    if match:
      if self.filename != match.group(2):
        self._fail('Unexpected git diff output name for line %s.' % line)
      if not last_line.startswith('%s from ' % match.group(1)):
        self._fail(
            'Confused %s from/to git diff for line %s.' %
                (match.group(1), line))
      return

    match = re.match(r'^new(| file) mode (\d{6})$', line)
    if match:
      mode = match.group(2)
      # Only look at owner ACL for executable. In a six digit mode such as
      # 100755 the owner permissions are the fourth digit, i.e. index 3.
      # TODO(maruel): Add support to remove a property.
      if bool(int(mode[3]) & 1):
        self.svn_properties.append(('svn:executable', '*'))

    match = re.match(r'^--- (.*)$', line)
    if match:
      if last_line[:3] in ('---', '+++'):
        self._fail('--- and +++ are reversed')
      if match.group(1) == '/dev/null':
        self.is_new = True
      elif self.mangle(match.group(1)) != old:
        # git patches are always well formatted, do not allow random filenames.
        self._fail('Unexpected git diff: %s != %s.' % (old, match.group(1)))
      if not lines or not lines[0].startswith('+++'):
        self._fail('Missing git diff output name.')
      return

    match = re.match(r'^\+\+\+ (.*)$', line)
    if match:
      if not last_line.startswith('---'):
        self._fail('Unexpected git diff: --- not following +++.')
      if '/dev/null' == match.group(1):
        self.is_delete = True
      elif self.filename != self.mangle(match.group(1)):
        self._fail(
            'Unexpected git diff: %s != %s.' % (self.filename, match.group(1)))
      if lines:
        self._fail('Crap after +++')
      # We're done.
      return

  def _verify_svn_header(self):
    """Sanity checks the header.

    A svn diff can contain only property changes, in that case there will be no
    proper header. To make things worse, this property change header is
    localized.

    Raises:
      UnsupportedPatchFormat: the header is inconsistent.
    """
    lines = self.diff_header.splitlines()
    last_line = ''

    while lines:
      line = lines.pop(0)
      self._verify_svn_header_process_line(lines, line, last_line)
      last_line = line

    # Cheap check to make sure the file name is at least mentioned in the
    # 'diff' header. That's the only remaining invariant.
    if self.filename not in self.diff_header:
      self._fail('Diff seems corrupted.')

  def _verify_svn_header_process_line(self, lines, line, last_line):
    """Processes a single line of the header.

    Always returns None; any problem is reported by raising
    UnsupportedPatchFormat.

    Args:
      lines: header lines not processed yet; only peeked at.
      line: the line being processed.
      last_line: the previously processed line, '' for the first one.
    """
    match = re.match(r'^--- ([^\t]+).*$', line)
    if match:
      if last_line[:3] in ('---', '+++'):
        self._fail('--- and +++ are reversed')
      if match.group(1) == '/dev/null':
        self.is_new = True
      elif self.mangle(match.group(1)) != self.filename:
        # guess the source filename.
        self.source_filename = match.group(1)
        self.is_new = True
      if not lines or not lines[0].startswith('+++'):
        self._fail('Nothing after header.')
      return

    match = re.match(r'^\+\+\+ ([^\t]+).*$', line)
    if match:
      if not last_line.startswith('---'):
        self._fail('Unexpected diff: --- not following +++.')
      if match.group(1) == '/dev/null':
        self.is_delete = True
      elif self.mangle(match.group(1)) != self.filename:
        self._fail('Unexpected diff: %s.' % match.group(1))
      if lines:
        self._fail('Crap after +++')
      # We're done.
      return


class PatchSet(object):
  """A list of FilePatch* objects.

  Attributes:
    patches: the sequence given to the constructor, kept by reference.
  """

  def __init__(self, patches):
    self.patches = patches
    # Debug-time sanity check only; it is skipped when running with -O.
    for p in self.patches:
      assert isinstance(p, FilePatchBase)

  def set_relpath(self, relpath):
    """Used to offset the patch into a subdirectory."""
    for patch in self.patches:
      patch.set_relpath(relpath)

  def __iter__(self):
    """Yields each patch in order."""
    for patch in self.patches:
      yield patch

  @property
  def filenames(self):
    """Returns the list of the file names of all the patches, in order."""
    return [p.filename for p in self.patches]