# coding=utf8
# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Utility functions to handle patches."""
6
import os
import posixpath
import re
10
11
class UnsupportedPatchFormat(Exception):
    """Raised when a patch for a file can't be processed.

    Both constructor arguments are forwarded to Exception and also kept as
    attributes:
      filename: name of the file the patch applies to.
      status: optional detail message; appended to str() when truthy.
    """

    def __init__(self, filename, status):
        super(UnsupportedPatchFormat, self).__init__(filename, status)
        self.filename = filename
        self.status = status

    def __str__(self):
        """Returns the error message, with the status on a second line."""
        out = 'Can\'t process patch for file %s.' % self.filename
        if self.status:
            out += '\n%s' % self.status
        return out
23
24
class FilePatchBase(object):
    """Defines a single file being modified.

    '/' is always used instead of os.sep for consistency.
    """
    is_delete = False
    is_binary = False
    is_new = False

    def __init__(self, filename):
        """Stores the normalized filename.

        Raises UnsupportedPatchFormat if the filename is rejected by
        _process_filename().
        """
        self.filename = self._process_filename(filename)
        # Set when the file is copied or moved.
        self.source_filename = None

    @staticmethod
    def _process_filename(filename):
        """Returns filename with '\\' converted to '/'.

        Raises UnsupportedPatchFormat if the converted name contains one of
        the blacklisted substrings or starts with a blacklisted prefix.
        """
        filename = filename.replace('\\', '/')
        # Blacklist a few characters for simplicity.
        for i in ('%', '$', '..', '\'', '"'):
            if i in filename:
                raise UnsupportedPatchFormat(
                    filename, 'Can\'t use \'%s\' in filename.' % i)
        for i in ('/', 'CON', 'COM'):
            if filename.startswith(i):
                raise UnsupportedPatchFormat(
                    filename, 'Filename can\'t start with \'%s\'.' % i)
        return filename

    def get(self):  # pragma: no coverage
        """Always raises NotImplementedError in the base class."""
        raise NotImplementedError('Nothing to grab')

    def set_relpath(self, relpath):
        """Prefixes filename, and source_filename when set, with relpath.

        Does nothing when relpath is empty. Raises UnsupportedPatchFormat if
        relpath starts with '/' or if a resulting name is rejected by
        _process_filename().
        """
        if not relpath:
            return
        relpath = relpath.replace('\\', '/')
        if relpath[0] == '/':
            self._fail('Relative path starts with %s' % relpath[0])
        self.filename = self._process_filename(
            posixpath.join(relpath, self.filename))
        if self.source_filename:
            self.source_filename = self._process_filename(
                posixpath.join(relpath, self.source_filename))

    def _fail(self, msg):
        """Shortcut function to raise UnsupportedPatchFormat."""
        raise UnsupportedPatchFormat(self.filename, msg)
71
maruel@chromium.orgb3727a32011-04-04 19:31:44 +000072
class FilePatchDelete(FilePatchBase):
    """Deletes a file."""
    is_delete = True

    def __init__(self, filename, is_binary):
        """Records the file to delete and whether it is binary."""
        super(FilePatchDelete, self).__init__(filename)
        self.is_binary = is_binary

    def get(self):
        """Always raises NotImplementedError: a deletion has no content."""
        raise NotImplementedError('Nothing to grab')
83
84
class FilePatchBinary(FilePatchBase):
    """Content of a new binary file."""
    is_binary = True

    def __init__(self, filename, data, svn_properties, is_new):
        """Stores the binary content.

        svn_properties defaults to an empty list when falsy.
        """
        super(FilePatchBinary, self).__init__(filename)
        self.data = data
        self.svn_properties = svn_properties or []
        self.is_new = is_new

    def get(self):
        """Returns the data passed to the constructor."""
        return self.data
97
98
class FilePatchDiff(FilePatchBase):
    """Patch for a single file.

    The diff is split into a header and hunks. The header is sanity checked
    as a git header when it contains a 'diff --git' line, and as a svn header
    otherwise.
    """

    def __init__(self, filename, diff, svn_properties):
        """Splits and verifies diff.

        Raises UnsupportedPatchFormat if diff is empty or if its header fails
        the sanity checks.
        """
        super(FilePatchDiff, self).__init__(filename)
        if not diff:
            self._fail('File doesn\'t have a diff.')
        self.diff_header, self.diff_hunks = self._split_header(diff)
        self.svn_properties = svn_properties or []
        self.is_git_diff = self._is_git_diff_header(self.diff_header)
        # Number of leading path components dropped by mangle(). Set to 1 by
        # _verify_git_header() when the names carry the a/ and b/ prefixes.
        self.patchlevel = 0
        if self.is_git_diff:
            self._verify_git_header()
        else:
            self._verify_svn_header()

    def get(self):
        """Returns the whole diff: the header followed by the hunks."""
        return self.diff_header + self.diff_hunks

    def set_relpath(self, relpath):
        """Offsets the file names and rewrites the header lines to match."""
        old_filename = self.filename
        old_source_filename = self.source_filename or self.filename
        super(FilePatchDiff, self).set_relpath(relpath)
        # Update the header too.
        source_filename = self.source_filename or self.filename
        lines = self.diff_header.splitlines(True)
        for i, line in enumerate(lines):
            if line.startswith('diff --git'):
                # On this line the a/ and b/ prefixes are replaced together
                # with the old names.
                lines[i] = line.replace(
                    'a/' + old_source_filename, source_filename).replace(
                        'b/' + old_filename, self.filename)
            elif re.match(r'^\w+ from .+$', line) or line.startswith('---'):
                lines[i] = line.replace(old_source_filename, source_filename)
            elif re.match(r'^\w+ to .+$', line) or line.startswith('+++'):
                lines[i] = line.replace(old_filename, self.filename)
        self.diff_header = ''.join(lines)

    def _split_header(self, diff):
        """Splits a diff in two: the header and the hunks.

        The header runs up to and including the first '--- ' line and the
        '+++ ' line that must follow it. Returns a (header, hunks) tuple of
        strings. Raises UnsupportedPatchFormat when '--- ' isn't followed by
        '+++ ', or when the first hunk line doesn't start with '@@ '.
        """
        lines = diff.splitlines(True)
        # Some diff may not have a ---/+++ set like a git rename with no
        # change or a svn diff with only property change. In that case every
        # line is part of the header.
        split_at = len(lines)
        for index, line in enumerate(lines):
            if line.startswith('--- '):
                split_at = index + 1
                break
        header = lines[:split_at]
        hunks = lines[split_at:]

        if hunks:
            if not hunks[0].startswith('+++ '):
                self._fail('Inconsistent header')
            header.append(hunks.pop(0))
            if hunks and not hunks[0].startswith('@@ '):
                self._fail('Inconsistent hunk header')

        # Mangle any \\ in the header to /.
        header_lines = ('Index:', 'diff', 'copy', 'rename', '+++', '---')
        basename = os.path.basename(self.filename)
        for index, line in enumerate(header):
            if (line.split(' ', 1)[0] in header_lines or
                    line.endswith(basename)):
                header[index] = line.replace('\\', '/')
        return ''.join(header), ''.join(hunks)

    @staticmethod
    def _is_git_diff_header(diff_header):
        """Returns True if the diff for a single file was generated with git.
        """
        # Delete: http://codereview.chromium.org/download/issue6368055_22_29.diff
        # Rename partial change:
        # http://codereview.chromium.org/download/issue6250123_3013_6010.diff
        # Rename no change:
        # http://codereview.chromium.org/download/issue6287022_3001_4010.diff
        return any(
            l.startswith('diff --git') for l in diff_header.splitlines())

    def mangle(self, string):
        """Mangle a file path.

        Converts '\\' to '/' and drops the first self.patchlevel path
        components.
        """
        return '/'.join(
            string.replace('\\', '/').split('/')[self.patchlevel:])

    def _verify_git_header(self):
        """Sanity checks the header.

        Expects the following format:

        <garbage>
        diff --git (|a/)<filename> (|b/)<filename>
        <similarity>
        <filemode changes>
        <index>
        <copy|rename from>
        <copy|rename to>
        --- <filename>
        +++ <filename>

        Everything is optional except the diff --git line.

        Sets self.patchlevel and self.source_filename as a side effect.
        Raises UnsupportedPatchFormat when a check fails.
        """
        lines = self.diff_header.splitlines()

        # Verify the diff --git line.
        old = None
        new = None
        while lines:
            match = re.match(r'^diff \-\-git (.*?) (.*)$', lines.pop(0))
            if not match:
                continue
            if (match.group(1).startswith('a/') and
                    match.group(2).startswith('b/')):
                self.patchlevel = 1
            old = self.mangle(match.group(1))
            new = self.mangle(match.group(2))

            # The rename is about the new file so the old file can be
            # anything.
            if new not in (self.filename, 'dev/null'):
                self._fail('Unexpected git diff output name %s.' % new)
            if old == 'dev/null' and new == 'dev/null':
                self._fail('Unexpected /dev/null git diff.')
            break

        if not old or not new:
            self._fail('Unexpected git diff; couldn\'t find git header.')

        if old not in (self.filename, 'dev/null'):
            # Copy or rename.
            self.source_filename = old

        # Only the lines following the diff --git line are left at this
        # point; each one is checked along with its neighbors.
        last_line = ''
        while lines:
            line = lines.pop(0)
            self._verify_git_header_process_line(lines, line, last_line)
            last_line = line

        # Cheap check to make sure the file name is at least mentioned in the
        # 'diff' header. That the only remaining invariant.
        if self.filename not in self.diff_header:
            self._fail('Diff seems corrupted.')

    def _verify_git_header_process_line(self, lines, line, last_line):
        """Processes a single line of the header.

        Arguments:
          lines: the header lines following line.
          line: the line to check.
          last_line: the line preceding line, or '' for the first one.

        Returns None. Updates is_new, is_delete and svn_properties as a side
        effect and raises UnsupportedPatchFormat when a check fails.

        Format is described to
        http://www.kernel.org/pub/software/scm/git/docs/git-diff.html
        """
        old = self.source_filename or self.filename

        match = re.match(r'^(rename|copy) from (.+)$', line)
        if match:
            if old != match.group(2):
                self._fail(
                    'Unexpected git diff input name for line %s.' % line)
            if (not lines or
                    not lines[0].startswith('%s to ' % match.group(1))):
                self._fail(
                    'Confused %s from/to git diff for line %s.' %
                    (match.group(1), line))
            return

        match = re.match(r'^(rename|copy) to (.+)$', line)
        if match:
            if self.filename != match.group(2):
                self._fail(
                    'Unexpected git diff output name for line %s.' % line)
            if not last_line.startswith('%s from ' % match.group(1)):
                self._fail(
                    'Confused %s from/to git diff for line %s.' %
                    (match.group(1), line))
            return

        match = re.match(r'^new(| file) mode (\d{6})$', line)
        if match:
            mode = match.group(2)
            # The six digits are <3 digits of file type><owner><group><other>.
            # Only look at owner ACL for executable.
            # TODO(maruel): Add support to remove a property.
            if int(mode[3]) & 1:
                self.svn_properties.append(('svn:executable', '*'))
            return

        match = re.match(r'^--- (.*)$', line)
        if match:
            if last_line[:3] in ('---', '+++'):
                self._fail('--- and +++ are reversed')
            source = match.group(1)
            self.is_new = source == '/dev/null'
            # /dev/null is compared unmangled so that it is also accepted
            # when patchlevel is 0.
            # TODO(maruel): Use self.source_file.
            if (not self.is_new and
                    self.mangle(source) not in (old, 'dev/null')):
                self._fail('Unexpected git diff: %s != %s.' % (old, source))
            if not lines or not lines[0].startswith('+++'):
                self._fail('Missing git diff output name.')
            return

        match = re.match(r'^\+\+\+ (.*)$', line)
        if match:
            if not last_line.startswith('---'):
                self._fail('Unexpected git diff: --- not following +++.')
            # TODO(maruel): new == self.filename.
            if '/dev/null' == match.group(1):
                self.is_delete = True
            elif self.filename != self.mangle(match.group(1)):
                self._fail(
                    'Unexpected git diff: %s != %s.' %
                    (self.filename, match.group(1)))
            if lines:
                self._fail('Crap after +++')
            # We're done.
            return

    def _verify_svn_header(self):
        """Sanity checks the header.

        A svn diff can contain only property changes, in that case there will
        be no proper header. To make things worse, this property change header
        is localized.

        Raises UnsupportedPatchFormat when a check fails.
        """
        lines = self.diff_header.splitlines()
        last_line = ''

        while lines:
            line = lines.pop(0)
            self._verify_svn_header_process_line(lines, line, last_line)
            last_line = line

        # Cheap check to make sure the file name is at least mentioned in the
        # 'diff' header. That the only remaining invariant.
        if self.filename not in self.diff_header:
            self._fail('Diff seems corrupted.')

    def _verify_svn_header_process_line(self, lines, line, last_line):
        """Processes a single line of the header.

        Arguments:
          lines: the header lines following line.
          line: the line to check.
          last_line: the line preceding line, or '' for the first one.

        Returns None. Updates is_new, is_delete and source_filename as a side
        effect and raises UnsupportedPatchFormat when a check fails.
        """
        # Only the text up to the first tab is treated as the file name.
        match = re.match(r'^--- ([^\t]+).*$', line)
        if match:
            if last_line[:3] in ('---', '+++'):
                self._fail('--- and +++ are reversed')
            self.is_new = match.group(1) == '/dev/null'
            if (self.mangle(match.group(1)) != self.filename and
                    match.group(1) != '/dev/null'):
                self.source_filename = match.group(1)
            if not lines or not lines[0].startswith('+++'):
                self._fail('Nothing after header.')
            return

        match = re.match(r'^\+\+\+ ([^\t]+).*$', line)
        if match:
            if not last_line.startswith('---'):
                self._fail('Unexpected diff: --- not following +++.')
            if match.group(1) == '/dev/null':
                self.is_delete = True
            elif self.mangle(match.group(1)) != self.filename:
                self._fail('Unexpected diff: %s.' % match.group(1))
            if lines:
                self._fail('Crap after +++')
            # We're done.
            return
maruel@chromium.orgb3727a32011-04-04 19:31:44 +0000350
351
class PatchSet(object):
    """A list of FilePatch* objects."""

    def __init__(self, patches):
        """Stores patches; every item must be a FilePatchBase instance."""
        self.patches = patches
        for p in self.patches:
            assert isinstance(p, FilePatchBase)

    def set_relpath(self, relpath):
        """Used to offset the patch into a subdirectory."""
        for patch in self.patches:
            patch.set_relpath(relpath)

    def __iter__(self):
        """Yields each patch in order."""
        for patch in self.patches:
            yield patch

    @property
    def filenames(self):
        """Returns the list of each patch's filename, in order."""
        return [p.filename for p in self.patches]
371 return [p.filename for p in self.patches]