blob: 18c16e2c82c6d0c41433ed7f041abfdf1686b95f [file] [log] [blame]
maruel@chromium.orgb3727a32011-04-04 19:31:44 +00001# coding=utf8
2# Copyright (c) 2011 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5"""Utility functions to handle patches."""
6
maruel@chromium.orgcd619402011-04-09 00:08:00 +00007import posixpath
8import os
maruel@chromium.orgb3727a32011-04-04 19:31:44 +00009import re
10
11
class UnsupportedPatchFormat(Exception):
  """Raised when the patch for a file cannot be processed.

  Attributes:
    filename: name of the file the rejected patch applies to.
    status: detail message; appended to the str() output when truthy.
  """

  def __init__(self, filename, status):
    super(UnsupportedPatchFormat, self).__init__(filename, status)
    self.filename = filename
    self.status = status

  def __str__(self):
    message = 'Can\'t process patch for file %s.' % self.filename
    if not self.status:
      return message
    # The detail goes on its own line below the generic message.
    return message + '\n%s' % self.status
23
24
class FilePatchBase(object):
  """Base description of one file touched by a patch.

  The stored filename always uses '/' as separator, never os.sep, so that it
  is consistent across platforms.
  """
  is_delete = False
  is_binary = False

  def __init__(self, filename):
    self.filename = None
    self._set_filename(filename)

  def _set_filename(self, filename):
    """Stores filename with '/' separators and rejects unsupported names.

    The name is assigned before it is checked, so the error raised by _fail()
    reports the normalized name.
    """
    self.filename = filename.replace('\\', '/')
    # Blacklist a few characters for simplicity.
    for forbidden in ('%', '$', '..', '\'', '"'):
      if forbidden in self.filename:
        self._fail('Can\'t use \'%s\' in filename.' % forbidden)
    for prefix in ('/', 'CON', 'COM'):
      if self.filename.startswith(prefix):
        self._fail('Filename can\'t start with \'%s\'.' % prefix)

  def get(self):
    """Returns the patch content; the base class has none to return."""
    raise NotImplementedError('Nothing to grab')

  def set_relpath(self, relpath):
    """Prefixes the filename with relpath; a falsy relpath is a no-op."""
    if relpath:
      normalized = relpath.replace('\\', '/')
      if normalized.startswith('/'):
        self._fail('Relative path starts with %s' % normalized[0])
      self._set_filename(posixpath.join(normalized, self.filename))

  def _fail(self, msg):
    """Raises UnsupportedPatchFormat for the current filename."""
    raise UnsupportedPatchFormat(self.filename, msg)
60
maruel@chromium.orgb3727a32011-04-04 19:31:44 +000061
class FilePatchDelete(FilePatchBase):
  """Describes the removal of a file.

  Attributes:
    is_binary: value supplied by the caller at construction time.
  """
  is_delete = True

  def __init__(self, filename, is_binary):
    super(FilePatchDelete, self).__init__(filename)
    self.is_binary = is_binary

  def get(self):
    """A deletion carries no content, so there is nothing to return."""
    raise NotImplementedError('Nothing to grab')
72
73
class FilePatchBinary(FilePatchBase):
  """Holds the content of a new binary file.

  Attributes:
    data: the content handed to the constructor, returned as-is by get().
    svn_properties: the svn_properties argument, or [] when it is falsy.
  """
  is_binary = True

  def __init__(self, filename, data, svn_properties):
    super(FilePatchBinary, self).__init__(filename)
    self.data = data
    if not svn_properties:
      svn_properties = []
    self.svn_properties = svn_properties

  def get(self):
    """Returns the stored binary content."""
    return self.data
85
86
class FilePatchDiff(FilePatchBase):
  """Patch for a single file.

  Attributes:
    diff_header: text of the diff up to and including the ---/+++ lines.
    diff_hunks: remainder of the diff, starting at the first hunk.
    svn_properties: the svn_properties argument, or [] when it is falsy.
    is_git_diff: True when the header contains a 'diff --git' line.
    patchlevel: number of leading path components mangle() strips; set to 1
        when the git header uses the a/ and b/ prefixes.
  """

  def __init__(self, filename, diff, svn_properties):
    super(FilePatchDiff, self).__init__(filename)
    self.diff_header, self.diff_hunks = self._split_header(diff)
    self.svn_properties = svn_properties or []
    self.is_git_diff = self._is_git_diff_header(self.diff_header)
    self.patchlevel = 0
    if self.is_git_diff:
      self._verify_git_header()
      assert not svn_properties
    else:
      self._verify_svn_header()

  def get(self):
    """Returns the whole diff: header followed by hunks."""
    return self.diff_header + self.diff_hunks

  def set_relpath(self, relpath):
    """Prefixes the filename with relpath and rewrites it in the header."""
    old_filename = self.filename
    super(FilePatchDiff, self).set_relpath(relpath)
    # Update the header too.
    self.diff_header = self.diff_header.replace(old_filename, self.filename)

  def _split_header(self, diff):
    """Splits a diff in two: the header and the hunks.

    The header runs up to and including the first '--- ' line and the '+++ '
    line that must follow it when more lines remain. Backslashes in the
    recognized header lines are converted to '/'.

    Returns:
      A (header, hunks) tuple of strings.

    Raises:
      UnsupportedPatchFormat: the line after '--- ' is not '+++ ', or the line
          after '+++ ' is not a '@@ ' hunk header.
    """
    lines = diff.splitlines(True)
    # Some diff may not have a ---/+++ set like a git rename with no change or
    # a svn diff with only property change; everything is header in that case.
    split_at = len(lines)
    for index, line in enumerate(lines):
      if line.startswith('--- '):
        split_at = index + 1
        break
    header = lines[:split_at]
    hunks = lines[split_at:]

    if hunks:
      if not hunks[0].startswith('+++ '):
        self._fail('Inconsistent header')
      header.append(hunks.pop(0))
      if hunks:
        if not hunks[0].startswith('@@ '):
          self._fail('Inconsistent hunk header')

    # Mangle any \\ in the header to /.
    header_lines = ('Index:', 'diff', 'copy', 'rename', '+++', '---')
    basename = os.path.basename(self.filename)
    for index, line in enumerate(header):
      # Lines still carry their line terminator (splitlines(True)), so it is
      # stripped before looking for a trailing basename.
      if (line.split(' ', 1)[0] in header_lines or
          line.rstrip('\r\n').endswith(basename)):
        header[index] = line.replace('\\', '/')
    return ''.join(header), ''.join(hunks)

  @staticmethod
  def _is_git_diff_header(diff_header):
    """Returns True if the diff for a single file was generated with git."""
    # Delete: http://codereview.chromium.org/download/issue6368055_22_29.diff
    # Rename partial change:
    # http://codereview.chromium.org/download/issue6250123_3013_6010.diff
    # Rename no change:
    # http://codereview.chromium.org/download/issue6287022_3001_4010.diff
    return any(l.startswith('diff --git') for l in diff_header.splitlines())

  def mangle(self, string):
    """Mangle a file path.

    Converts backslashes to '/' and drops the first self.patchlevel path
    components.
    """
    return '/'.join(string.replace('\\', '/').split('/')[self.patchlevel:])

  def _verify_git_header(self):
    """Sanity checks the header.

    Expects the following format:

    <garbage>
    diff --git (|a/)<filename> (|b/)<filename>
    <similarity>
    <filemode changes>
    <index>
    <copy|rename from>
    <copy|rename to>
    --- <filename>
    +++ <filename>

    Everything is optional except the diff --git line.

    Raises:
      UnsupportedPatchFormat: the header does not follow the format above or
          names a file other than self.filename.
    """
    lines = self.diff_header.splitlines()

    # Verify the diff --git line.
    old = None
    new = None
    while lines:
      match = re.match(r'^diff \-\-git (.*?) (.*)$', lines.pop(0))
      if not match:
        continue
      old = match.group(1).replace('\\', '/')
      new = match.group(2).replace('\\', '/')
      if old.startswith('a/') and new.startswith('b/'):
        self.patchlevel = 1
        old = old[2:]
        new = new[2:]
      # The rename is about the new file so the old file can be anything.
      if new not in (self.filename, 'dev/null'):
        self._fail('Unexpected git diff output name %s.' % new)
      if old == 'dev/null' and new == 'dev/null':
        self._fail('Unexpected /dev/null git diff.')
      break

    if not old or not new:
      self._fail('Unexpected git diff; couldn\'t find git header.')

    # Handle these:
    #   rename from <>
    #   rename to <>
    #   copy from <>
    #   copy to <>
    while lines:
      if lines[0].startswith('--- '):
        break
      match = re.match(r'^(rename|copy) from (.+)$', lines.pop(0))
      if not match:
        continue
      # Remember the operation now: match is rebound below and is None when
      # the 'to' line is missing or malformed.
      operation = match.group(1)
      if old != match.group(2):
        self._fail('Unexpected git diff input name for %s.' % operation)
      if not lines:
        self._fail('Missing git diff output name for %s.' % operation)
      match = re.match(r'^(rename|copy) to (.+)$', lines.pop(0))
      if not match:
        self._fail('Missing git diff output name for %s.' % operation)
      if new != match.group(2):
        self._fail('Unexpected git diff output name for %s.' % match.group(1))

    # Handle ---/+++
    while lines:
      match = re.match(r'^--- (.*)$', lines.pop(0))
      if not match:
        continue
      if old != self.mangle(match.group(1)) and match.group(1) != '/dev/null':
        self._fail('Unexpected git diff: %s != %s.' % (old, match.group(1)))
      if not lines:
        self._fail('Missing git diff output name.')
      match = re.match(r'^\+\+\+ (.*)$', lines.pop(0))
      if not match:
        self._fail('Unexpected git diff: --- not following +++.')
      if new != self.mangle(match.group(1)) and '/dev/null' != match.group(1):
        self._fail('Unexpected git diff: %s != %s.' % (new, match.group(1)))
      assert not lines, '_split_header() is broken'
      break

  def _verify_svn_header(self):
    """Sanity checks the header.

    A svn diff can contain only property changes, in that case there will be no
    proper header. To make things worse, this property change header is
    localized.

    Raises:
      UnsupportedPatchFormat: the ---/+++ lines are incomplete or name another
          file, or, when there is no --- line, the filename does not appear
          anywhere in the header.
    """
    lines = self.diff_header.splitlines()
    while lines:
      match = re.match(r'^--- ([^\t]+).*$', lines.pop(0))
      if not match:
        continue
      if match.group(1) not in (self.filename, '/dev/null'):
        self._fail('Unexpected diff: %s.' % match.group(1))
      # A header ending right after the --- line has no +++ line to pop;
      # report it as a format error rather than failing on an empty list.
      if not lines:
        self._fail('Unexpected diff: --- not following +++.')
      match = re.match(r'^\+\+\+ ([^\t]+).*$', lines.pop(0))
      if not match:
        self._fail('Unexpected diff: --- not following +++.')
      if match.group(1) not in (self.filename, '/dev/null'):
        self._fail('Unexpected diff: %s.' % match.group(1))
      assert not lines, '_split_header() is broken'
      break
    else:
      # Cheap check to make sure the file name is at least mentioned in the
      # 'diff' header. That's the only remaining invariant.
      if self.filename not in self.diff_header:
        self._fail('Diff seems corrupted.')
maruel@chromium.orgb3727a32011-04-04 19:31:44 +0000261
262
class PatchSet(object):
  """Holds a list of FilePatch* objects.

  Attributes:
    patches: the collection of patch objects given to the constructor.
  """

  def __init__(self, patches):
    self.patches = patches

  def set_relpath(self, relpath):
    """Offsets every contained patch into the relpath subdirectory."""
    for entry in self.patches:
      entry.set_relpath(relpath)

  def __iter__(self):
    """Iterates over the contained patches in order."""
    return iter(self.patches)

  @property
  def filenames(self):
    """The filename of each contained patch, in order."""
    names = []
    for entry in self.patches:
      names.append(entry.filename)
    return names
280 return [p.filename for p in self.patches]