# coding=utf8
# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Utility functions to handle patches."""
| 6 | |
maruel@chromium.org | cd61940 | 2011-04-09 00:08:00 +0000 | [diff] [blame] | 7 | import posixpath |
| 8 | import os |
maruel@chromium.org | b3727a3 | 2011-04-04 19:31:44 +0000 | [diff] [blame] | 9 | import re |
| 10 | |
| 11 | |
class UnsupportedPatchFormat(Exception):
  """Raised when the patch for a file can't be processed.

  Attributes:
    filename: name of the file the patch applies to.
    status: optional message detailing why the patch was rejected; it is
        appended on its own line by __str__() when truthy.
  """

  def __init__(self, filename, status):
    super(UnsupportedPatchFormat, self).__init__(filename, status)
    self.filename = filename
    self.status = status

  def __str__(self):
    """Returns a human readable description of the failure."""
    out = 'Can\'t process patch for file %s.' % self.filename
    if self.status:
      out += '\n%s' % self.status
    return out
| 23 | |
| 24 | |
class FilePatchBase(object):
  """Defines a single file being modified.

  '/' is always used instead of os.sep for consistency.

  Attributes:
    filename: path of the file, with every '\\' converted to '/'.
    is_delete: class-level default, False.
    is_binary: class-level default, False.
  """
  is_delete = False
  is_binary = False

  def __init__(self, filename):
    self.filename = None
    self._set_filename(filename)

  def _set_filename(self, filename):
    """Normalizes |filename| to use '/' and stores it after validation.

    self.filename is assigned before the checks run so that _fail() reports
    the offending name.

    Raises:
      UnsupportedPatchFormat: the name contains a blacklisted sequence or
          starts with a forbidden prefix.
    """
    self.filename = filename.replace('\\', '/')
    # Blacklist a few characters for simplicity.
    for forbidden in ('%', '$', '..', '\'', '"'):
      if forbidden in self.filename:
        self._fail('Can\'t use \'%s\' in filename.' % forbidden)
    # '/' rejects absolute paths. 'CON' and 'COM' are presumably rejected
    # because of Windows reserved device names -- TODO confirm. Note that any
    # name merely starting with these letters is rejected too.
    for prefix in ('/', 'CON', 'COM'):
      if self.filename.startswith(prefix):
        self._fail('Filename can\'t start with \'%s\'.' % prefix)

  def get(self):
    """Returns the patch content; the base class has none to return."""
    raise NotImplementedError('Nothing to grab')

  def set_relpath(self, relpath):
    """Offsets the file into the |relpath| subdirectory.

    An empty |relpath| is a no-op. The joined path goes through the same
    validation as the constructor argument.

    Raises:
      UnsupportedPatchFormat: |relpath| is absolute or the resulting file name
          is invalid.
    """
    if not relpath:
      return
    relpath = relpath.replace('\\', '/')
    if relpath.startswith('/'):
      self._fail('Relative path starts with %s' % relpath[0])
    self._set_filename(posixpath.join(relpath, self.filename))

  def _fail(self, msg):
    """Raises UnsupportedPatchFormat for the current file name."""
    raise UnsupportedPatchFormat(self.filename, msg)
| 60 | |
maruel@chromium.org | b3727a3 | 2011-04-04 19:31:44 +0000 | [diff] [blame] | 61 | |
class FilePatchDelete(FilePatchBase):
  """Deletes a file.

  Attributes:
    is_binary: whether the deleted file is binary, as given by the caller.
  """
  is_delete = True

  def __init__(self, filename, is_binary):
    super(FilePatchDelete, self).__init__(filename)
    self.is_binary = is_binary

  def get(self):
    """A deletion carries no content, so there is nothing to return."""
    raise NotImplementedError('Nothing to grab')
| 72 | |
| 73 | |
class FilePatchBinary(FilePatchBase):
  """Content of a new binary file.

  Attributes:
    data: content of the file, returned as-is by get().
    svn_properties: the properties given by the caller, or an empty list when
        a falsy value was passed.
  """
  is_binary = True

  def __init__(self, filename, data, svn_properties):
    super(FilePatchBinary, self).__init__(filename)
    self.data = data
    self.svn_properties = svn_properties or []

  def get(self):
    """Returns the binary content."""
    return self.data
| 85 | |
| 86 | |
class FilePatchDiff(FilePatchBase):
  """Patch for a single file.

  The diff is split into a header (everything up to and including the '+++ '
  line) and the hunks. The header is then sanity checked as a git header when
  it contains a 'diff --git' line, otherwise as a svn header.

  Attributes:
    diff_header: header part of the diff, as a single string.
    diff_hunks: remainder of the diff, as a single string.
    svn_properties: list of svn properties. ('svn:executable', '*') is
        appended when a git header declares a new file whose owner execute
        bit is set.
    is_git_diff: True when the header contains a 'diff --git' line.
    patchlevel: number of leading path components stripped by mangle(); set to
        1 when the 'diff --git' line uses the a/ and b/ prefixes.
  """

  def __init__(self, filename, diff, svn_properties):
    super(FilePatchDiff, self).__init__(filename)
    if not diff:
      self._fail('File doesn\'t have a diff.')
    self.diff_header, self.diff_hunks = self._split_header(diff)
    # Copy the list: _verify_git_header() may append to it and the caller's
    # object must not be modified behind its back.
    self.svn_properties = list(svn_properties or [])
    self.is_git_diff = self._is_git_diff_header(self.diff_header)
    self.patchlevel = 0
    if self.is_git_diff:
      self._verify_git_header()
    else:
      self._verify_svn_header()

  def get(self):
    """Returns the whole diff, header followed by hunks."""
    return self.diff_header + self.diff_hunks

  def set_relpath(self, relpath):
    """Offsets the file into |relpath| and rewrites the header accordingly."""
    old_filename = self.filename
    super(FilePatchDiff, self).set_relpath(relpath)
    # Update the header too.
    # NOTE(review): this is a plain substring replacement, so every occurrence
    # of the old name in the header is rewritten, including occurrences inside
    # longer paths.
    self.diff_header = self.diff_header.replace(old_filename, self.filename)

  def _split_header(self, diff):
    """Splits a diff in two: the header and the hunks.

    Returns:
      A (header, hunks) tuple of strings. Line terminators are preserved.

    Raises:
      UnsupportedPatchFormat: the '--- ' line is not followed by a '+++ ' line
          or the hunks don't start with '@@ '.
    """
    lines = diff.splitlines(True)
    # The header runs up to and including the first '--- ' line. Some diffs
    # may not have a ---/+++ set, like a git rename with no change or a svn
    # diff with only property changes; in that case everything is header.
    split = len(lines)
    for index, line in enumerate(lines):
      if line.startswith('--- '):
        split = index + 1
        break
    header = lines[:split]
    hunks = lines[split:]

    if hunks:
      if not hunks[0].startswith('+++ '):
        self._fail('Inconsistent header')
      header.append(hunks.pop(0))
      if hunks and not hunks[0].startswith('@@ '):
        self._fail('Inconsistent hunk header')

    # Mangle any \\ in the header to /.
    header_lines = ('Index:', 'diff', 'copy', 'rename', '+++', '---')
    basename = os.path.basename(self.filename)
    for index, line in enumerate(header):
      # NOTE(review): lines keep their terminator (splitlines(True)), so the
      # endswith() test can only match a final line lacking a newline.
      if line.split(' ', 1)[0] in header_lines or line.endswith(basename):
        header[index] = line.replace('\\', '/')
    return ''.join(header), ''.join(hunks)

  @staticmethod
  def _is_git_diff_header(diff_header):
    """Returns True if the diff for a single files was generated with git."""
    # Delete: http://codereview.chromium.org/download/issue6368055_22_29.diff
    # Rename partial change:
    # http://codereview.chromium.org/download/issue6250123_3013_6010.diff
    # Rename no change:
    # http://codereview.chromium.org/download/issue6287022_3001_4010.diff
    return any(
        line.startswith('diff --git') for line in diff_header.splitlines())

  def mangle(self, string):
    """Mangle a file path.

    Converts '\\' to '/' and drops the first self.patchlevel path components.
    """
    return '/'.join(string.replace('\\', '/').split('/')[self.patchlevel:])

  def _verify_git_header(self):
    """Sanity checks the header.

    Expects the following format:

    <garbage>
    diff --git (|a/)<filename> (|b/)<filename>
    <similarity>
    <filemode changes>
    <index>
    <copy|rename from>
    <copy|rename to>
    --- <filename>
    +++ <filename>

    Everything is optional except the diff --git line.

    Raises:
      UnsupportedPatchFormat: the header is inconsistent with self.filename or
          with itself.
    """
    lines = self.diff_header.splitlines()

    # Verify the diff --git line.
    old = None
    new = None
    while lines:
      match = re.match(r'^diff \-\-git (.*?) (.*)$', lines.pop(0))
      if not match:
        continue
      old = match.group(1).replace('\\', '/')
      new = match.group(2).replace('\\', '/')
      if old.startswith('a/') and new.startswith('b/'):
        self.patchlevel = 1
        old = old[2:]
        new = new[2:]
      # The rename is about the new file so the old file can be anything.
      if new not in (self.filename, 'dev/null'):
        self._fail('Unexpected git diff output name %s.' % new)
      if old == 'dev/null' and new == 'dev/null':
        self._fail('Unexpected /dev/null git diff.')
      break

    if not old or not new:
      self._fail('Unexpected git diff; couldn\'t find git header.')

    # Handle these:
    #   new file mode \d{6}
    #   rename from <>
    #   rename to <>
    #   copy from <>
    #   copy to <>
    while lines:
      if lines[0].startswith('--- '):
        break
      line = lines.pop(0)
      match = re.match(r'^(rename|copy) from (.+)$', line)
      if match:
        # Remember the operation now: |match| is rebound below and is None
        # when the 'to' line is missing.
        kind = match.group(1)
        if old != match.group(2):
          self._fail('Unexpected git diff input name for %s.' % kind)
        if not lines:
          self._fail('Missing git diff output name for %s.' % kind)
        match = re.match(r'^(rename|copy) to (.+)$', lines.pop(0))
        if not match:
          self._fail('Missing git diff output name for %s.' % kind)
        if new != match.group(2):
          self._fail('Unexpected git diff output name for %s.' % match.group(1))
        continue

      match = re.match(r'^new file mode (\d{6})$', line)
      if match:
        mode = match.group(1)
        # Only look at owner ACL for executable. The mode looks like 100755:
        # the last three digits are owner/group/other, so the owner digit is
        # mode[3] and the execute permission is bit 1.
        if int(mode[3]) & 1:
          self.svn_properties.append(('svn:executable', '*'))

    # Handle ---/+++
    while lines:
      match = re.match(r'^--- (.*)$', lines.pop(0))
      if not match:
        continue
      if old != self.mangle(match.group(1)) and match.group(1) != '/dev/null':
        self._fail('Unexpected git diff: %s != %s.' % (old, match.group(1)))
      if not lines:
        self._fail('Missing git diff output name.')
      match = re.match(r'^\+\+\+ (.*)$', lines.pop(0))
      if not match:
        self._fail('Unexpected git diff: --- not following +++.')
      if new != self.mangle(match.group(1)) and '/dev/null' != match.group(1):
        self._fail('Unexpected git diff: %s != %s.' % (new, match.group(1)))
      # _split_header() ends the header on the +++ line.
      assert not lines, '_split_header() is broken'
      break

  def _verify_svn_header(self):
    """Sanity checks the header.

    A svn diff can contain only property changes, in that case there will be no
    proper header. To make things worse, this property change header is
    localized.

    Raises:
      UnsupportedPatchFormat: the ---/+++ pair is malformed, names another
          file, or the file name isn't mentioned at all in the header.
    """
    lines = self.diff_header.splitlines()
    while lines:
      match = re.match(r'^--- ([^\t]+).*$', lines.pop(0))
      if not match:
        continue
      # For copy and renames, it's possible that the --- line doesn't match
      # +++, so don't check match.group(1) to match self.filename or
      # '/dev/null', it can be anything else.
      # TODO(maruel): Handle rename/copy explicitly.
      if not lines:
        self._fail('Nothing after header.')
      match = re.match(r'^\+\+\+ ([^\t]+).*$', lines.pop(0))
      if not match:
        self._fail('Unexpected diff: --- not following +++.')
      if match.group(1) not in (self.filename, '/dev/null'):
        self._fail('Unexpected diff: %s.' % match.group(1))
      # _split_header() ends the header on the +++ line.
      assert not lines, '_split_header() is broken'
      break
    else:
      # No --- line was found. Cheap check to make sure the file name is at
      # least mentioned in the 'diff' header. That's the only remaining
      # invariant.
      if self.filename not in self.diff_header:
        self._fail('Diff seems corrupted.')
maruel@chromium.org | b3727a3 | 2011-04-04 19:31:44 +0000 | [diff] [blame] | 277 | |
| 278 | |
class PatchSet(object):
  """A list of FilePatch* objects.

  Attributes:
    patches: the list given to the constructor; it is stored, not copied.
  """

  def __init__(self, patches):
    self.patches = patches
    # Programming-error check: every item must derive from FilePatchBase.
    for p in self.patches:
      assert isinstance(p, FilePatchBase)

  def set_relpath(self, relpath):
    """Used to offset the patch into a subdirectory."""
    for patch in self.patches:
      patch.set_relpath(relpath)

  def __iter__(self):
    """Iterates over the patches in order."""
    return iter(self.patches)

  @property
  def filenames(self):
    """List of the file name of each patch, in order."""
    return [p.filename for p in self.patches]