Increase coverage to 91%; Much stricter about header parsing.

Add is_new to be used in a later patch by checkout classes.

R=dpranke@chromium.org
BUG=
TEST=

Review URL: http://codereview.chromium.org/7054048

git-svn-id: svn://svn.chromium.org/chrome/trunk/tools/depot_tools@87838 0039d316-1c4b-4281-b951-d872f2087c98
diff --git a/patch.py b/patch.py
index c627f48..9c54924 100644
--- a/patch.py
+++ b/patch.py
@@ -29,6 +29,7 @@
   """
   is_delete = False
   is_binary = False
+  is_new = False
 
   def __init__(self, filename):
     self.filename = None
@@ -44,7 +45,7 @@
       if self.filename.startswith(i):
         self._fail('Filename can\'t start with \'%s\'.' % i)
 
-  def get(self):
+  def get(self):  # pragma: no coverage
     raise NotImplementedError('Nothing to grab')
 
   def set_relpath(self, relpath):
@@ -75,10 +76,11 @@
   """Content of a new binary file."""
   is_binary = True
 
-  def __init__(self, filename, data, svn_properties):
+  def __init__(self, filename, data, svn_properties, is_new):
     super(FilePatchBinary, self).__init__(filename)
     self.data = data
     self.svn_properties = svn_properties or []
+    self.is_new = is_new
 
   def get(self):
     return self.data
@@ -195,52 +197,86 @@
     if not old or not new:
       self._fail('Unexpected git diff; couldn\'t find git header.')
 
-    # Handle these:
-    # new file mode \d{6}
-    # rename from <>
-    # rename to <>
-    # copy from <>
-    # copy to <>
+    last_line = ''
+
     while lines:
-      if lines[0].startswith('--- '):
-        break
       line = lines.pop(0)
-      match = re.match(r'^(rename|copy) from (.+)$', line)
-      if match:
-        if old != match.group(2):
-          self._fail('Unexpected git diff input name for %s.' % match.group(1))
-        if not lines:
-          self._fail('Missing git diff output name for %s.' % match.group(1))
-        match = re.match(r'^(rename|copy) to (.+)$', lines.pop(0))
-        if not match:
-          self._fail('Missing git diff output name for %s.' % match.group(1))
-        if new != match.group(2):
-          self._fail('Unexpected git diff output name for %s.' % match.group(1))
-        continue
+      # TODO(maruel): old should be replace with self.source_file
+      # TODO(maruel): new == self.filename and remove new
+      self._verify_git_header_process_line(lines, line, last_line, old, new)
+      last_line = line
 
-      match = re.match(r'^new file mode (\d{6})$', line)
-      if match:
-        mode = match.group(1)
-        # Only look at owner ACL for executable.
-        if bool(int(mode[4]) & 4):
-          self.svn_properties.append(('svn:executable', '*'))
+    # Cheap check to make sure the file name is at least mentioned in the
+    # 'diff' header. That the only remaining invariant.
+    if not self.filename in self.diff_header:
+      self._fail('Diff seems corrupted.')
 
-    # Handle ---/+++
-    while lines:
-      match = re.match(r'^--- (.*)$', lines.pop(0))
-      if not match:
-        continue
+  def _verify_git_header_process_line(self, lines, line, last_line, old, new):
+    """Processes a single line of the header.
+
+    Returns True if it should continue looping.
+    """
+    # Handle these:
+    #  rename from <>
+    #  copy from <>
+    match = re.match(r'^(rename|copy) from (.+)$', line)
+    if match:
+      if old != match.group(2):
+        self._fail('Unexpected git diff input name for line %s.' % line)
+      if not lines or not lines[0].startswith('%s to ' % match.group(1)):
+        self._fail(
+            'Confused %s from/to git diff for line %s.' %
+                (match.group(1), line))
+      return
+
+    # Handle these:
+    #  rename to <>
+    #  copy to <>
+    match = re.match(r'^(rename|copy) to (.+)$', line)
+    if match:
+      if new != match.group(2):
+        self._fail('Unexpected git diff output name for line %s.' % line)
+      if not last_line.startswith('%s from ' % match.group(1)):
+        self._fail(
+            'Confused %s from/to git diff for line %s.' %
+                (match.group(1), line))
+      return
+
+    # Handle "new file mode \d{6}"
+    match = re.match(r'^new file mode (\d{6})$', line)
+    if match:
+      mode = match.group(1)
+      # Only look at owner ACL for executable.
+      if bool(int(mode[4]) & 4):
+        self.svn_properties.append(('svn:executable', '*'))
+
+    # Handle "--- "
+    match = re.match(r'^--- (.*)$', line)
+    if match:
+      if last_line[:3] in ('---', '+++'):
+        self._fail('--- and +++ are reversed')
+      self.is_new = match.group(1) == '/dev/null'
+      # TODO(maruel): Use self.source_file.
       if old != self.mangle(match.group(1)) and match.group(1) != '/dev/null':
         self._fail('Unexpected git diff: %s != %s.' % (old, match.group(1)))
-      if not lines:
+      if not lines or not lines[0].startswith('+++'):
         self._fail('Missing git diff output name.')
-      match = re.match(r'^\+\+\+ (.*)$', lines.pop(0))
-      if not match:
+      return
+
+    # Handle "+++ "
+    match = re.match(r'^\+\+\+ (.*)$', line)
+    if match:
+      if not last_line.startswith('---'):
         self._fail('Unexpected git diff: --- not following +++.')
+      # TODO(maruel): new == self.filename.
       if new != self.mangle(match.group(1)) and '/dev/null' != match.group(1):
+        # TODO(maruel): Can +++ be /dev/null? If so, assert self.is_delete ==
+        # True.
         self._fail('Unexpected git diff: %s != %s.' % (new, match.group(1)))
-      assert not lines, '_split_header() is broken'
-      break
+      if lines:
+        self._fail('Crap after +++')
+      # We're done.
+      return
 
   def _verify_svn_header(self):
     """Sanity checks the header.
@@ -250,30 +286,52 @@
     localized.
     """
     lines = self.diff_header.splitlines()
+    last_line = ''
+
     while lines:
-      match = re.match(r'^--- ([^\t]+).*$', lines.pop(0))
-      if not match:
-        continue
-      # For copy and renames, it's possible that the -- line doesn't match +++,
-      # so don't check match.group(1) to match self.filename or '/dev/null', it
-      # can be anything else.
+      line = lines.pop(0)
+      self._verify_svn_header_process_line(lines, line, last_line)
+      last_line = line
+
+    # Cheap check to make sure the file name is at least mentioned in the
+    # 'diff' header. That the only remaining invariant.
+    if not self.filename in self.diff_header:
+      self._fail('Diff seems corrupted.')
+
+  def _verify_svn_header_process_line(self, lines, line, last_line):
+    """Processes a single line of the header.
+
+    Returns True if it should continue looping.
+    """
+    match = re.match(r'^--- ([^\t]+).*$', line)
+    if match:
+      if last_line[:3] in ('---', '+++'):
+        self._fail('--- and +++ are reversed')
+      self.is_new = match.group(1) == '/dev/null'
+      # For copy and renames, it's possible that the -- line doesn't match
+      # +++, so don't check match.group(1) to match self.filename or
+      # '/dev/null', it can be anything else.
       # TODO(maruel): Handle rename/copy explicitly.
-      # if match.group(1) not in (self.filename, '/dev/null'):
+      # if (self.mangle(match.group(1)) != self.filename and
+      #     match.group(1) != '/dev/null'):
       #  self.source_file = match.group(1)
-      if not lines:
+      if not lines or not lines[0].startswith('+++'):
         self._fail('Nothing after header.')
-      match = re.match(r'^\+\+\+ ([^\t]+).*$', lines.pop(0))
-      if not match:
+      return
+
+    match = re.match(r'^\+\+\+ ([^\t]+).*$', line)
+    if match:
+      if not last_line.startswith('---'):
         self._fail('Unexpected diff: --- not following +++.')
-      if match.group(1) not in (self.filename, '/dev/null'):
+      if (self.mangle(match.group(1)) != self.filename and
+          match.group(1) != '/dev/null'):
+        # TODO(maruel): Can +++ be /dev/null? If so, assert self.is_delete ==
+        # True.
         self._fail('Unexpected diff: %s.' % match.group(1))
-      assert not lines, '_split_header() is broken'
-      break
-    else:
-      # Cheap check to make sure the file name is at least mentioned in the
-      # 'diff' header. That the only remaining invariant.
-      if not self.filename in self.diff_header:
-        self._fail('Diff seems corrupted.')
+      if lines:
+        self._fail('Crap after +++')
+      # We're done.
+      return
 
 
 class PatchSet(object):