Copy a newer release of cpplint.py from google-styleguide.
Review URL: http://codereview.chromium.org/147119

git-svn-id: svn://svn.chromium.org/chrome/trunk/tools/depot_tools@19206 0039d316-1c4b-4281-b951-d872f2087c98
diff --git a/cpplint.py b/cpplint.py
index 927ac39..f7adef7 100755
--- a/cpplint.py
+++ b/cpplint.py
@@ -118,7 +118,8 @@
 # We want an explicit list so we can list them all in cpplint --filter=.
 # If you add a new error message with a new category, add it to the list
 # here!  cpplint_unittest.py should tell you if you forget to do this.
-_ERROR_CATEGORIES = """\
+# \ used for clearer layout -- pylint: disable-msg=C6013
+_ERROR_CATEGORIES = '''\
   build/class
   build/deprecated
   build/endif_comment
@@ -147,6 +148,7 @@
   runtime/explicit
   runtime/int
   runtime/init
+  runtime/invalid_increment
   runtime/memset
   runtime/printf
   runtime/printf_format
@@ -171,7 +173,13 @@
   whitespace/semicolon
   whitespace/tab
   whitespace/todo
-"""
+'''
+
+# The default state of the category filter. This is overrided by the --filter=
+# flag. By default all errors are on, so only add here categories that should be
+# off by default (i.e., categories that must be enabled by the --filter= flags).
+# All entries here should start with a '-' or '+', as in the --filter= flag.
+_DEFAULT_FILTERS = []
 
 # We used to check for high-bit characters, but after much discussion we
 # decided those were OK, as long as they were in UTF-8 and didn't represent
@@ -210,19 +218,20 @@
 # testing/base/gunit.h.  Note that the _M versions need to come first
 # for substring matching to work.
 _CHECK_MACROS = [
-    'CHECK',
+    'DCHECK', 'CHECK',
     'EXPECT_TRUE_M', 'EXPECT_TRUE',
     'ASSERT_TRUE_M', 'ASSERT_TRUE',
     'EXPECT_FALSE_M', 'EXPECT_FALSE',
     'ASSERT_FALSE_M', 'ASSERT_FALSE',
     ]
 
-# Replacement macros for CHECK/EXPECT_TRUE/EXPECT_FALSE
+# Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE
 _CHECK_REPLACEMENT = dict([(m, {}) for m in _CHECK_MACROS])
 
 for op, replacement in [('==', 'EQ'), ('!=', 'NE'),
                         ('>=', 'GE'), ('>', 'GT'),
                         ('<=', 'LE'), ('<', 'LT')]:
+  _CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_%s' % replacement
   _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_%s' % replacement
   _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_%s' % replacement
   _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_%s' % replacement
@@ -358,7 +367,8 @@
   def __init__(self):
     self.verbose_level = 1  # global setting.
     self.error_count = 0    # global count of reported errors
-    self.filters = []       # filters to apply when emitting error messages
+    # filters to apply when emitting error messages
+    self.filters = _DEFAULT_FILTERS[:]
 
     # output format:
     # "emacs" - format that emacs can parse (default)
@@ -384,11 +394,17 @@
     Args:
       filters: A string of comma-separated filters (eg "+whitespace/indent").
                Each filter should start with + or -; else we die.
+
+    Raises:
+      ValueError: The comma-separated filters did not all start with '+' or '-'.
+                  E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter"
     """
-    if not filters:
-      self.filters = []
-    else:
-      self.filters = filters.split(',')
+    # Default filters always have less priority than the flag ones.
+    self.filters = _DEFAULT_FILTERS[:]
+    for filt in filters.split(','):
+      clean_filt = filt.strip()
+      if clean_filt:
+        self.filters.append(clean_filt)
     for filt in self.filters:
       if not (filt.startswith('+') or filt.startswith('-')):
         raise ValueError('Every filter in --filters must start with + or -'
@@ -742,7 +758,7 @@
   return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
 
 
-class CleansedLines:
+class CleansedLines(object):
   """Holds 3 copies of all lines with different preprocessing applied to them.
 
   1) elided member contains lines without strings and comments,
@@ -858,7 +874,7 @@
 def CheckForHeaderGuard(filename, lines, error):
   """Checks that the file contains a header guard.
 
-  Logs an error if no #ifndef header guard is present.  For google3
+  Logs an error if no #ifndef header guard is present.  For other
   headers, checks that the full pathname is used.
 
   Args:
@@ -1024,6 +1040,7 @@
   line = clean_lines.elided[linenum]
   for single_thread_function, multithread_safe_function in threading_list:
     ix = line.find(single_thread_function)
+    # Comparisons made explicit for clarity -- pylint: disable-msg=C6403
     if ix >= 0 and (ix == 0 or (not line[ix - 1].isalnum() and
                                 line[ix - 1] not in ('_', '.', '>'))):
       error(filename, linenum, 'runtime/threadsafe_fn', 2,
@@ -1032,6 +1049,34 @@
             '...) for improved thread safety.')
 
 
+# Matches invalid increment: *count++, which moves pointer insead of
+# incrementing a value.
+_RE_PATTERN_IVALID_INCREMENT = re.compile(
+    r'^\s*\*\w+(\+\+|--);')
+
+
+def CheckInvalidIncrement(filename, clean_lines, linenum, error):
+  """Checks for invalud increment *count++.
+
+  For example following function:
+  void increment_counter(int* count) {
+    *count++;
+  }
+  is invalid, because it effectively does count++, moving pointer, and should
+  be replaced with ++*count, (*count)++ or *count += 1.
+
+  Args:
+    filename: The name of the current file.
+    clean_lines: A CleansedLines instance containing the file.
+    linenum: The number of the line to check.
+    error: The function to call with any errors found.
+  """
+  line = clean_lines.elided[linenum]
+  if _RE_PATTERN_IVALID_INCREMENT.match(line):
+    error(filename, linenum, 'runtime/invalid_increment', 5,
+          'Changing pointer instead of value (or unused value of operator*).')
+
+
 class _ClassInfo(object):
   """Stores information about a class."""
 
@@ -1269,10 +1314,10 @@
       not Search(r' \([^)]+\)\([^)]*(\)|,$)', fncall) and
       # Ignore pointers/references to arrays.
       not Search(r' \([^)]+\)\[[^\]]+\]', fncall)):
-    if Search(r'\w\s*\(\s', fncall):      # a ( used for a fn call
+    if Search(r'\w\s*\(\s(?!\s*\\$)', fncall):      # a ( used for a fn call
       error(filename, linenum, 'whitespace/parens', 4,
             'Extra space after ( in function call')
-    elif Search(r'\(\s+[^(]', fncall):
+    elif Search(r'\(\s+(?!(\s*\\)|\()', fncall):
       error(filename, linenum, 'whitespace/parens', 2,
             'Extra space after (')
     if (Search(r'\w\s+\(', fncall) and
@@ -1331,7 +1376,7 @@
   joined_line = ''
 
   starting_func = False
-  regexp = r'(\w(\w|::|\*|\&|\s)*)\(' # decls * & space::name( ...
+  regexp = r'(\w(\w|::|\*|\&|\s)*)\('  # decls * & space::name( ...
   match_result = Match(regexp, line)
   if match_result:
     # If the name is all caps and underscores, figure it's a macro and
@@ -1343,10 +1388,7 @@
 
   if starting_func:
     body_found = False
-    # Don't look too far for the function body. Lint might be mistaken about
-    # whether it's a function definition.
-    for start_linenum in xrange(linenum,
-                                min(linenum+100, clean_lines.NumLines())):
+    for start_linenum in xrange(linenum, clean_lines.NumLines()):
       start_line = lines[start_linenum]
       joined_line += ' ' + start_line.lstrip()
       if Search(r'(;|})', start_line):  # Declarations and trivial functions
@@ -1364,9 +1406,7 @@
         function_state.Begin(function)
         break
     if not body_found:
-      # 50 lines after finding a line deemed to start a function
-      # definition, no body for the function was found. A macro
-      # invocation with no terminating semicolon could trigger this.
+      # No body for the function (or evidence of a non-function) was found.
       error(filename, linenum, 'readability/fn_size', 5,
             'Lint failed to find start of function body.')
   elif Match(r'^\}\s*$', line):  # function end
@@ -1404,6 +1444,7 @@
             '"// TODO(my_username): Stuff."')
 
     middle_whitespace = match.group(3)
+    # Comparisons made explicit for correctness -- pylint: disable-msg=C6403
     if middle_whitespace != ' ' and middle_whitespace != '':
       error(filename, linenum, 'whitespace/todo', 2,
             'TODO(my_username) should be followed by a space')
@@ -1498,6 +1539,7 @@
   commentpos = line.find('//')
   if commentpos != -1:
     # Check if the // may be in quotes.  If so, ignore it
+    # Comparisons made explicit for clarity -- pylint: disable-msg=C6403
     if (line.count('"', 0, commentpos) -
         line.count('\\"', 0, commentpos)) % 2 == 0:   # not in quotes
       # Allow one space for new scopes, two spaces otherwise:
@@ -1514,7 +1556,10 @@
         # but some lines are exceptions -- e.g. if they're big
         # comment delimiters like:
         # //----------------------------------------------------------
-        match = Search(r'[=/-]{4,}\s*$', line[commentend:])
+        # or they begin with multiple slashes followed by a space:
+        # //////// Header comment
+        match = (Search(r'[=/-]{4,}\s*$', line[commentend:]) or
+                 Search(r'^/+ ', line[commentend:]))
         if not match:
           error(filename, linenum, 'whitespace/comments', 4,
                 'Should have a space between // and comment')
@@ -1575,14 +1620,15 @@
   # consistent about how many spaces are inside the parens, and
   # there should either be zero or one spaces inside the parens.
   # We don't want: "if ( foo)" or "if ( foo   )".
-  # Exception: "for ( ; foo; bar)" is allowed.
+  # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed.
   match = Search(r'\b(if|for|while|switch)\s*'
                  r'\(([ ]*)(.).*[^ ]+([ ]*)\)\s*{\s*$',
                  line)
   if match:
     if len(match.group(2)) != len(match.group(4)):
       if not (match.group(3) == ';' and
-              len(match.group(2)) == 1 + len(match.group(4))):
+              len(match.group(2)) == 1 + len(match.group(4)) or
+              not match.group(2) and Search(r'\bfor\s*\(.*; \)', line)):
         error(filename, linenum, 'whitespace/parens', 5,
               'Mismatching spaces inside () in %s' % match.group(1))
     if not len(match.group(2)) in [0, 1]:
@@ -1885,7 +1931,11 @@
       is_header_guard = True
   # #include lines and header guards can be long, since there's no clean way to
   # split them.
-  if not line.startswith('#include') and not is_header_guard:
+  #
+  # URLs can be long too.  It's possible to split these, but it makes them
+  # harder to cut&paste.
+  if (not line.startswith('#include') and not is_header_guard and
+      not Match(r'^\s*//.*http(s?)://\S*$', line)):
     line_width = GetLineWidth(line)
     if line_width > 100:
       error(filename, linenum, 'whitespace/line_length', 4,
@@ -2026,35 +2076,34 @@
   return _OTHER_HEADER
 
 
-def CheckLanguage(filename, clean_lines, linenum, file_extension, include_state,
-                  error):
-  """Checks rules from the 'C++ language rules' section of cppguide.html.
 
-  Some of these rules are hard to test (function overloading, using
-  uint32 inappropriately), but we do the best we can.
+def CheckIncludeLine(filename, clean_lines, linenum, include_state, error):
+  """Check rules that are applicable to #include lines.
+
+  Strings on #include lines are NOT removed from elided line, to make
+  certain tasks easier. However, to prevent false positives, checks
+  applicable to #include lines in CheckLanguage must be put here.
 
   Args:
     filename: The name of the current file.
     clean_lines: A CleansedLines instance containing the file.
     linenum: The number of the line to check.
-    file_extension: The extension (without the dot) of the filename.
     include_state: An _IncludeState instance in which the headers are inserted.
     error: The function to call with any errors found.
   """
   fileinfo = FileInfo(filename)
 
-  # get rid of comments
-  comment_elided_line = clean_lines.lines[linenum]
+  line = clean_lines.lines[linenum]
 
   # "include" should use the new style "foo/bar.h" instead of just "bar.h"
-  if _RE_PATTERN_INCLUDE_NEW_STYLE.search(comment_elided_line):
+  if _RE_PATTERN_INCLUDE_NEW_STYLE.search(line):
     error(filename, linenum, 'build/include', 4,
           'Include the directory when naming .h files')
 
   # we shouldn't include a file more than once. actually, there are a
   # handful of instances where doing so is okay, but in general it's
   # not.
-  match = _RE_PATTERN_INCLUDE.search(comment_elided_line)
+  match = _RE_PATTERN_INCLUDE.search(line)
   if match:
     include = match.group(2)
     is_system = (match.group(1) == '<')
@@ -2083,12 +2132,42 @@
               '%s. Should be: %s.h, c system, c++ system, other.' %
               (error_message, fileinfo.BaseName()))
 
+  # Look for any of the stream classes that are part of standard C++.
+  match = _RE_PATTERN_INCLUDE.match(line)
+  if match:
+    include = match.group(2)
+    if Match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$', include):
+      # Many unit tests use cout, so we exempt them.
+      if not _IsTestFilename(filename):
+        error(filename, linenum, 'readability/streams', 3,
+              'Streams are highly discouraged.')
+
+def CheckLanguage(filename, clean_lines, linenum, file_extension, include_state,
+                  error):
+  """Checks rules from the 'C++ language rules' section of cppguide.html.
+
+  Some of these rules are hard to test (function overloading, using
+  uint32 inappropriately), but we do the best we can.
+
+  Args:
+    filename: The name of the current file.
+    clean_lines: A CleansedLines instance containing the file.
+    linenum: The number of the line to check.
+    file_extension: The extension (without the dot) of the filename.
+    include_state: An _IncludeState instance in which the headers are inserted.
+    error: The function to call with any errors found.
+  """
   # If the line is empty or consists of entirely a comment, no need to
   # check it.
   line = clean_lines.elided[linenum]
   if not line:
     return
 
+  match = _RE_PATTERN_INCLUDE.search(line)
+  if match:
+    CheckIncludeLine(filename, clean_lines, linenum, include_state, error)
+    return
+
   # Create an extended_line, which is the concatenation of the current and
   # next lines, for more effective checking of code that may span more than one
   # line.
@@ -2102,16 +2181,6 @@
 
   # TODO(unknown): figure out if they're using default arguments in fn proto.
 
-  # Look for any of the stream classes that are part of standard C++.
-  match = _RE_PATTERN_INCLUDE.match(line)
-  if match:
-    include = match.group(2)
-    if Match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$', include):
-      # Many unit tests use cout, so we exempt them.
-      if not _IsTestFilename(filename):
-        error(filename, linenum, 'readability/streams', 3,
-              'Streams are highly discouraged.')
-
   # Check for non-const references in functions.  This is tricky because &
   # is also used to take the address of something.  We allow <> for templates,
   # (ignoring whatever is between the braces) and : for classes.
@@ -2428,7 +2497,8 @@
 _RE_PATTERN_STRING = re.compile(r'\bstring\b')
 
 _re_pattern_algorithm_header = []
-for _template in ('copy', 'max', 'min', 'sort', 'swap'):
+for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap',
+                  'transform'):
   # Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or
   # type::max().
   _re_pattern_algorithm_header.append(
@@ -2445,7 +2515,92 @@
          _header))
 
 
-def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error):
+def FilesBelongToSameModule(filename_cc, filename_h):
+  """Check if these two filenames belong to the same module.
+
+  The concept of a 'module' here is a as follows:
+  foo.h, foo-inl.h, foo.cc, foo_test.cc and foo_unittest.cc belong to the
+  same 'module' if they are in the same directory.
+  some/path/public/xyzzy and some/path/internal/xyzzy are also considered
+  to belong to the same module here.
+
+  If the filename_cc contains a longer path than the filename_h, for example,
+  '/absolute/path/to/base/sysinfo.cc', and this file would include
+  'base/sysinfo.h', this function also produces the prefix needed to open the
+  header. This is used by the caller of this function to more robustly open the
+  header file. We don't have access to the real include paths in this context,
+  so we need this guesswork here.
+
+  Known bugs: tools/base/bar.cc and base/bar.h belong to the same module
+  according to this implementation. Because of this, this function gives
+  some false positives. This should be sufficiently rare in practice.
+
+  Args:
+    filename_cc: is the path for the .cc file
+    filename_h: is the path for the header path
+
+  Returns:
+    Tuple with a bool and a string:
+    bool: True if filename_cc and filename_h belong to the same module.
+    string: the additional prefix needed to open the header file.
+  """
+
+  if not filename_cc.endswith('.cc'):
+    return (False, '')
+  filename_cc = filename_cc[:-len('.cc')]
+  if filename_cc.endswith('_unittest'):
+    filename_cc = filename_cc[:-len('_unittest')]
+  elif filename_cc.endswith('_test'):
+    filename_cc = filename_cc[:-len('_test')]
+  filename_cc = filename_cc.replace('/public/', '/')
+  filename_cc = filename_cc.replace('/internal/', '/')
+
+  if not filename_h.endswith('.h'):
+    return (False, '')
+  filename_h = filename_h[:-len('.h')]
+  if filename_h.endswith('-inl'):
+    filename_h = filename_h[:-len('-inl')]
+  filename_h = filename_h.replace('/public/', '/')
+  filename_h = filename_h.replace('/internal/', '/')
+
+  files_belong_to_same_module = filename_cc.endswith(filename_h)
+  common_path = ''
+  if files_belong_to_same_module:
+    common_path = filename_cc[:-len(filename_h)]
+  return files_belong_to_same_module, common_path
+
+
+def UpdateIncludeState(filename, include_state, io=codecs):
+  """Fill up the include_state with new includes found from the file.
+
+  Args:
+    filename: the name of the header to read.
+    include_state: an _IncludeState instance in which the headers are inserted.
+    io: The io factory to use to read the file. Provided for testability.
+
+  Returns:
+    True if a header was succesfully added. False otherwise.
+  """
+  headerfile = None
+  try:
+    headerfile = io.open(filename, 'r', 'utf8', 'replace')
+  except IOError:
+    return False
+  linenum = 0
+  for line in headerfile:
+    linenum += 1
+    clean_line = CleanseComments(line)
+    match = _RE_PATTERN_INCLUDE.search(clean_line)
+    if match:
+      include = match.group(2)
+      # The value formatting is cute, but not really used right now.
+      # What matters here is that the key is in include_state.
+      include_state.setdefault(include, '%s:%d' % (filename, linenum))
+  return True
+
+
+def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error,
+                              io=codecs):
   """Reports for missing stl includes.
 
   This function will output warnings to make sure you are including the headers
@@ -2454,19 +2609,14 @@
   less<> in a .h file, only one (the latter in the file) of these will be
   reported as a reason to include the <functional>.
 
-  We only check headers. We do not check inside cc-files. .cc files should be
-  able to depend on their respective header files for includes.  However, there
-  is no simple way of producing this logic here.
-
   Args:
     filename: The name of the current file.
     clean_lines: A CleansedLines instance containing the file.
     include_state: An _IncludeState instance.
     error: The function to call with any errors found.
+    io: The IO factory to use to read the header file. Provided for unittest
+        injection.
   """
-  if filename.endswith('.cc'):
-    return
-
   required = {}  # A map of header name to linenumber and the template entity.
                  # Example of required: { '<functional>': (1219, 'less<>') }
 
@@ -2491,6 +2641,44 @@
       if pattern.search(line):
         required[header] = (linenum, template)
 
+  # The policy is that if you #include something in foo.h you don't need to
+  # include it again in foo.cc. Here, we will look at possible includes.
+  # Let's copy the include_state so it is only messed up within this function.
+  include_state = include_state.copy()
+
+  # Did we find the header for this file (if any) and succesfully load it?
+  header_found = False
+
+  # Use the absolute path so that matching works properly.
+  abs_filename = os.path.abspath(filename)
+
+  # For Emacs's flymake.
+  # If cpplint is invoked from Emacs's flymake, a temporary file is generated
+  # by flymake and that file name might end with '_flymake.cc'. In that case,
+  # restore original file name here so that the corresponding header file can be
+  # found.
+  # e.g. If the file name is 'foo_flymake.cc', we should search for 'foo.h'
+  # instead of 'foo_flymake.h'
+  emacs_flymake_suffix = '_flymake.cc'
+  if abs_filename.endswith(emacs_flymake_suffix):
+    abs_filename = abs_filename[:-len(emacs_flymake_suffix)] + '.cc'
+
+  # include_state is modified during iteration, so we iterate over a copy of
+  # the keys.
+  for header in include_state.keys():  #NOLINT
+    (same_module, common_path) = FilesBelongToSameModule(abs_filename, header)
+    fullpath = common_path + header
+    if same_module and UpdateIncludeState(fullpath, include_state, io):
+      header_found = True
+
+  # If we can't find the header file for a .cc, assume it's because we don't
+  # know where to look. In that case we'll give up as we're not sure they
+  # didn't include it in the .h file.
+  # TODO(unknown): Do a better job of finding .h files so we are confident that
+  # not having the .h file means there isn't one.
+  if filename.endswith('.cc') and not header_found:
+    return
+
   # All the lines have been processed, report the errors found.
   for required_header_unstripped in required:
     template = required[required_header_unstripped][1]
@@ -2534,6 +2722,7 @@
   CheckForNonStandardConstructs(filename, clean_lines, line,
                                 class_state, error)
   CheckPosixThreading(filename, clean_lines, line, error)
+  CheckInvalidIncrement(filename, clean_lines, line, error)
 
 
 def ProcessFileData(filename, file_extension, lines, error):
@@ -2691,7 +2880,7 @@
       verbosity = int(val)
     elif opt == '--filter':
       filters = val
-      if filters == '':
+      if not filters:
         PrintCategories()
 
   if not filenames: