Blame - cpplint.py - chromium.googlesource.com/chromium/tools/depot_tools

blob: 016116b5ba9470325a373b095c07772166fbc68e [file] [log] [blame]

maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1	#!/usr/bin/python2.4
				2	#
erg@google.com	26970fa	2009-11-17 18:07:32 +0000	[diff] [blame]	3	# Copyright (c) 2009 Google Inc. All rights reserved.
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	4	#
erg@google.com	26970fa	2009-11-17 18:07:32 +0000	[diff] [blame]	5	# Redistribution and use in source and binary forms, with or without
				6	# modification, are permitted provided that the following conditions are
				7	# met:
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	8	#
erg@google.com	26970fa	2009-11-17 18:07:32 +0000	[diff] [blame]	9	# * Redistributions of source code must retain the above copyright
				10	# notice, this list of conditions and the following disclaimer.
				11	# * Redistributions in binary form must reproduce the above
				12	# copyright notice, this list of conditions and the following disclaimer
				13	# in the documentation and/or other materials provided with the
				14	# distribution.
				15	# * Neither the name of Google Inc. nor the names of its
				16	# contributors may be used to endorse or promote products derived from
				17	# this software without specific prior written permission.
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	18	#
erg@google.com	26970fa	2009-11-17 18:07:32 +0000	[diff] [blame]	19	# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
				20	# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
				21	# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
				22	# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
				23	# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
				24	# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
				25	# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
				26	# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
				27	# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
				28	# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
				29	# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	30
				31	# Here are some issues that I've had people identify in my code during reviews,
				32	# that I think are possible to flag automatically in a lint tool. If these were
				33	# caught by lint, it would save time both for myself and that of my reviewers.
				34	# Most likely, some of these are beyond the scope of the current lint framework,
				35	# but I think it is valuable to retain these wish-list items even if they cannot
				36	# be immediately implemented.
				37	#
				38	# Suggestions
				39	# -----------
				40	# - Check for no 'explicit' for multi-arg ctor
				41	# - Check for boolean assign RHS in parens
				42	# - Check for ctor initializer-list colon position and spacing
				43	# - Check that if there's a ctor, there should be a dtor
				44	# - Check accessors that return non-pointer member variables are
				45	# declared const
				46	# - Check accessors that return non-const pointer member vars are
				47	# not declared const
				48	# - Check for using public includes for testing
				49	# - Check for spaces between brackets in one-line inline method
				50	# - Check for no assert()
				51	# - Check for spaces surrounding operators
				52	# - Check for 0 in pointer context (should be NULL)
				53	# - Check for 0 in char context (should be '\0')
				54	# - Check for camel-case method name conventions for methods
				55	# that are not simple inline getters and setters
				56	# - Check that base classes have virtual destructors
				57	# put " // namespace" after } that closes a namespace, with
				58	# namespace's name after 'namespace' if it is named.
				59	# - Do not indent namespace contents
				60	# - Avoid inlining non-trivial constructors in header files
				61	# include base/basictypes.h if DISALLOW_EVIL_CONSTRUCTORS is used
				62	# - Check for old-school (void) cast for call-sites of functions
				63	# ignored return value
				64	# - Check gUnit usage of anonymous namespace
				65	# - Check for class declaration order (typedefs, consts, enums,
				66	# ctor(s?), dtor, friend declarations, methods, member vars)
				67	#
				68
				69	"""Does google-lint on c++ files.
				70
				71	The goal of this script is to identify places in the code that may
				72	be in non-compliance with google style. It does not attempt to fix
				73	up these problems -- the point is to educate. It does also not
				74	attempt to find all problems, or to ensure that everything it does
				75	find is legitimately a problem.
				76
				77	In particular, we can get very confused by /* and // inside strings!
				78	We do a small hack, which is to ignore //'s with "'s after them on the
				79	same line, but it is far from perfect (in either direction).
				80	"""
				81
				82	import codecs
				83	import getopt
				84	import math # for log
				85	import os
				86	import re
				87	import sre_compile
				88	import string
				89	import sys
				90	import unicodedata
				91
				92
				93	_USAGE = """
				94	Syntax: cpplint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...]
erg@google.com	26970fa	2009-11-17 18:07:32 +0000	[diff] [blame]	95	[--counting=total\|toplevel\|detailed]
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	96	<file> [file] ...
				97
				98	The style guidelines this tries to follow are those in
				99	http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml
				100
				101	Every problem is given a confidence score from 1-5, with 5 meaning we are
				102	certain of the problem, and 1 meaning it could be a legitimate construct.
				103	This will miss some errors, and is not a substitute for a code review.
				104
erg@google.com	35589e6	2010-11-17 18:58:16 +0000	[diff] [blame]	105	To suppress false-positive errors of a certain category, add a
				106	'NOLINT(category)' comment to the line. NOLINT or NOLINT(*)
				107	suppresses errors of all categories on that line.
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	108
				109	The files passed in will be linted; at least one file must be provided.
				110	Linted extensions are .cc, .cpp, and .h. Other file types will be ignored.
				111
				112	Flags:
				113
				114	output=vs7
				115	By default, the output is formatted to ease emacs parsing. Visual Studio
				116	compatible output (vs7) may also be used. Other formats are unsupported.
				117
				118	verbose=#
				119	Specify a number 0-5 to restrict errors to certain verbosity levels.
				120
				121	filter=-x,+y,...
				122	Specify a comma-separated list of category-filters to apply: only
				123	error messages whose category names pass the filters will be printed.
				124	(Category names are printed with the message and look like
				125	"[whitespace/indent]".) Filters are evaluated left to right.
				126	"-FOO" and "FOO" means "do not print categories that start with FOO".
				127	"+FOO" means "do print categories that start with FOO".
				128
				129	Examples: --filter=-whitespace,+whitespace/braces
				130	--filter=whitespace,runtime/printf,+runtime/printf_format
				131	--filter=-,+build/include_what_you_use
				132
				133	To see a list of all the categories used in cpplint, pass no arg:
				134	--filter=
erg@google.com	26970fa	2009-11-17 18:07:32 +0000	[diff] [blame]	135
				136	counting=total\|toplevel\|detailed
				137	The total number of errors found is always printed. If
				138	'toplevel' is provided, then the count of errors in each of
				139	the top-level categories like 'build' and 'whitespace' will
				140	also be printed. If 'detailed' is provided, then a count
				141	is provided for each category like 'build/class'.
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	142	"""
				143
				144	# We categorize each error message we print. Here are the categories.
				145	# We want an explicit list so we can list them all in cpplint --filter=.
				146	# If you add a new error message with a new category, add it to the list
				147	# here! cpplint_unittest.py should tell you if you forget to do this.
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame]	148	# \ used for clearer layout -- pylint: disable-msg=C6013
erg@google.com	35589e6	2010-11-17 18:58:16 +0000	[diff] [blame]	149	_ERROR_CATEGORIES = [
				150	'build/class',
				151	'build/deprecated',
				152	'build/endif_comment',
asvitkine@chromium.org	8b8d8be	2011-09-08 15:34:45 +0000	[diff] [blame]	153	'build/explicit_make_pair',
erg@google.com	35589e6	2010-11-17 18:58:16 +0000	[diff] [blame]	154	'build/forward_decl',
				155	'build/header_guard',
				156	'build/include',
				157	'build/include_alpha',
				158	'build/include_order',
				159	'build/include_what_you_use',
				160	'build/namespaces',
				161	'build/printf_format',
				162	'build/storage_class',
				163	'legal/copyright',
				164	'readability/braces',
				165	'readability/casting',
				166	'readability/check',
				167	'readability/constructors',
				168	'readability/fn_size',
				169	'readability/function',
				170	'readability/multiline_comment',
				171	'readability/multiline_string',
				172	'readability/nolint',
				173	'readability/streams',
				174	'readability/todo',
				175	'readability/utf8',
				176	'runtime/arrays',
				177	'runtime/casting',
				178	'runtime/explicit',
				179	'runtime/int',
				180	'runtime/init',
				181	'runtime/invalid_increment',
				182	'runtime/member_string_references',
				183	'runtime/memset',
				184	'runtime/operator',
				185	'runtime/printf',
				186	'runtime/printf_format',
				187	'runtime/references',
				188	'runtime/rtti',
				189	'runtime/sizeof',
				190	'runtime/string',
				191	'runtime/threadsafe_fn',
				192	'runtime/virtual',
				193	'whitespace/blank_line',
				194	'whitespace/braces',
				195	'whitespace/comma',
				196	'whitespace/comments',
				197	'whitespace/end_of_line',
				198	'whitespace/ending_newline',
				199	'whitespace/indent',
				200	'whitespace/labels',
				201	'whitespace/line_length',
				202	'whitespace/newline',
				203	'whitespace/operators',
				204	'whitespace/parens',
				205	'whitespace/semicolon',
				206	'whitespace/tab',
				207	'whitespace/todo'
				208	]
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame]	209
				210	# The default state of the category filter. This is overrided by the --filter=
				211	# flag. By default all errors are on, so only add here categories that should be
				212	# off by default (i.e., categories that must be enabled by the --filter= flags).
				213	# All entries here should start with a '-' or '+', as in the --filter= flag.
asvitkine@chromium.org	8b8d8be	2011-09-08 15:34:45 +0000	[diff] [blame]	214	_DEFAULT_FILTERS = ['-build/include_alpha']
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	215
				216	# We used to check for high-bit characters, but after much discussion we
				217	# decided those were OK, as long as they were in UTF-8 and didn't represent
asvitkine@chromium.org	8b8d8be	2011-09-08 15:34:45 +0000	[diff] [blame]	218	# hard-coded international strings, which belong in a separate i18n file.
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	219
				220	# Headers that we consider STL headers.
				221	_STL_HEADERS = frozenset([
				222	'algobase.h', 'algorithm', 'alloc.h', 'bitset', 'deque', 'exception',
				223	'function.h', 'functional', 'hash_map', 'hash_map.h', 'hash_set',
erg@google.com	35589e6	2010-11-17 18:58:16 +0000	[diff] [blame]	224	'hash_set.h', 'iterator', 'list', 'list.h', 'map', 'memory', 'new',
				225	'pair.h', 'pthread_alloc', 'queue', 'set', 'set.h', 'sstream', 'stack',
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	226	'stl_alloc.h', 'stl_relops.h', 'type_traits.h',
				227	'utility', 'vector', 'vector.h',
				228	])
				229
				230
				231	# Non-STL C++ system headers.
				232	_CPP_HEADERS = frozenset([
				233	'algo.h', 'builtinbuf.h', 'bvector.h', 'cassert', 'cctype',
				234	'cerrno', 'cfloat', 'ciso646', 'climits', 'clocale', 'cmath',
				235	'complex', 'complex.h', 'csetjmp', 'csignal', 'cstdarg', 'cstddef',
				236	'cstdio', 'cstdlib', 'cstring', 'ctime', 'cwchar', 'cwctype',
				237	'defalloc.h', 'deque.h', 'editbuf.h', 'exception', 'fstream',
				238	'fstream.h', 'hashtable.h', 'heap.h', 'indstream.h', 'iomanip',
asvitkine@chromium.org	8b8d8be	2011-09-08 15:34:45 +0000	[diff] [blame]	239	'iomanip.h', 'ios', 'iosfwd', 'iostream', 'iostream.h', 'istream',
				240	'istream.h', 'iterator.h', 'limits', 'map.h', 'multimap.h', 'multiset.h',
				241	'numeric', 'ostream', 'ostream.h', 'parsestream.h', 'pfstream.h',
				242	'PlotFile.h', 'procbuf.h', 'pthread_alloc.h', 'rope', 'rope.h',
				243	'ropeimpl.h', 'SFile.h', 'slist', 'slist.h', 'stack.h', 'stdexcept',
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	244	'stdiostream.h', 'streambuf.h', 'stream.h', 'strfile.h', 'string',
				245	'strstream', 'strstream.h', 'tempbuf.h', 'tree.h', 'typeinfo', 'valarray',
				246	])
				247
				248
				249	# Assertion macros. These are defined in base/logging.h and
				250	# testing/base/gunit.h. Note that the _M versions need to come first
				251	# for substring matching to work.
				252	_CHECK_MACROS = [
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame]	253	'DCHECK', 'CHECK',
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	254	'EXPECT_TRUE_M', 'EXPECT_TRUE',
				255	'ASSERT_TRUE_M', 'ASSERT_TRUE',
				256	'EXPECT_FALSE_M', 'EXPECT_FALSE',
				257	'ASSERT_FALSE_M', 'ASSERT_FALSE',
				258	]
				259
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame]	260	# Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	261	_CHECK_REPLACEMENT = dict([(m, {}) for m in _CHECK_MACROS])
				262
				263	for op, replacement in [('==', 'EQ'), ('!=', 'NE'),
				264	('>=', 'GE'), ('>', 'GT'),
				265	('<=', 'LE'), ('<', 'LT')]:
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame]	266	_CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_%s' % replacement
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	267	_CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_%s' % replacement
				268	_CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_%s' % replacement
				269	_CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_%s' % replacement
				270	_CHECK_REPLACEMENT['EXPECT_TRUE_M'][op] = 'EXPECT_%s_M' % replacement
				271	_CHECK_REPLACEMENT['ASSERT_TRUE_M'][op] = 'ASSERT_%s_M' % replacement
				272
				273	for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'),
				274	('>=', 'LT'), ('>', 'LE'),
				275	('<=', 'GT'), ('<', 'GE')]:
				276	_CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_%s' % inv_replacement
				277	_CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_%s' % inv_replacement
				278	_CHECK_REPLACEMENT['EXPECT_FALSE_M'][op] = 'EXPECT_%s_M' % inv_replacement
				279	_CHECK_REPLACEMENT['ASSERT_FALSE_M'][op] = 'ASSERT_%s_M' % inv_replacement
				280
				281
				282	# These constants define types of headers for use with
				283	# _IncludeState.CheckNextIncludeOrder().
				284	_C_SYS_HEADER = 1
				285	_CPP_SYS_HEADER = 2
				286	_LIKELY_MY_HEADER = 3
				287	_POSSIBLE_MY_HEADER = 4
				288	_OTHER_HEADER = 5
				289
				290
				291	_regexp_compile_cache = {}
				292
erg@google.com	35589e6	2010-11-17 18:58:16 +0000	[diff] [blame]	293	# Finds occurrences of NOLINT or NOLINT(...).
				294	_RE_SUPPRESSION = re.compile(r'\bNOLINT\b($[^)]*$)?')
				295
				296	# {str, set(int)}: a map from error categories to sets of linenumbers
				297	# on which those errors are expected and should be suppressed.
				298	_error_suppressions = {}
				299
				300	def ParseNolintSuppressions(filename, raw_line, linenum, error):
				301	"""Updates the global list of error-suppressions.
				302
				303	Parses any NOLINT comments on the current line, updating the global
				304	error_suppressions store. Reports an error if the NOLINT comment
				305	was malformed.
				306
				307	Args:
				308	filename: str, the name of the input file.
				309	raw_line: str, the line of input text, with comments.
				310	linenum: int, the number of the current line.
				311	error: function, an error handler.
				312	"""
				313	# FIXME(adonovan): "NOLINT(" is misparsed as NOLINT(*).
asvitkine@chromium.org	8b8d8be	2011-09-08 15:34:45 +0000	[diff] [blame]	314	matched = _RE_SUPPRESSION.search(raw_line)
				315	if matched:
				316	category = matched.group(1)
erg@google.com	35589e6	2010-11-17 18:58:16 +0000	[diff] [blame]	317	if category in (None, '(*)'): # => "suppress all"
				318	_error_suppressions.setdefault(None, set()).add(linenum)
				319	else:
				320	if category.startswith('(') and category.endswith(')'):
				321	category = category[1:-1]
				322	if category in _ERROR_CATEGORIES:
				323	_error_suppressions.setdefault(category, set()).add(linenum)
				324	else:
				325	error(filename, linenum, 'readability/nolint', 5,
asvitkine@chromium.org	8b8d8be	2011-09-08 15:34:45 +0000	[diff] [blame]	326	'Unknown NOLINT error category: %s' % category)
erg@google.com	35589e6	2010-11-17 18:58:16 +0000	[diff] [blame]	327
				328
				329	def ResetNolintSuppressions():
				330	"Resets the set of NOLINT suppressions to empty."
				331	_error_suppressions.clear()
				332
				333
				334	def IsErrorSuppressedByNolint(category, linenum):
				335	"""Returns true if the specified error category is suppressed on this line.
				336
				337	Consults the global error_suppressions map populated by
				338	ParseNolintSuppressions/ResetNolintSuppressions.
				339
				340	Args:
				341	category: str, the category of the error.
				342	linenum: int, the current line number.
				343	Returns:
				344	bool, True iff the error should be suppressed due to a NOLINT comment.
				345	"""
				346	return (linenum in _error_suppressions.get(category, set()) or
				347	linenum in _error_suppressions.get(None, set()))
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	348
				349	def Match(pattern, s):
				350	"""Matches the string with the pattern, caching the compiled regexp."""
				351	# The regexp compilation caching is inlined in both Match and Search for
				352	# performance reasons; factoring it out into a separate function turns out
				353	# to be noticeably expensive.
				354	if not pattern in _regexp_compile_cache:
				355	_regexp_compile_cache[pattern] = sre_compile.compile(pattern)
				356	return _regexp_compile_cache[pattern].match(s)
				357
				358
				359	def Search(pattern, s):
				360	"""Searches the string for the pattern, caching the compiled regexp."""
				361	if not pattern in _regexp_compile_cache:
				362	_regexp_compile_cache[pattern] = sre_compile.compile(pattern)
				363	return _regexp_compile_cache[pattern].search(s)
				364
				365
				366	class _IncludeState(dict):
				367	"""Tracks line numbers for includes, and the order in which includes appear.
				368
				369	As a dict, an _IncludeState object serves as a mapping between include
				370	filename and line number on which that file was included.
				371
				372	Call CheckNextIncludeOrder() once for each header in the file, passing
				373	in the type constants defined above. Calls in an illegal order will
				374	raise an _IncludeError with an appropriate error message.
				375
				376	"""
				377	# self._section will move monotonically through this set. If it ever
				378	# needs to move backwards, CheckNextIncludeOrder will raise an error.
				379	_INITIAL_SECTION = 0
				380	_MY_H_SECTION = 1
				381	_C_SECTION = 2
				382	_CPP_SECTION = 3
				383	_OTHER_H_SECTION = 4
				384
				385	_TYPE_NAMES = {
				386	_C_SYS_HEADER: 'C system header',
				387	_CPP_SYS_HEADER: 'C++ system header',
				388	_LIKELY_MY_HEADER: 'header this file implements',
				389	_POSSIBLE_MY_HEADER: 'header this file may implement',
				390	_OTHER_HEADER: 'other header',
				391	}
				392	_SECTION_NAMES = {
				393	_INITIAL_SECTION: "... nothing. (This can't be an error.)",
				394	_MY_H_SECTION: 'a header this file implements',
				395	_C_SECTION: 'C system header',
				396	_CPP_SECTION: 'C++ system header',
				397	_OTHER_H_SECTION: 'other header',
				398	}
				399
				400	def __init__(self):
				401	dict.__init__(self)
erg@google.com	26970fa	2009-11-17 18:07:32 +0000	[diff] [blame]	402	# The name of the current section.
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	403	self._section = self._INITIAL_SECTION
erg@google.com	26970fa	2009-11-17 18:07:32 +0000	[diff] [blame]	404	# The path of last found header.
				405	self._last_header = ''
				406
				407	def CanonicalizeAlphabeticalOrder(self, header_path):
asvitkine@chromium.org	8b8d8be	2011-09-08 15:34:45 +0000	[diff] [blame]	408	"""Returns a path canonicalized for alphabetical comparison.
erg@google.com	26970fa	2009-11-17 18:07:32 +0000	[diff] [blame]	409
				410	- replaces "-" with "_" so they both cmp the same.
				411	- removes '-inl' since we don't require them to be after the main header.
				412	- lowercase everything, just in case.
				413
				414	Args:
				415	header_path: Path to be canonicalized.
				416
				417	Returns:
				418	Canonicalized path.
				419	"""
				420	return header_path.replace('-inl.h', '.h').replace('-', '_').lower()
				421
				422	def IsInAlphabeticalOrder(self, header_path):
				423	"""Check if a header is in alphabetical order with the previous header.
				424
				425	Args:
				426	header_path: Header to be checked.
				427
				428	Returns:
				429	Returns true if the header is in alphabetical order.
				430	"""
				431	canonical_header = self.CanonicalizeAlphabeticalOrder(header_path)
				432	if self._last_header > canonical_header:
				433	return False
				434	self._last_header = canonical_header
				435	return True
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	436
				437	def CheckNextIncludeOrder(self, header_type):
				438	"""Returns a non-empty error message if the next header is out of order.
				439
				440	This function also updates the internal state to be ready to check
				441	the next include.
				442
				443	Args:
				444	header_type: One of the _XXX_HEADER constants defined above.
				445
				446	Returns:
				447	The empty string if the header is in the right order, or an
				448	error message describing what's wrong.
				449
				450	"""
				451	error_message = ('Found %s after %s' %
				452	(self._TYPE_NAMES[header_type],
				453	self._SECTION_NAMES[self._section]))
				454
erg@google.com	26970fa	2009-11-17 18:07:32 +0000	[diff] [blame]	455	last_section = self._section
				456
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	457	if header_type == _C_SYS_HEADER:
				458	if self._section <= self._C_SECTION:
				459	self._section = self._C_SECTION
				460	else:
erg@google.com	26970fa	2009-11-17 18:07:32 +0000	[diff] [blame]	461	self._last_header = ''
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	462	return error_message
				463	elif header_type == _CPP_SYS_HEADER:
				464	if self._section <= self._CPP_SECTION:
				465	self._section = self._CPP_SECTION
				466	else:
erg@google.com	26970fa	2009-11-17 18:07:32 +0000	[diff] [blame]	467	self._last_header = ''
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	468	return error_message
				469	elif header_type == _LIKELY_MY_HEADER:
				470	if self._section <= self._MY_H_SECTION:
				471	self._section = self._MY_H_SECTION
				472	else:
				473	self._section = self._OTHER_H_SECTION
				474	elif header_type == _POSSIBLE_MY_HEADER:
				475	if self._section <= self._MY_H_SECTION:
				476	self._section = self._MY_H_SECTION
				477	else:
				478	# This will always be the fallback because we're not sure
				479	# enough that the header is associated with this file.
				480	self._section = self._OTHER_H_SECTION
				481	else:
				482	assert header_type == _OTHER_HEADER
				483	self._section = self._OTHER_H_SECTION
				484
erg@google.com	26970fa	2009-11-17 18:07:32 +0000	[diff] [blame]	485	if last_section != self._section:
				486	self._last_header = ''
				487
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	488	return ''
				489
				490
				491	class _CppLintState(object):
				492	"""Maintains module-wide state.."""
				493
				494	def __init__(self):
				495	self.verbose_level = 1 # global setting.
				496	self.error_count = 0 # global count of reported errors
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame]	497	# filters to apply when emitting error messages
				498	self.filters = _DEFAULT_FILTERS[:]
erg@google.com	26970fa	2009-11-17 18:07:32 +0000	[diff] [blame]	499	self.counting = 'total' # In what way are we counting errors?
				500	self.errors_by_category = {} # string to int dict storing error counts
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	501
				502	# output format:
				503	# "emacs" - format that emacs can parse (default)
				504	# "vs7" - format that Microsoft Visual Studio 7 can parse
				505	self.output_format = 'emacs'
				506
				507	def SetOutputFormat(self, output_format):
				508	"""Sets the output format for errors."""
				509	self.output_format = output_format
				510
				511	def SetVerboseLevel(self, level):
				512	"""Sets the module's verbosity, and returns the previous setting."""
				513	last_verbose_level = self.verbose_level
				514	self.verbose_level = level
				515	return last_verbose_level
				516
erg@google.com	26970fa	2009-11-17 18:07:32 +0000	[diff] [blame]	517	def SetCountingStyle(self, counting_style):
				518	"""Sets the module's counting options."""
				519	self.counting = counting_style
				520
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	521	def SetFilters(self, filters):
				522	"""Sets the error-message filters.
				523
				524	These filters are applied when deciding whether to emit a given
				525	error message.
				526
				527	Args:
				528	filters: A string of comma-separated filters (eg "+whitespace/indent").
				529	Each filter should start with + or -; else we die.
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame]	530
				531	Raises:
				532	ValueError: The comma-separated filters did not all start with '+' or '-'.
				533	E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter"
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	534	"""
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame]	535	# Default filters always have less priority than the flag ones.
				536	self.filters = _DEFAULT_FILTERS[:]
				537	for filt in filters.split(','):
				538	clean_filt = filt.strip()
				539	if clean_filt:
				540	self.filters.append(clean_filt)
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	541	for filt in self.filters:
				542	if not (filt.startswith('+') or filt.startswith('-')):
				543	raise ValueError('Every filter in --filters must start with + or -'
				544	' (%s does not)' % filt)
				545
erg@google.com	26970fa	2009-11-17 18:07:32 +0000	[diff] [blame]	546	def ResetErrorCounts(self):
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	547	"""Sets the module's error statistic back to zero."""
				548	self.error_count = 0
erg@google.com	26970fa	2009-11-17 18:07:32 +0000	[diff] [blame]	549	self.errors_by_category = {}
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	550
erg@google.com	26970fa	2009-11-17 18:07:32 +0000	[diff] [blame]	551	def IncrementErrorCount(self, category):
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	552	"""Bumps the module's error statistic."""
				553	self.error_count += 1
erg@google.com	26970fa	2009-11-17 18:07:32 +0000	[diff] [blame]	554	if self.counting in ('toplevel', 'detailed'):
				555	if self.counting != 'detailed':
				556	category = category.split('/')[0]
				557	if category not in self.errors_by_category:
				558	self.errors_by_category[category] = 0
				559	self.errors_by_category[category] += 1
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	560
erg@google.com	26970fa	2009-11-17 18:07:32 +0000	[diff] [blame]	561	def PrintErrorCounts(self):
				562	"""Print a summary of errors by category, and the total."""
				563	for category, count in self.errors_by_category.iteritems():
				564	sys.stderr.write('Category \'%s\' errors found: %d\n' %
				565	(category, count))
				566	sys.stderr.write('Total errors found: %d\n' % self.error_count)
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	567
				568	_cpplint_state = _CppLintState()
				569
				570
				571	def _OutputFormat():
				572	"""Gets the module's output format."""
				573	return _cpplint_state.output_format
				574
				575
				576	def _SetOutputFormat(output_format):
				577	"""Sets the module's output format."""
				578	_cpplint_state.SetOutputFormat(output_format)
				579
				580
				581	def _VerboseLevel():
				582	"""Returns the module's verbosity setting."""
				583	return _cpplint_state.verbose_level
				584
				585
				586	def _SetVerboseLevel(level):
				587	"""Sets the module's verbosity, and returns the previous setting."""
				588	return _cpplint_state.SetVerboseLevel(level)
				589
				590
def _SetCountingStyle(level):
    """Stores a new error-counting style on the module-wide state.

    Args:
      level: the counting style, passed through unchanged.
    """
    state = _cpplint_state
    state.SetCountingStyle(level)
				594
				595
def _Filters():
    """Returns the list of output filters held by the module-wide state."""
    state = _cpplint_state
    return state.filters
				599
				600
				601	def _SetFilters(filters):
				602	"""Sets the module's error-message filters.
				603
				604	These filters are applied when deciding whether to emit a given
				605	error message.
				606
				607	Args:
				608	filters: A string of comma-separated filters (eg "whitespace/indent").
				609	Each filter should start with + or -; else we die.
				610	"""
				611	_cpplint_state.SetFilters(filters)
				612
				613
				614	class _FunctionState(object):
				615	"""Tracks current function name and the number of lines in its body."""
				616
				617	_NORMAL_TRIGGER = 250 # for --v=0, 500 for --v=1, etc.
				618	_TEST_TRIGGER = 400 # about 50% more than _NORMAL_TRIGGER.
				619
				620	def __init__(self):
				621	self.in_a_function = False
				622	self.lines_in_function = 0
				623	self.current_function = ''
				624
				625	def Begin(self, function_name):
				626	"""Start analyzing function body.
				627
				628	Args:
				629	function_name: The name of the function being tracked.
				630	"""
				631	self.in_a_function = True
				632	self.lines_in_function = 0
				633	self.current_function = function_name
				634
				635	def Count(self):
				636	"""Count line in current function body."""
				637	if self.in_a_function:
				638	self.lines_in_function += 1
				639
				640	def Check(self, error, filename, linenum):
				641	"""Report if too many lines in function body.
				642
				643	Args:
				644	error: The function to call with any errors found.
				645	filename: The name of the current file.
				646	linenum: The number of the line to check.
				647	"""
				648	if Match(r'T(EST\|est)', self.current_function):
				649	base_trigger = self._TEST_TRIGGER
				650	else:
				651	base_trigger = self._NORMAL_TRIGGER
				652	trigger = base_trigger * 2**_VerboseLevel()
				653
				654	if self.lines_in_function > trigger:
				655	error_level = int(math.log(self.lines_in_function / base_trigger, 2))
				656	# 50 => 0, 100 => 1, 200 => 2, 400 => 3, 800 => 4, 1600 => 5, ...
				657	if error_level > 5:
				658	error_level = 5
				659	error(filename, linenum, 'readability/fn_size', error_level,
				660	'Small and focused functions are preferred:'
				661	' %s has %d non-comment lines'
				662	' (error triggered by exceeding %d lines).' % (
				663	self.current_function, self.lines_in_function, trigger))
				664
				665	def End(self):
asvitkine@chromium.org	8b8d8be	2011-09-08 15:34:45 +0000	[diff] [blame]	666	"""Stop analyzing function body."""
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	667	self.in_a_function = False
				668
				669
				670	class _IncludeError(Exception):
				671	"""Indicates a problem with the include order in a file."""
				672	pass
				673
				674
				675	class FileInfo:
				676	"""Provides utility functions for filenames.
				677
				678	FileInfo provides easy access to the components of a file's path
				679	relative to the project root.
				680	"""
				681
				682	def __init__(self, filename):
				683	self._filename = filename
				684
				685	def FullName(self):
				686	"""Make Windows paths like Unix."""
				687	return os.path.abspath(self._filename).replace('\\', '/')
				688
				689	def RepositoryName(self):
				690	"""FullName after removing the local path to the repository.
				691
				692	If we have a real absolute path name here we can try to do something smart:
				693	detecting the root of the checkout and truncating /path/to/checkout from
				694	the name so that we get header guards that don't include things like
				695	"C:\Documents and Settings\..." or "/home/username/..." in them and thus
				696	people on different computers who have checked the source out to different
				697	locations won't see bogus errors.
				698	"""
				699	fullname = self.FullName()
				700
				701	if os.path.exists(fullname):
				702	project_dir = os.path.dirname(fullname)
				703
				704	if os.path.exists(os.path.join(project_dir, ".svn")):
				705	# If there's a .svn file in the current directory, we recursively look
				706	# up the directory tree for the top of the SVN checkout
				707	root_dir = project_dir
				708	one_up_dir = os.path.dirname(root_dir)
				709	while os.path.exists(os.path.join(one_up_dir, ".svn")):
				710	root_dir = os.path.dirname(root_dir)
				711	one_up_dir = os.path.dirname(one_up_dir)
				712
				713	prefix = os.path.commonprefix([root_dir, project_dir])
				714	return fullname[len(prefix) + 1:]
				715
erg@chromium.org	7956a87	2011-11-30 01:44:03 +0000	[diff] [blame]	716	# Not SVN <= 1.6? Try to find a git, hg, or svn top level directory by
				717	# searching up from the current path.
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	718	root_dir = os.path.dirname(fullname)
				719	while (root_dir != os.path.dirname(root_dir) and
erg@google.com	35589e6	2010-11-17 18:58:16 +0000	[diff] [blame]	720	not os.path.exists(os.path.join(root_dir, ".git")) and
erg@chromium.org	7956a87	2011-11-30 01:44:03 +0000	[diff] [blame]	721	not os.path.exists(os.path.join(root_dir, ".hg")) and
				722	not os.path.exists(os.path.join(root_dir, ".svn"))):
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	723	root_dir = os.path.dirname(root_dir)
erg@google.com	35589e6	2010-11-17 18:58:16 +0000	[diff] [blame]	724
				725	if (os.path.exists(os.path.join(root_dir, ".git")) or
erg@chromium.org	7956a87	2011-11-30 01:44:03 +0000	[diff] [blame]	726	os.path.exists(os.path.join(root_dir, ".hg")) or
				727	os.path.exists(os.path.join(root_dir, ".svn"))):
erg@google.com	35589e6	2010-11-17 18:58:16 +0000	[diff] [blame]	728	prefix = os.path.commonprefix([root_dir, project_dir])
				729	return fullname[len(prefix) + 1:]
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	730
				731	# Don't know what to do; header guard warnings may be wrong...
				732	return fullname
				733
				734	def Split(self):
				735	"""Splits the file into the directory, basename, and extension.
				736
				737	For 'chrome/browser/browser.cc', Split() would
				738	return ('chrome/browser', 'browser', '.cc')
				739
				740	Returns:
				741	A tuple of (directory, basename, extension).
				742	"""
				743
				744	googlename = self.RepositoryName()
				745	project, rest = os.path.split(googlename)
				746	return (project,) + os.path.splitext(rest)
				747
				748	def BaseName(self):
				749	"""File base name - text after the final slash, before the final period."""
				750	return self.Split()[1]
				751
				752	def Extension(self):
				753	"""File extension - text following the final period."""
				754	return self.Split()[2]
				755
				756	def NoExtension(self):
				757	"""File has no source file extension."""
				758	return '/'.join(self.Split()[0:2])
				759
				760	def IsSource(self):
				761	"""File has a source file extension."""
				762	return self.Extension()[1:] in ('c', 'cc', 'cpp', 'cxx')
				763
				764
def _ShouldPrintError(category, confidence, linenum):
  """Decides whether an error should be emitted.

  An error is dropped when a "NOLINT(category)" comment suppresses it, when
  its confidence is below the verbosity level, or when the filters exclude
  its category. Filters are applied in order, so the last one whose prefix
  matches the category decides.

  Args:
    category: The error category, e.g. "whitespace/indent".
    confidence: The confidence score of the error.
    linenum: The number of the line the error was found on.

  Returns:
    True if the error should be printed, False otherwise.
  """
  if IsErrorSuppressedByNolint(category, linenum):
    return False
  if confidence < _cpplint_state.verbose_level:
    return False

  suppressed = False
  for one_filter in _Filters():
    sign, prefix = one_filter[:1], one_filter[1:]
    if sign not in ('-', '+'):
      assert False  # should have been checked for in SetFilter.
    elif category.startswith(prefix):
      suppressed = (sign == '-')

  return not suppressed
				790
				791
				792	def Error(filename, linenum, category, confidence, message):
				793	"""Logs the fact we've found a lint error.
				794
				795	We log where the error was found, and also our confidence in the error,
				796	that is, how certain we are this is a legitimate style regression, and
				797	not a misidentification or a use that's sometimes justified.
				798
erg@google.com	35589e6	2010-11-17 18:58:16 +0000	[diff] [blame]	799	False positives can be suppressed by the use of
				800	"cpplint(category)" comments on the offending line. These are
				801	parsed into _error_suppressions.
				802
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	803	Args:
				804	filename: The name of the file containing the error.
				805	linenum: The number of the line containing the error.
				806	category: A string used to describe the "category" this bug
				807	falls under: "whitespace", say, or "runtime". Categories
				808	may have a hierarchy separated by slashes: "whitespace/indent".
				809	confidence: A number from 1-5 representing a confidence score for
				810	the error, with 5 meaning that we are certain of the problem,
				811	and 1 meaning that it could be a legitimate construct.
				812	message: The error message.
				813	"""
erg@google.com	35589e6	2010-11-17 18:58:16 +0000	[diff] [blame]	814	if _ShouldPrintError(category, confidence, linenum):
erg@google.com	26970fa	2009-11-17 18:07:32 +0000	[diff] [blame]	815	_cpplint_state.IncrementErrorCount(category)
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	816	if _cpplint_state.output_format == 'vs7':
				817	sys.stderr.write('%s(%s): %s [%s] [%d]\n' % (
				818	filename, linenum, message, category, confidence))
				819	else:
				820	sys.stderr.write('%s:%s: %s [%s] [%d]\n' % (
				821	filename, linenum, message, category, confidence))
				822
				823
				824	# Matches standard C++ escape esequences per 2.13.2.3 of the C++ standard.
				825	_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
				826	r'\\([abfnrtv?"\\\']\|\d+\|x[0-9a-fA-F]+)')
				827	# Matches strings. Escape codes should already be removed by ESCAPES.
				828	_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES = re.compile(r'"[^"]*"')
				829	# Matches characters. Escape codes should already be removed by ESCAPES.
				830	_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES = re.compile(r"'.'")
				831	# Matches multi-line C++ comments.
				832	# This RE is a little bit more complicated than one might expect, because we
				833	# have to take care of space removals tools so we can handle comments inside
				834	# statements better.
				835	# The current rule is: We only clear spaces from both sides when we're at the
				836	# end of the line. Otherwise, we try to remove spaces from the right side,
				837	# if this doesn't work we try on left side but only if there's a non-character
				838	# on the right.
				839	_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
				840	r"""(\s/\.\/\s*$\|
				841	/\.\*/\s+\|
				842	\s+/\.\*/(?=\W)\|
				843	/\.\*/)""", re.VERBOSE)
				844
				845
				846	def IsCppString(line):
				847	"""Does line terminate so, that the next symbol is in string constant.
				848
				849	This function does not consider single-line nor multi-line comments.
				850
				851	Args:
				852	line: is a partial line of code starting from the 0..n.
				853
				854	Returns:
				855	True, if next character appended to 'line' is inside a
				856	string constant.
				857	"""
				858
				859	line = line.replace(r'\\', 'XX') # after this, \\" does not match to \"
				860	return ((line.count('"') - line.count(r'\"') - line.count("'\"'")) & 1) == 1
				861
				862
				863	def FindNextMultiLineCommentStart(lines, lineix):
				864	"""Find the beginning marker for a multiline comment."""
				865	while lineix < len(lines):
				866	if lines[lineix].strip().startswith('/*'):
				867	# Only return this marker if the comment goes beyond this line
				868	if lines[lineix].strip().find('*/', 2) < 0:
				869	return lineix
				870	lineix += 1
				871	return len(lines)
				872
				873
				874	def FindNextMultiLineCommentEnd(lines, lineix):
				875	"""We are inside a comment, find the end marker."""
				876	while lineix < len(lines):
				877	if lines[lineix].strip().endswith('*/'):
				878	return lineix
				879	lineix += 1
				880	return len(lines)
				881
				882
				883	def RemoveMultiLineCommentsFromRange(lines, begin, end):
				884	"""Clears a range of lines for multi-line comments."""
				885	# Having // dummy comments makes the lines non-empty, so we will not get
				886	# unnecessary blank line warnings later in the code.
				887	for i in range(begin, end):
				888	lines[i] = '// dummy'
				889
				890
				891	def RemoveMultiLineComments(filename, lines, error):
				892	"""Removes multiline (c-style) comments from lines."""
				893	lineix = 0
				894	while lineix < len(lines):
				895	lineix_begin = FindNextMultiLineCommentStart(lines, lineix)
				896	if lineix_begin >= len(lines):
				897	return
				898	lineix_end = FindNextMultiLineCommentEnd(lines, lineix_begin)
				899	if lineix_end >= len(lines):
				900	error(filename, lineix_begin + 1, 'readability/multiline_comment', 5,
				901	'Could not find end of multi-line comment')
				902	return
				903	RemoveMultiLineCommentsFromRange(lines, lineix_begin, lineix_end + 1)
				904	lineix = lineix_end + 1
				905
				906
				907	def CleanseComments(line):
				908	"""Removes //-comments and single-line C-style /* */ comments.
				909
				910	Args:
				911	line: A line of C++ source.
				912
				913	Returns:
				914	The line with single-line comments removed.
				915	"""
				916	commentpos = line.find('//')
				917	if commentpos != -1 and not IsCppString(line[:commentpos]):
asvitkine@chromium.org	8b8d8be	2011-09-08 15:34:45 +0000	[diff] [blame]	918	line = line[:commentpos].rstrip()
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	919	# get rid of /* ... */
				920	return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
				921
				922
class CleansedLines(object):
  """Keeps three views of the same lines, each preprocessed differently.

  1) elided: the lines with strings collapsed and comments removed,
  2) lines: the lines with comments removed, and
  3) raw_lines: the lines exactly as they were passed in.
  All three are lists of the same length.
  """

  def __init__(self, lines):
    self.elided = []
    self.lines = []
    self.raw_lines = lines
    self.num_lines = len(lines)
    for raw_line in lines:
      self.lines.append(CleanseComments(raw_line))
      # Strings are collapsed before comments are stripped.
      collapsed = self._CollapseStrings(raw_line)
      self.elided.append(CleanseComments(collapsed))

  def NumLines(self):
    """Returns the number of lines represented."""
    return self.num_lines

  @staticmethod
  def _CollapseStrings(elided):
    """Collapses strings and chars on a line to simple "" or '' blocks.

    Lines matching _RE_PATTERN_INCLUDE are returned untouched.

    Args:
      elided: The line being processed.

    Returns:
      The line with collapsed strings.
    """
    if _RE_PATTERN_INCLUDE.match(elided):
      return elided
    # Drop escaped characters first so that the quote patterns can stay
    # basic. Things that look like escaped characters shouldn't occur
    # outside of strings and chars.
    without_escapes = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)
    without_chars = _RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES.sub(
        "''", without_escapes)
    return _RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES.sub('""', without_chars)
				966
				967
				968	def CloseExpression(clean_lines, linenum, pos):
				969	"""If input points to ( or { or [, finds the position that closes it.
				970
asvitkine@chromium.org	8b8d8be	2011-09-08 15:34:45 +0000	[diff] [blame]	971	If lines[linenum][pos] points to a '(' or '{' or '[', finds the
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	972	linenum/pos that correspond to the closing of the expression.
				973
				974	Args:
				975	clean_lines: A CleansedLines instance containing the file.
				976	linenum: The number of the line to check.
				977	pos: A position on the line.
				978
				979	Returns:
				980	A tuple (line, linenum, pos) pointer past the closing brace, or
				981	(line, len(lines), -1) if we never find a close. Note we ignore
				982	strings and comments when matching; and the line we return is the
				983	'cleansed' line at linenum.
				984	"""
				985
				986	line = clean_lines.elided[linenum]
				987	startchar = line[pos]
				988	if startchar not in '({[':
				989	return (line, clean_lines.NumLines(), -1)
				990	if startchar == '(': endchar = ')'
				991	if startchar == '[': endchar = ']'
				992	if startchar == '{': endchar = '}'
				993
				994	num_open = line.count(startchar) - line.count(endchar)
				995	while linenum < clean_lines.NumLines() and num_open > 0:
				996	linenum += 1
				997	line = clean_lines.elided[linenum]
				998	num_open += line.count(startchar) - line.count(endchar)
				999	# OK, now find the endchar that actually got us back to even
				1000	endpos = len(line)
				1001	while num_open >= 0:
				1002	endpos = line.rfind(')', 0, endpos)
				1003	num_open -= 1 # chopped off another )
				1004	return (line, linenum, endpos + 1)
				1005
				1006
				1007	def CheckForCopyright(filename, lines, error):
				1008	"""Logs an error if no Copyright message appears at the top of the file."""
				1009
				1010	# We'll say it should occur by line 10. Don't forget there's a
				1011	# dummy line at the front.
				1012	for line in xrange(1, min(len(lines), 11)):
				1013	if re.search(r'Copyright', lines[line], re.I): break
				1014	else: # means no copyright line was found
				1015	error(filename, 0, 'legal/copyright', 5,
				1016	'No copyright message found. '
				1017	'You should have a line: "Copyright [year] <Copyright Owner>"')
				1018
				1019
				1020	def GetHeaderGuardCPPVariable(filename):
				1021	"""Returns the CPP variable that should be used as a header guard.
				1022
				1023	Args:
				1024	filename: The name of a C++ header file.
				1025
				1026	Returns:
				1027	The CPP variable that should be used as a header guard in the
				1028	named file.
				1029
				1030	"""
				1031
erg@google.com	35589e6	2010-11-17 18:58:16 +0000	[diff] [blame]	1032	# Restores original filename in case that cpplint is invoked from Emacs's
				1033	# flymake.
				1034	filename = re.sub(r'_flymake\.h$', '.h', filename)
				1035
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1036	fileinfo = FileInfo(filename)
				1037	return re.sub(r'[-./\s]', '_', fileinfo.RepositoryName()).upper() + '_'
				1038
				1039
				1040	def CheckForHeaderGuard(filename, lines, error):
				1041	"""Checks that the file contains a header guard.
				1042
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame]	1043	Logs an error if no #ifndef header guard is present. For other
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1044	headers, checks that the full pathname is used.
				1045
				1046	Args:
				1047	filename: The name of the C++ header file.
				1048	lines: An array of strings, each representing a line of the file.
				1049	error: The function to call with any errors found.
				1050	"""
				1051
				1052	cppvar = GetHeaderGuardCPPVariable(filename)
				1053
				1054	ifndef = None
				1055	ifndef_linenum = 0
				1056	define = None
				1057	endif = None
				1058	endif_linenum = 0
				1059	for linenum, line in enumerate(lines):
				1060	linesplit = line.split()
				1061	if len(linesplit) >= 2:
				1062	# find the first occurrence of #ifndef and #define, save arg
				1063	if not ifndef and linesplit[0] == '#ifndef':
				1064	# set ifndef to the header guard presented on the #ifndef line.
				1065	ifndef = linesplit[1]
				1066	ifndef_linenum = linenum
				1067	if not define and linesplit[0] == '#define':
				1068	define = linesplit[1]
				1069	# find the last occurrence of #endif, save entire line
				1070	if line.startswith('#endif'):
				1071	endif = line
				1072	endif_linenum = linenum
				1073
				1074	if not ifndef or not define or ifndef != define:
				1075	error(filename, 0, 'build/header_guard', 5,
				1076	'No #ifndef header guard found, suggested CPP variable is: %s' %
				1077	cppvar)
				1078	return
				1079
				1080	# The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__
				1081	# for backward compatibility.
erg@google.com	35589e6	2010-11-17 18:58:16 +0000	[diff] [blame]	1082	if ifndef != cppvar:
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1083	error_level = 0
				1084	if ifndef != cppvar + '_':
				1085	error_level = 5
				1086
erg@google.com	35589e6	2010-11-17 18:58:16 +0000	[diff] [blame]	1087	ParseNolintSuppressions(filename, lines[ifndef_linenum], ifndef_linenum,
				1088	error)
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1089	error(filename, ifndef_linenum, 'build/header_guard', error_level,
				1090	'#ifndef header guard has wrong style, please use: %s' % cppvar)
				1091
erg@google.com	35589e6	2010-11-17 18:58:16 +0000	[diff] [blame]	1092	if endif != ('#endif // %s' % cppvar):
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1093	error_level = 0
				1094	if endif != ('#endif // %s' % (cppvar + '_')):
				1095	error_level = 5
				1096
erg@google.com	35589e6	2010-11-17 18:58:16 +0000	[diff] [blame]	1097	ParseNolintSuppressions(filename, lines[endif_linenum], endif_linenum,
				1098	error)
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1099	error(filename, endif_linenum, 'build/header_guard', error_level,
				1100	'#endif line should be "#endif // %s"' % cppvar)
				1101
				1102
				1103	def CheckForUnicodeReplacementCharacters(filename, lines, error):
				1104	"""Logs an error for each line containing Unicode replacement characters.
				1105
				1106	These indicate that either the file contained invalid UTF-8 (likely)
				1107	or Unicode replacement characters (which it shouldn't). Note that
				1108	it's possible for this to throw off line numbering if the invalid
				1109	UTF-8 occurred adjacent to a newline.
				1110
				1111	Args:
				1112	filename: The name of the current file.
				1113	lines: An array of strings, each representing a line of the file.
				1114	error: The function to call with any errors found.
				1115	"""
				1116	for linenum, line in enumerate(lines):
				1117	if u'\ufffd' in line:
				1118	error(filename, linenum, 'readability/utf8', 5,
				1119	'Line contains invalid UTF-8 (or Unicode replacement character).')
				1120
				1121
				1122	def CheckForNewlineAtEOF(filename, lines, error):
				1123	"""Logs an error if there is no newline char at the end of the file.
				1124
				1125	Args:
				1126	filename: The name of the current file.
				1127	lines: An array of strings, each representing a line of the file.
				1128	error: The function to call with any errors found.
				1129	"""
				1130
				1131	# The array lines() was created by adding two newlines to the
				1132	# original file (go figure), then splitting on \n.
				1133	# To verify that the file ends in \n, we just have to make sure the
				1134	# last-but-two element of lines() exists and is empty.
				1135	if len(lines) < 3 or lines[-2]:
				1136	error(filename, len(lines) - 2, 'whitespace/ending_newline', 5,
				1137	'Could not find a newline character at the end of the file.')
				1138
				1139
				1140	def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error):
				1141	"""Logs an error if we see /* ... */ or "..." that extend past one line.
				1142
				1143	/* ... */ comments are legit inside macros, for one line.
				1144	Otherwise, we prefer // comments, so it's ok to warn about the
				1145	other. Likewise, it's ok for strings to extend across multiple
				1146	lines, as long as a line continuation character (backslash)
				1147	terminates each line. Although not currently prohibited by the C++
				1148	style guide, it's ugly and unnecessary. We don't do well with either
				1149	in this lint program, so we warn about both.
				1150
				1151	Args:
				1152	filename: The name of the current file.
				1153	clean_lines: A CleansedLines instance containing the file.
				1154	linenum: The number of the line to check.
				1155	error: The function to call with any errors found.
				1156	"""
				1157	line = clean_lines.elided[linenum]
				1158
				1159	# Remove all \\ (escaped backslashes) from the line. They are OK, and the
				1160	# second (escaped) slash may trigger later \" detection erroneously.
				1161	line = line.replace('\\\\', '')
				1162
				1163	if line.count('/') > line.count('/'):
				1164	error(filename, linenum, 'readability/multiline_comment', 5,
				1165	'Complex multi-line /.../-style comment found. '
				1166	'Lint may give bogus warnings. '
				1167	'Consider replacing these with //-style comments, '
				1168	'with #if 0...#endif, '
				1169	'or with more clearly structured multi-line comments.')
				1170
				1171	if (line.count('"') - line.count('\\"')) % 2:
				1172	error(filename, linenum, 'readability/multiline_string', 5,
				1173	'Multi-line string ("...") found. This lint script doesn\'t '
				1174	'do well with such strings, and may give bogus warnings. They\'re '
				1175	'ugly and unnecessary, and you should use concatenation instead".')
				1176
				1177
				1178	threading_list = (
				1179	('asctime(', 'asctime_r('),
				1180	('ctime(', 'ctime_r('),
				1181	('getgrgid(', 'getgrgid_r('),
				1182	('getgrnam(', 'getgrnam_r('),
				1183	('getlogin(', 'getlogin_r('),
				1184	('getpwnam(', 'getpwnam_r('),
				1185	('getpwuid(', 'getpwuid_r('),
				1186	('gmtime(', 'gmtime_r('),
				1187	('localtime(', 'localtime_r('),
				1188	('rand(', 'rand_r('),
				1189	('readdir(', 'readdir_r('),
				1190	('strtok(', 'strtok_r('),
				1191	('ttyname(', 'ttyname_r('),
				1192	)
				1193
				1194
				1195	def CheckPosixThreading(filename, clean_lines, linenum, error):
				1196	"""Checks for calls to thread-unsafe functions.
				1197
				1198	Much code has been originally written without consideration of
				1199	multi-threading. Also, engineers are relying on their old experience;
				1200	they have learned posix before threading extensions were added. These
				1201	tests guide the engineers to use thread-safe functions (when using
				1202	posix directly).
				1203
				1204	Args:
				1205	filename: The name of the current file.
				1206	clean_lines: A CleansedLines instance containing the file.
				1207	linenum: The number of the line to check.
				1208	error: The function to call with any errors found.
				1209	"""
				1210	line = clean_lines.elided[linenum]
				1211	for single_thread_function, multithread_safe_function in threading_list:
				1212	ix = line.find(single_thread_function)
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame]	1213	# Comparisons made explicit for clarity -- pylint: disable-msg=C6403
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1214	if ix >= 0 and (ix == 0 or (not line[ix - 1].isalnum() and
				1215	line[ix - 1] not in ('_', '.', '>'))):
				1216	error(filename, linenum, 'runtime/threadsafe_fn', 2,
				1217	'Consider using ' + multithread_safe_function +
				1218	'...) instead of ' + single_thread_function +
				1219	'...) for improved thread safety.')
				1220
				1221
# Matches invalid increment: *count++, which moves pointer instead of
# incrementing a value. The statement must start with '*' (after optional
# whitespace) and consist only of an identifier followed by ++ or --.
_RE_PATTERN_INVALID_INCREMENT = re.compile(
    r'^\s*\*\w+(\+\+|--);')


def CheckInvalidIncrement(filename, clean_lines, linenum, error):
  """Checks for invalid increment *count++.

  For example following function:
  void increment_counter(int* count) {
    *count++;
  }
  is invalid, because it effectively does count++, moving pointer, and should
  be replaced with ++*count, (*count)++ or *count += 1.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]
  if _RE_PATTERN_INVALID_INCREMENT.match(line):
    error(filename, linenum, 'runtime/invalid_increment', 5,
          'Changing pointer instead of value (or unused value of operator*).')
				1248
				1249
class _ClassInfo(object):
  """Holds what is known about one class declaration."""

  def __init__(self, name, clean_lines, linenum):
    self.name = name
    self.linenum = linenum
    self.seen_open_brace = False
    self.is_derived = False
    self.virtual_method_linenumber = None
    self.has_virtual_destructor = False
    self.brace_depth = 0

    # Guess where the class ends: the first line, starting at linenum, after
    # which the braces seen so far balance out. Stays 0 if none is found.
    # This will be confused by things like:
    #   class A {
    #   } *x = { ...
    #
    # But it's still good enough for CheckSectionSpacing.
    self.last_line = 0
    open_braces = 0
    index = linenum
    line_count = clean_lines.NumLines()
    while index < line_count:
      text = clean_lines.lines[index]
      open_braces += text.count('{') - text.count('}')
      if open_braces == 0:
        self.last_line = index
        break
      index += 1
				1275
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1276
				1277	class _ClassState(object):
				1278	"""Holds the current state of the parse relating to class declarations.
				1279
				1280	It maintains a stack of _ClassInfos representing the parser's guess
				1281	as to the current nesting of class declarations. The innermost class
				1282	is at the top (back) of the stack. Typically, the stack will either
				1283	be empty or have exactly one entry.
				1284	"""
				1285
				1286	def __init__(self):
				1287	self.classinfo_stack = []
				1288
				1289	def CheckFinished(self, filename, error):
				1290	"""Checks that all classes have been completely parsed.
				1291
				1292	Call this when all lines in a file have been processed.
				1293	Args:
				1294	filename: The name of the current file.
				1295	error: The function to call with any errors found.
				1296	"""
				1297	if self.classinfo_stack:
				1298	# Note: This test can result in false positives if #ifdef constructs
				1299	# get in the way of brace matching. See the testBuildClass test in
				1300	# cpplint_unittest.py for an example of this.
				1301	error(filename, self.classinfo_stack[0].linenum, 'build/class', 5,
				1302	'Failed to find complete declaration of class %s' %
				1303	self.classinfo_stack[0].name)
				1304
				1305
				1306	def CheckForNonStandardConstructs(filename, clean_lines, linenum,
				1307	class_state, error):
				1308	"""Logs an error if we see certain non-ANSI constructs ignored by gcc-2.
				1309
				1310	Complain about several constructs which gcc-2 accepts, but which are
				1311	not standard C++. Warning about these in lint is one way to ease the
				1312	transition to new compilers.
				1313	- put storage class first (e.g. "static const" instead of "const static").
				1314	- "%lld" instead of %qd" in printf-type functions.
				1315	- "%1$d" is non-standard in printf-type functions.
				1316	- "\%" is an undefined character escape sequence.
				1317	- text after #endif is not allowed.
				1318	- invalid inner-style forward declaration.
				1319	- >? and <? operators, and their >?= and <?= cousins.
				1320	- classes with virtual methods need virtual destructors (compiler warning
				1321	available, but not turned on yet.)
				1322
erg@google.com	26970fa	2009-11-17 18:07:32 +0000	[diff] [blame]	1323	Additionally, check for constructor/destructor style violations and reference
				1324	members, as it is very convenient to do so while checking for
				1325	gcc-2 compliance.
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1326
				1327	Args:
				1328	filename: The name of the current file.
				1329	clean_lines: A CleansedLines instance containing the file.
				1330	linenum: The number of the line to check.
				1331	class_state: A _ClassState instance which maintains information about
				1332	the current stack of nested class declarations being parsed.
				1333	error: A callable to which errors are reported, which takes 4 arguments:
				1334	filename, line number, error level, and message
				1335	"""
				1336
				1337	# Remove comments from the line, but leave in strings for now.
				1338	line = clean_lines.lines[linenum]
				1339
				1340	if Search(r'printf\s\(.".%[-+ ]?\dq', line):
				1341	error(filename, linenum, 'runtime/printf_format', 3,
				1342	'%q in format strings is deprecated. Use %ll instead.')
				1343
				1344	if Search(r'printf\s\(.".*%\d+\$', line):
				1345	error(filename, linenum, 'runtime/printf_format', 2,
				1346	'%N$ formats are unconventional. Try rewriting to avoid them.')
				1347
				1348	# Remove escaped backslashes before looking for undefined escapes.
				1349	line = line.replace('\\\\', '')
				1350
				1351	if Search(r'("\|\').*\\(%\|\[\|\(\|{)', line):
				1352	error(filename, linenum, 'build/printf_format', 3,
				1353	'%, [, (, and { are undefined character escapes. Unescape them.')
				1354
				1355	# For the rest, work with both comments and strings removed.
				1356	line = clean_lines.elided[linenum]
				1357
				1358	if Search(r'\b(const\|volatile\|void\|char\|short\|int\|long'
				1359	r'\|float\|double\|signed\|unsigned'
				1360	r'\|schar\|u?int8\|u?int16\|u?int32\|u?int64)'
				1361	r'\s+(auto\|register\|static\|extern\|typedef)\b',
				1362	line):
				1363	error(filename, linenum, 'build/storage_class', 5,
				1364	'Storage class (static, extern, typedef, etc) should be first.')
				1365
				1366	if Match(r'\s#\sendif\s*[^/\s]+', line):
				1367	error(filename, linenum, 'build/endif_comment', 5,
				1368	'Uncommented text after #endif is non-standard. Use a comment.')
				1369
				1370	if Match(r'\sclass\s+(\w+\s::\s)+\w+\s;', line):
				1371	error(filename, linenum, 'build/forward_decl', 5,
				1372	'Inner-style forward declarations are invalid. Remove this line.')
				1373
				1374	if Search(r'(\w+\|[+-]?\d+(\.\d)?)\s(<\|>)\?=?\s(\w+\|[+-]?\d+)(\.\d)?',
				1375	line):
				1376	error(filename, linenum, 'build/deprecated', 3,
				1377	'>? and <? (max and min) operators are non-standard and deprecated.')
				1378
erg@google.com	26970fa	2009-11-17 18:07:32 +0000	[diff] [blame]	1379	if Search(r'^\sconst\sstring\s&\s\w+\s*;', line):
				1380	# TODO(unknown): Could it be expanded safely to arbitrary references,
				1381	# without triggering too many false positives? The first
				1382	# attempt triggered 5 warnings for mostly benign code in the regtest, hence
				1383	# the restriction.
				1384	# Here's the original regexp, for the reference:
				1385	# type_name = r'\w+((\s::\s\w+)\|(\s<\s\w+?\s*>))?'
				1386	# r'\sconst\s' + type_name + '\s&\s\w+\s*;'
				1387	error(filename, linenum, 'runtime/member_string_references', 2,
				1388	'const string& members are dangerous. It is much better to use '
				1389	'alternatives, such as pointers or simple constants.')
				1390
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1391	# Track class entry and exit, and attempt to find cases within the
				1392	# class declaration that don't meet the C++ style
				1393	# guidelines. Tracking is very dependent on the code matching Google
				1394	# style guidelines, but it seems to perform well enough in testing
				1395	# to be a worthwhile addition to the checks.
				1396	classinfo_stack = class_state.classinfo_stack
asvitkine@chromium.org	8b8d8be	2011-09-08 15:34:45 +0000	[diff] [blame]	1397	# Look for a class declaration. The regexp accounts for decorated classes
				1398	# such as in:
				1399	# class LOCKABLE API Object {
				1400	# };
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1401	class_decl_match = Match(
asvitkine@chromium.org	8b8d8be	2011-09-08 15:34:45 +0000	[diff] [blame]	1402	r'\s(template\s<[\w\s<>,:]>\s)?'
				1403	'(class\|struct)\s+([A-Z_]+\s+)(\w+(::\w+))', line)
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1404	if class_decl_match:
asvitkine@chromium.org	8b8d8be	2011-09-08 15:34:45 +0000	[diff] [blame]	1405	classinfo_stack.append(_ClassInfo(
				1406	class_decl_match.group(4), clean_lines, linenum))
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1407
				1408	# Everything else in this function uses the top of the stack if it's
				1409	# not empty.
				1410	if not classinfo_stack:
				1411	return
				1412
				1413	classinfo = classinfo_stack[-1]
				1414
				1415	# If the opening brace hasn't been seen look for it and also
				1416	# parent class declarations.
				1417	if not classinfo.seen_open_brace:
				1418	# If the line has a ';' in it, assume it's a forward declaration or
				1419	# a single-line class declaration, which we won't process.
				1420	if line.find(';') != -1:
				1421	classinfo_stack.pop()
				1422	return
				1423	classinfo.seen_open_brace = (line.find('{') != -1)
				1424	# Look for a bare ':'
				1425	if Search('(^\|[^:]):($\|[^:])', line):
				1426	classinfo.is_derived = True
				1427	if not classinfo.seen_open_brace:
				1428	return # Everything else in this function is for after open brace
				1429
				1430	# The class may have been declared with namespace or classname qualifiers.
				1431	# The constructor and destructor will not have those qualifiers.
				1432	base_classname = classinfo.name.split('::')[-1]
				1433
				1434	# Look for single-argument constructors that aren't marked explicit.
				1435	# Technically a valid construct, but against style.
asvitkine@chromium.org	8b8d8be	2011-09-08 15:34:45 +0000	[diff] [blame]	1436	args = Match(r'\s+(?:inline\s+)?%s\s*$([^,()]+)$'
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1437	% re.escape(base_classname),
				1438	line)
				1439	if (args and
				1440	args.group(1) != 'void' and
asvitkine@chromium.org	8b8d8be	2011-09-08 15:34:45 +0000	[diff] [blame]	1441	not Match(r'(const\s+)?%s\s(?:<\w+>\s)?&' % re.escape(base_classname),
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1442	args.group(1).strip())):
				1443	error(filename, linenum, 'runtime/explicit', 5,
				1444	'Single-argument constructors should be marked explicit.')
				1445
				1446	# Look for methods declared virtual.
				1447	if Search(r'\bvirtual\b', line):
				1448	classinfo.virtual_method_linenumber = linenum
				1449	# Only look for a destructor declaration on the same line. It would
				1450	# be extremely unlikely for the destructor declaration to occupy
				1451	# more than one line.
				1452	if Search(r'~%s\s*\(' % base_classname, line):
				1453	classinfo.has_virtual_destructor = True
				1454
				1455	# Look for class end.
				1456	brace_depth = classinfo.brace_depth
				1457	brace_depth = brace_depth + line.count('{') - line.count('}')
				1458	if brace_depth <= 0:
				1459	classinfo = classinfo_stack.pop()
				1460	# Try to detect missing virtual destructor declarations.
				1461	# For now, only warn if a non-derived class with virtual methods lacks
				1462	# a virtual destructor. This is to make it less likely that people will
				1463	# declare derived virtual destructors without declaring the base
				1464	# destructor virtual.
				1465	if ((classinfo.virtual_method_linenumber is not None) and
				1466	(not classinfo.has_virtual_destructor) and
				1467	(not classinfo.is_derived)): # Only warn for base classes
				1468	error(filename, classinfo.linenum, 'runtime/virtual', 4,
				1469	'The class %s probably needs a virtual destructor due to '
				1470	'having virtual method(s), one declared at line %d.'
				1471	% (classinfo.name, classinfo.virtual_method_linenumber))
				1472	else:
				1473	classinfo.brace_depth = brace_depth
				1474
				1475
				1476	def CheckSpacingForFunctionCall(filename, line, linenum, error):
				1477	"""Checks for the correctness of various spacing around function calls.
				1478
				1479	Args:
				1480	filename: The name of the current file.
				1481	line: The text of the line to check.
				1482	linenum: The number of the line to check.
				1483	error: The function to call with any errors found.
				1484	"""
				1485
				1486	# Since function calls often occur inside if/for/while/switch
				1487	# expressions - which have their own, more liberal conventions - we
				1488	# first see if we should be looking inside such an expression for a
				1489	# function call, to which we can apply more strict standards.
				1490	fncall = line # if there's no control flow construct, look at whole line
				1491	for pattern in (r'\bif\s$(.)$\s*{',
				1492	r'\bfor\s$(.)$\s*{',
				1493	r'\bwhile\s$(.)$\s*[{;]',
				1494	r'\bswitch\s$(.)$\s*{'):
				1495	match = Search(pattern, line)
				1496	if match:
				1497	fncall = match.group(1) # look inside the parens for function calls
				1498	break
				1499
				1500	# Except in if/for/while/switch, there should never be space
				1501	# immediately inside parens (eg "f( 3, 4 )"). We make an exception
				1502	# for nested parens ( (a+b) + c ). Likewise, there should never be
				1503	# a space before a ( when it's a function argument. I assume it's a
				1504	# function argument when the char before the whitespace is legal in
				1505	# a function name (alnum + _) and we're not starting a macro. Also ignore
				1506	# pointers and references to arrays and functions coz they're too tricky:
				1507	# we use a very simple way to recognize these:
				1508	# " (something)(maybe-something)" or
				1509	# " (something)(maybe-something," or
				1510	# " (something)[something]"
				1511	# Note that we assume the contents of [] to be short enough that
				1512	# they'll never need to wrap.
				1513	if ( # Ignore control structures.
				1514	not Search(r'\b(if\|for\|while\|switch\|return\|delete)\b', fncall) and
				1515	# Ignore pointers/references to functions.
				1516	not Search(r' $[^)]+$$[^)]*($\|,$)', fncall) and
				1517	# Ignore pointers/references to arrays.
				1518	not Search(r' $[^)]+$\[[^\]]+\]', fncall)):
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame]	1519	if Search(r'\w\s\(\s(?!\s\\$)', fncall): # a ( used for a fn call
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1520	error(filename, linenum, 'whitespace/parens', 4,
				1521	'Extra space after ( in function call')
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame]	1522	elif Search(r'$\s+(?!(\s*\$\|\()', fncall):
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1523	error(filename, linenum, 'whitespace/parens', 2,
				1524	'Extra space after (')
				1525	if (Search(r'\w\s+\(', fncall) and
				1526	not Search(r'#\s*define\|typedef', fncall)):
				1527	error(filename, linenum, 'whitespace/parens', 4,
				1528	'Extra space before ( in function call')
				1529	# If the ) is followed only by a newline or a { + newline, assume it's
				1530	# part of a control statement (if/while/etc), and don't complain
				1531	if Search(r'[^)]\s+\)\s*[^{\s]', fncall):
asvitkine@chromium.org	8b8d8be	2011-09-08 15:34:45 +0000	[diff] [blame]	1532	# If the closing parenthesis is preceded by only whitespaces,
				1533	# try to give a more descriptive error message.
				1534	if Search(r'^\s+\)', fncall):
				1535	error(filename, linenum, 'whitespace/parens', 2,
				1536	'Closing ) should be moved to the previous line')
				1537	else:
				1538	error(filename, linenum, 'whitespace/parens', 2,
				1539	'Extra space before )')
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1540
				1541
				1542	def IsBlankLine(line):
				1543	"""Returns true if the given line is blank.
				1544
				1545	We consider a line to be blank if the line is empty or consists of
				1546	only white spaces.
				1547
				1548	Args:
				1549	line: A line of a string.
				1550
				1551	Returns:
				1552	True, if the given line is blank.
				1553	"""
				1554	return not line or line.isspace()
				1555
				1556
				1557	def CheckForFunctionLengths(filename, clean_lines, linenum,
				1558	function_state, error):
				1559	"""Reports for long function bodies.
				1560
				1561	For an overview why this is done, see:
				1562	http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions
				1563
				1564	Uses a simplistic algorithm assuming other style guidelines
				1565	(especially spacing) are followed.
				1566	Only checks unindented functions, so class members are unchecked.
				1567	Trivial bodies are unchecked, so constructors with huge initializer lists
				1568	may be missed.
				1569	Blank/comment lines are not counted so as to avoid encouraging the removal
asvitkine@chromium.org	8b8d8be	2011-09-08 15:34:45 +0000	[diff] [blame]	1570	of vertical space and comments just to get through a lint check.
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1571	NOLINT on the last line of a function disables this check.
				1572
				1573	Args:
				1574	filename: The name of the current file.
				1575	clean_lines: A CleansedLines instance containing the file.
				1576	linenum: The number of the line to check.
				1577	function_state: Current function name and lines in body so far.
				1578	error: The function to call with any errors found.
				1579	"""
				1580	lines = clean_lines.lines
				1581	line = lines[linenum]
				1582	raw = clean_lines.raw_lines
				1583	raw_line = raw[linenum]
				1584	joined_line = ''
				1585
				1586	starting_func = False
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame]	1587	regexp = r'(\w(\w\|::\|\\|\&\|\s))\(' # decls * & space::name( ...
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1588	match_result = Match(regexp, line)
				1589	if match_result:
				1590	# If the name is all caps and underscores, figure it's a macro and
				1591	# ignore it, unless it's TEST or TEST_F.
				1592	function_name = match_result.group(1).split()[-1]
				1593	if function_name == 'TEST' or function_name == 'TEST_F' or (
				1594	not Match(r'[A-Z_]+$', function_name)):
				1595	starting_func = True
				1596
				1597	if starting_func:
				1598	body_found = False
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame]	1599	for start_linenum in xrange(linenum, clean_lines.NumLines()):
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1600	start_line = lines[start_linenum]
				1601	joined_line += ' ' + start_line.lstrip()
				1602	if Search(r'(;\|})', start_line): # Declarations and trivial functions
				1603	body_found = True
				1604	break # ... ignore
				1605	elif Search(r'{', start_line):
				1606	body_found = True
				1607	function = Search(r'((\w\|:)*)\(', line).group(1)
				1608	if Match(r'TEST', function): # Handle TEST... macros
				1609	parameter_regexp = Search(r'($.*$)', joined_line)
				1610	if parameter_regexp: # Ignore bad syntax
				1611	function += parameter_regexp.group(1)
				1612	else:
				1613	function += '()'
				1614	function_state.Begin(function)
				1615	break
				1616	if not body_found:
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame]	1617	# No body for the function (or evidence of a non-function) was found.
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1618	error(filename, linenum, 'readability/fn_size', 5,
				1619	'Lint failed to find start of function body.')
				1620	elif Match(r'^\}\s*$', line): # function end
erg@google.com	35589e6	2010-11-17 18:58:16 +0000	[diff] [blame]	1621	function_state.Check(error, filename, linenum)
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1622	function_state.End()
				1623	elif not Match(r'^\s*$', line):
				1624	function_state.Count() # Count non-blank/non-comment lines.
				1625
				1626
				1627	_RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO($.+?$)?:?(\s\|$)?')
				1628
				1629
				1630	def CheckComment(comment, filename, linenum, error):
				1631	"""Checks for common mistakes in TODO comments.
				1632
				1633	Args:
				1634	comment: The text of the comment from the line in question.
				1635	filename: The name of the current file.
				1636	linenum: The number of the line to check.
				1637	error: The function to call with any errors found.
				1638	"""
				1639	match = _RE_PATTERN_TODO.match(comment)
				1640	if match:
				1641	# One whitespace is correct; zero whitespace is handled elsewhere.
				1642	leading_whitespace = match.group(1)
				1643	if len(leading_whitespace) > 1:
				1644	error(filename, linenum, 'whitespace/todo', 2,
				1645	'Too many spaces before TODO')
				1646
				1647	username = match.group(2)
				1648	if not username:
				1649	error(filename, linenum, 'readability/todo', 2,
				1650	'Missing username in TODO; it should look like '
				1651	'"// TODO(my_username): Stuff."')
				1652
				1653	middle_whitespace = match.group(3)
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame]	1654	# Comparisons made explicit for correctness -- pylint: disable-msg=C6403
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1655	if middle_whitespace != ' ' and middle_whitespace != '':
				1656	error(filename, linenum, 'whitespace/todo', 2,
				1657	'TODO(my_username) should be followed by a space')
				1658
				1659
				1660	def CheckSpacing(filename, clean_lines, linenum, error):
				1661	"""Checks for the correctness of various spacing issues in the code.
				1662
				1663	Things we check for: spaces around operators, spaces after
				1664	if/for/while/switch, no spaces around parens in function calls, two
				1665	spaces between code and comment, don't start a block with a blank
asvitkine@chromium.org	8b8d8be	2011-09-08 15:34:45 +0000	[diff] [blame]	1666	line, don't end a function with a blank line, don't add a blank line
				1667	after public/protected/private, don't have too many blank lines in a row.
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1668
				1669	Args:
				1670	filename: The name of the current file.
				1671	clean_lines: A CleansedLines instance containing the file.
				1672	linenum: The number of the line to check.
				1673	error: The function to call with any errors found.
				1674	"""
				1675
				1676	raw = clean_lines.raw_lines
				1677	line = raw[linenum]
				1678
				1679	# Before nixing comments, check if the line is blank for no good
				1680	# reason. This includes the first line after a block is opened, and
				1681	# blank lines at the end of a function (ie, right before a line like '}'
				1682	if IsBlankLine(line):
				1683	elided = clean_lines.elided
				1684	prev_line = elided[linenum - 1]
				1685	prevbrace = prev_line.rfind('{')
				1686	# TODO(unknown): Don't complain if line before blank line, and line after,
				1687	# both start with alnums and are indented the same amount.
				1688	# This ignores whitespace at the start of a namespace block
				1689	# because those are not usually indented.
				1690	if (prevbrace != -1 and prev_line[prevbrace:].find('}') == -1
				1691	and prev_line[:prevbrace].find('namespace') == -1):
				1692	# OK, we have a blank line at the start of a code block. Before we
				1693	# complain, we check if it is an exception to the rule: The previous
asvitkine@chromium.org	8b8d8be	2011-09-08 15:34:45 +0000	[diff] [blame]	1694	# non-empty line has the parameters of a function header that are indented
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1695	# 4 spaces (because they did not fit in a 80 column line when placed on
				1696	# the same line as the function name). We also check for the case where
				1697	# the previous line is indented 6 spaces, which may happen when the
				1698	# initializers of a constructor do not fit into a 80 column line.
				1699	exception = False
				1700	if Match(r' {6}\w', prev_line): # Initializer list?
				1701	# We are looking for the opening column of initializer list, which
				1702	# should be indented 4 spaces to cause 6 space indentation afterwards.
				1703	search_position = linenum-2
				1704	while (search_position >= 0
				1705	and Match(r' {6}\w', elided[search_position])):
				1706	search_position -= 1
				1707	exception = (search_position >= 0
				1708	and elided[search_position][:5] == ' :')
				1709	else:
				1710	# Search for the function arguments or an initializer list. We use a
				1711	# simple heuristic here: If the line is indented 4 spaces; and we have a
				1712	# closing paren, without the opening paren, followed by an opening brace
				1713	# or colon (for initializer lists) we assume that it is the last line of
				1714	# a function header. If we have a colon indented 4 spaces, it is an
				1715	# initializer list.
				1716	exception = (Match(r' {4}\w[^$]$\s(const\s)?(\{\s$\|:)',
				1717	prev_line)
				1718	or Match(r' {4}:', prev_line))
				1719
				1720	if not exception:
				1721	error(filename, linenum, 'whitespace/blank_line', 2,
				1722	'Blank line at the start of a code block. Is this needed?')
				1723	# This doesn't ignore whitespace at the end of a namespace block
				1724	# because that is too hard without pairing open/close braces;
				1725	# however, a special exception is made for namespace closing
				1726	# brackets which have a comment containing "namespace".
				1727	#
				1728	# Also, ignore blank lines at the end of a block in a long if-else
				1729	# chain, like this:
				1730	# if (condition1) {
				1731	# // Something followed by a blank line
				1732	#
				1733	# } else if (condition2) {
				1734	# // Something else
				1735	# }
				1736	if linenum + 1 < clean_lines.NumLines():
				1737	next_line = raw[linenum + 1]
				1738	if (next_line
				1739	and Match(r'\s*}', next_line)
				1740	and next_line.find('namespace') == -1
				1741	and next_line.find('} else ') == -1):
				1742	error(filename, linenum, 'whitespace/blank_line', 3,
				1743	'Blank line at the end of a code block. Is this needed?')
				1744
asvitkine@chromium.org	8b8d8be	2011-09-08 15:34:45 +0000	[diff] [blame]	1745	matched = Match(r'\s*(public\|protected\|private):', prev_line)
				1746	if matched:
				1747	error(filename, linenum, 'whitespace/blank_line', 3,
				1748	'Do not leave a blank line after "%s:"' % matched.group(1))
				1749
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1750	# Next, we complain if there's a comment too near the text
				1751	commentpos = line.find('//')
				1752	if commentpos != -1:
				1753	# Check if the // may be in quotes. If so, ignore it
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame]	1754	# Comparisons made explicit for clarity -- pylint: disable-msg=C6403
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1755	if (line.count('"', 0, commentpos) -
				1756	line.count('\\"', 0, commentpos)) % 2 == 0: # not in quotes
				1757	# Allow one space for new scopes, two spaces otherwise:
				1758	if (not Match(r'^\s*{ //', line) and
				1759	((commentpos >= 1 and
				1760	line[commentpos-1] not in string.whitespace) or
				1761	(commentpos >= 2 and
				1762	line[commentpos-2] not in string.whitespace))):
				1763	error(filename, linenum, 'whitespace/comments', 2,
				1764	'At least two spaces is best between code and comments')
				1765	# There should always be a space between the // and the comment
				1766	commentend = commentpos + 2
				1767	if commentend < len(line) and not line[commentend] == ' ':
				1768	# but some lines are exceptions -- e.g. if they're big
				1769	# comment delimiters like:
				1770	# //----------------------------------------------------------
erg@google.com	35589e6	2010-11-17 18:58:16 +0000	[diff] [blame]	1771	# or are an empty C++ style Doxygen comment, like:
				1772	# ///
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame]	1773	# or they begin with multiple slashes followed by a space:
				1774	# //////// Header comment
				1775	match = (Search(r'[=/-]{4,}\s*$', line[commentend:]) or
erg@google.com	35589e6	2010-11-17 18:58:16 +0000	[diff] [blame]	1776	Search(r'^/$', line[commentend:]) or
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame]	1777	Search(r'^/+ ', line[commentend:]))
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1778	if not match:
				1779	error(filename, linenum, 'whitespace/comments', 4,
				1780	'Should have a space between // and comment')
				1781	CheckComment(line[commentpos:], filename, linenum, error)
				1782
				1783	line = clean_lines.elided[linenum] # get rid of comments and strings
				1784
				1785	# Don't try to do spacing checks for operator methods
				1786	line = re.sub(r'operator(==\|!=\|<\|<<\|<=\|>=\|>>\|>)\(', 'operator\(', line)
				1787
				1788	# We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )".
				1789	# Otherwise not. Note we only check for non-spaces on both sides;
				1790	# sometimes people put non-spaces on one side when aligning ='s among
				1791	# many lines (not that this is behavior that I approve of...)
				1792	if Search(r'[\w.]=[\w.]', line) and not Search(r'\b(if\|while) ', line):
				1793	error(filename, linenum, 'whitespace/operators', 4,
				1794	'Missing spaces around =')
				1795
				1796	# It's ok not to have spaces around binary operators like + - * /, but if
				1797	# there's too little whitespace, we get concerned. It's hard to tell,
				1798	# though, so we punt on this one for now. TODO.
				1799
				1800	# You should always have whitespace around binary operators.
				1801	# Alas, we can't test < or > because they're legitimately used sans spaces
				1802	# (a->b, vector<int> a). The only time we can tell is a < with no >, and
				1803	# only if it's not template params list spilling into the next line.
				1804	match = Search(r'[^<>=!\s](==\|!=\|<=\|>=)[^<>=!\s]', line)
				1805	if not match:
				1806	# Note that while it seems that the '<[^<]*' term in the following
				1807	# regexp could be simplified to '<.*', which would indeed match
				1808	# the same class of strings, the [^<] means that searching for the
				1809	# regexp takes linear rather than quadratic time.
				1810	if not Search(r'<[^<],\s$', line): # template params spill
				1811	match = Search(r'[^<>=!\s](<)[^<>=!\s]([^>]\|->)*$', line)
				1812	if match:
				1813	error(filename, linenum, 'whitespace/operators', 3,
				1814	'Missing spaces around %s' % match.group(1))
				1815	# We allow no-spaces around << and >> when used like this: 10<<20, but
				1816	# not otherwise (particularly, not when used as streams)
				1817	match = Search(r'[^0-9\s](<<\|>>)[^0-9\s]', line)
				1818	if match:
				1819	error(filename, linenum, 'whitespace/operators', 3,
				1820	'Missing spaces around %s' % match.group(1))
				1821
				1822	# There shouldn't be space around unary operators
				1823	match = Search(r'(!\s\|~\s\|[\s]--[\s;]\|[\s]\+\+[\s;])', line)
				1824	if match:
				1825	error(filename, linenum, 'whitespace/operators', 4,
				1826	'Extra space for operator %s' % match.group(1))
				1827
				1828	# A pet peeve of mine: no spaces after an if, while, switch, or for
				1829	match = Search(r' (if\(\|for\(\|while\(\|switch\()', line)
				1830	if match:
				1831	error(filename, linenum, 'whitespace/parens', 5,
				1832	'Missing space before ( in %s' % match.group(1))
				1833
				1834	# For if/for/while/switch, the left and right parens should be
				1835	# consistent about how many spaces are inside the parens, and
				1836	# there should either be zero or one spaces inside the parens.
				1837	# We don't want: "if ( foo)" or "if ( foo )".
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame]	1838	# Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed.
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1839	match = Search(r'\b(if\|for\|while\|switch)\s*'
				1840	r'$([ ])(.).[^ ]+([ ])$\s{\s*$',
				1841	line)
				1842	if match:
				1843	if len(match.group(2)) != len(match.group(4)):
				1844	if not (match.group(3) == ';' and
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame]	1845	len(match.group(2)) == 1 + len(match.group(4)) or
				1846	not match.group(2) and Search(r'\bfor\s$.; $', line)):
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1847	error(filename, linenum, 'whitespace/parens', 5,
				1848	'Mismatching spaces inside () in %s' % match.group(1))
				1849	if not len(match.group(2)) in [0, 1]:
				1850	error(filename, linenum, 'whitespace/parens', 5,
				1851	'Should have zero or one spaces inside ( and ) in %s' %
				1852	match.group(1))
				1853
				1854	# You should always have a space after a comma (either as fn arg or operator)
				1855	if Search(r',[^\s]', line):
				1856	error(filename, linenum, 'whitespace/comma', 3,
				1857	'Missing space after ,')
				1858
asvitkine@chromium.org	8b8d8be	2011-09-08 15:34:45 +0000	[diff] [blame]	1859	# You should always have a space after a semicolon
				1860	# except for few corner cases
				1861	# TODO(unknown): clarify if 'if (1) { return 1;}' is requires one more
				1862	# space after ;
				1863	if Search(r';[^\s};\\)/]', line):
				1864	error(filename, linenum, 'whitespace/semicolon', 3,
				1865	'Missing space after ;')
				1866
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1867	# Next we will look for issues with function calls.
				1868	CheckSpacingForFunctionCall(filename, line, linenum, error)
				1869
asvitkine@chromium.org	8b8d8be	2011-09-08 15:34:45 +0000	[diff] [blame]	1870	# Except after an opening paren, or after another opening brace (in case of
				1871	# an initializer list, for instance), you should have spaces before your
				1872	# braces. And since you should never have braces at the beginning of a line,
				1873	# this is an easy test.
				1874	if Search(r'[^ ({]{', line):
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1875	error(filename, linenum, 'whitespace/braces', 5,
				1876	'Missing space before {')
				1877
				1878	# Make sure '} else {' has spaces.
				1879	if Search(r'}else', line):
				1880	error(filename, linenum, 'whitespace/braces', 5,
				1881	'Missing space before else')
				1882
				1883	# You shouldn't have spaces before your brackets, except maybe after
				1884	# 'delete []' or 'new char * []'.
				1885	if Search(r'\w\s+\[', line) and not Search(r'delete\s+\[', line):
				1886	error(filename, linenum, 'whitespace/braces', 5,
				1887	'Extra space before [')
				1888
				1889	# You shouldn't have a space before a semicolon at the end of the line.
				1890	# There's a special case for "for" since the style guide allows space before
				1891	# the semicolon there.
				1892	if Search(r':\s;\s$', line):
				1893	error(filename, linenum, 'whitespace/semicolon', 5,
				1894	'Semicolon defining empty statement. Use { } instead.')
				1895	elif Search(r'^\s;\s$', line):
				1896	error(filename, linenum, 'whitespace/semicolon', 5,
				1897	'Line contains only semicolon. If this should be an empty statement, '
				1898	'use { } instead.')
				1899	elif (Search(r'\s+;\s*$', line) and
				1900	not Search(r'\bfor\b', line)):
				1901	error(filename, linenum, 'whitespace/semicolon', 5,
				1902	'Extra space before last semicolon. If this should be an empty '
				1903	'statement, use { } instead.')
				1904
				1905
def CheckSectionSpacing(filename, clean_lines, class_info, linenum, error):
  """Checks for additional blank line issues related to sections.

  Currently the only thing checked here is blank line before
  public/protected/private.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    class_info: A _ClassInfo objects.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # Skip checks if the class is small, where small means 25 lines or less.
  # 25 lines seems like a good cutoff since that's the usual height of
  # terminals, and any class that can't fit in one screen can't really
  # be considered "small".
  #
  # Also skip checks if we are on the first line. This accounts for
  # classes that look like
  #   class Foo { public: ... };
  #
  # If we didn't find the end of the class, last_line would be zero,
  # and the check will be skipped by the first condition.
  if (class_info.last_line - class_info.linenum <= 24 or
      linenum <= class_info.linenum):
    return

  matched = Match(r'\s*(public|protected|private):', clean_lines.lines[linenum])
  if matched:
    # Issue warning if the line before public/protected/private was
    # not a blank line, but don't do this if the previous line contains
    # "class" or "struct". This can happen two ways:
    #  - We are at the beginning of the class.
    #  - We are forward-declaring an inner class that is semantically
    #    private, but needed to be public for implementation reasons.
    prev_line = clean_lines.lines[linenum - 1]
    if (not IsBlankLine(prev_line) and
        not Search(r'\b(class|struct)\b', prev_line)):
      # Try a bit harder to find the beginning of the class. This is to
      # account for multi-line base-specifier lists, e.g.:
      #   class Derived
      #       : public Base {
      end_class_head = class_info.linenum
      for i in range(class_info.linenum, linenum):
        if Search(r'\{\s*$', clean_lines.lines[i]):
          end_class_head = i
          break
      # Only complain when the access specifier is not the very first line
      # after the class head.
      if end_class_head < linenum - 1:
        error(filename, linenum, 'whitespace/blank_line', 3,
              '"%s:" should be preceded by a blank line' % matched.group(1))
				1956
				1957
def GetPreviousNonBlankLine(clean_lines, linenum):
  """Finds the closest non-blank line above the given line.

  Args:
    clean_lines: A CleansedLines instance containing the file contents.
    linenum: The number of the line to start searching above.

  Returns:
    A (contents, line number) tuple for the nearest preceding entry of
    clean_lines.elided that IsBlankLine() does not consider blank.  If no
    such line exists (every earlier line is blank, or linenum is the first
    line), the tuple ('', -1) is returned.
  """
  # Walk upwards from the line directly above linenum to the top of the file.
  for candidate_num in range(linenum - 1, -1, -1):
    candidate = clean_lines.elided[candidate_num]
    if IsBlankLine(candidate):
      continue
    return (candidate, candidate_num)
  return ('', -1)
				1979
				1980
				1981	def CheckBraces(filename, clean_lines, linenum, error):
				1982	"""Looks for misplaced braces (e.g. at the end of line).
				1983
				1984	Args:
				1985	filename: The name of the current file.
				1986	clean_lines: A CleansedLines instance containing the file.
				1987	linenum: The number of the line to check.
				1988	error: The function to call with any errors found.
				1989	"""
				1990
				1991	line = clean_lines.elided[linenum] # get rid of comments and strings
				1992
				1993	if Match(r'\s{\s$', line):
				1994	# We allow an open brace to start a line in the case where someone
				1995	# is using braces in a block to explicitly create a new scope,
				1996	# which is commonly used to control the lifetime of
				1997	# stack-allocated variables. We don't detect this perfectly: we
				1998	# just don't complain if the last non-whitespace character on the
				1999	# previous non-blank line is ';', ':', '{', or '}'.
				2000	prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
				2001	if not Search(r'[;:}{]\s*$', prevline):
				2002	error(filename, linenum, 'whitespace/braces', 4,
				2003	'{ should almost always be at the end of the previous line')
				2004
				2005	# An else clause should be on the same line as the preceding closing brace.
				2006	if Match(r'\selse\s', line):
				2007	prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
				2008	if Match(r'\s}\s$', prevline):
				2009	error(filename, linenum, 'whitespace/newline', 4,
				2010	'An else should appear on the same line as the preceding }')
				2011
				2012	# If braces come on one side of an else, they should be on both.
				2013	# However, we have to worry about "else if" that spans multiple lines!
				2014	if Search(r'}\selse[^{]$', line) or Match(r'[^}]else\s{', line):
				2015	if Search(r'}\selse if([^{])$', line): # could be multi-line if
				2016	# find the ( after the if
				2017	pos = line.find('else if')
				2018	pos = line.find('(', pos)
				2019	if pos > 0:
				2020	(endline, _, endpos) = CloseExpression(clean_lines, linenum, pos)
				2021	if endline[endpos:].find('{') == -1: # must be brace after if
				2022	error(filename, linenum, 'readability/braces', 5,
				2023	'If an else has a brace on one side, it should have it on both')
				2024	else: # common case: else not followed by a multi-line if
				2025	error(filename, linenum, 'readability/braces', 5,
				2026	'If an else has a brace on one side, it should have it on both')
				2027
				2028	# Likewise, an else should never have the else clause on the same line
				2029	if Search(r'\belse [^\s{]', line) and not Search(r'\belse if\b', line):
				2030	error(filename, linenum, 'whitespace/newline', 4,
				2031	'Else clause should never be on same line as else (use 2 lines)')
				2032
				2033	# In the same way, a do/while should never be on one line
				2034	if Match(r'\s*do [^\s{]', line):
				2035	error(filename, linenum, 'whitespace/newline', 4,
				2036	'do/while clauses should not be on a single line')
				2037
				2038	# Braces shouldn't be followed by a ; unless they're defining a struct
				2039	# or initializing an array.
				2040	# We can't tell in general, but we can for some common cases.
				2041	prevlinenum = linenum
				2042	while True:
				2043	(prevline, prevlinenum) = GetPreviousNonBlankLine(clean_lines, prevlinenum)
				2044	if Match(r'\s+{.}\s;', line) and not prevline.count(';'):
				2045	line = prevline + line
				2046	else:
				2047	break
				2048	if (Search(r'{.}\s;', line) and
				2049	line.count('{') == line.count('}') and
				2050	not Search(r'struct\|class\|enum\|\s=\s{', line)):
				2051	error(filename, linenum, 'readability/braces', 4,
				2052	"You don't need a ; after a }")
				2053
				2054
				2055	def ReplaceableCheck(operator, macro, line):
				2056	"""Determine whether a basic CHECK can be replaced with a more specific one.
				2057
				2058	For example suggest using CHECK_EQ instead of CHECK(a == b) and
				2059	similarly for CHECK_GE, CHECK_GT, CHECK_LE, CHECK_LT, CHECK_NE.
				2060
				2061	Args:
				2062	operator: The C++ operator used in the CHECK.
				2063	macro: The CHECK or EXPECT macro being called.
				2064	line: The current source line.
				2065
				2066	Returns:
				2067	True if the CHECK can be replaced with a more specific one.
				2068	"""
				2069
				2070	# This matches decimal and hex integers, strings, and chars (in that order).
				2071	match_constant = r'([-+]?(\d+\|0[xX][0-9a-fA-F]+)[lLuU]{0,3}\|"."\|\'.\')'
				2072
				2073	# Expression to match two sides of the operator with something that
				2074	# looks like a literal, since CHECK(x == iterator) won't compile.
				2075	# This means we can't catch all the cases where a more specific
				2076	# CHECK is possible, but it's less annoying than dealing with
				2077	# extraneous warnings.
				2078	match_this = (r'\s' + macro + r'\((\s' +
				2079	match_constant + r'\s' + operator + r'[^<>].\|'
				2080	r'.[^<>]' + operator + r'\s' + match_constant +
				2081	r'\s*\))')
				2082
				2083	# Don't complain about CHECK(x == NULL) or similar because
				2084	# CHECK_EQ(x, NULL) won't compile (requires a cast).
				2085	# Also, don't complain about more complex boolean expressions
				2086	# involving && or \|\| such as CHECK(a == b \|\| c == d).
				2087	return Match(match_this, line) and not Search(r'NULL\|&&\|\\|\\|', line)
				2088
				2089
				2090	def CheckCheck(filename, clean_lines, linenum, error):
				2091	"""Checks the use of CHECK and EXPECT macros.
				2092
				2093	Args:
				2094	filename: The name of the current file.
				2095	clean_lines: A CleansedLines instance containing the file.
				2096	linenum: The number of the line to check.
				2097	error: The function to call with any errors found.
				2098	"""
				2099
				2100	# Decide the set of replacement macros that should be suggested
				2101	raw_lines = clean_lines.raw_lines
				2102	current_macro = ''
				2103	for macro in _CHECK_MACROS:
				2104	if raw_lines[linenum].find(macro) >= 0:
				2105	current_macro = macro
				2106	break
				2107	if not current_macro:
				2108	# Don't waste time here if line doesn't contain 'CHECK' or 'EXPECT'
				2109	return
				2110
				2111	line = clean_lines.elided[linenum] # get rid of comments and strings
				2112
				2113	# Encourage replacing plain CHECKs with CHECK_EQ/CHECK_NE/etc.
				2114	for operator in ['==', '!=', '>=', '>', '<=', '<']:
				2115	if ReplaceableCheck(operator, current_macro, line):
				2116	error(filename, linenum, 'readability/check', 2,
				2117	'Consider using %s instead of %s(a %s b)' % (
				2118	_CHECK_REPLACEMENT[current_macro][operator],
				2119	current_macro, operator))
				2120	break
				2121
				2122
				2123	def GetLineWidth(line):
				2124	"""Determines the width of the line in column positions.
				2125
				2126	Args:
				2127	line: A string, which may be a Unicode string.
				2128
				2129	Returns:
				2130	The width of the line in column positions, accounting for Unicode
				2131	combining characters and wide characters.
				2132	"""
				2133	if isinstance(line, unicode):
				2134	width = 0
asvitkine@chromium.org	8b8d8be	2011-09-08 15:34:45 +0000	[diff] [blame]	2135	for uc in unicodedata.normalize('NFC', line):
				2136	if unicodedata.east_asian_width(uc) in ('W', 'F'):
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	2137	width += 2
asvitkine@chromium.org	8b8d8be	2011-09-08 15:34:45 +0000	[diff] [blame]	2138	elif not unicodedata.combining(uc):
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	2139	width += 1
				2140	return width
				2141	else:
				2142	return len(line)
				2143
				2144
def CheckStyle(filename, clean_lines, linenum, file_extension, class_state,
               error):
  """Checks rules from the 'C++ style rules' section of cppguide.html.

  Most of these rules are hard to test (naming, comment style), but we
  do what we can.  In particular we check for 2-space indents, line lengths,
  tab usage, spaces inside code, etc.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    file_extension: The extension (without the dot) of the filename.
    class_state: An object exposing a classinfo_stack list, or a false value.
                 When the stack is non-empty its last entry is handed to
                 CheckSectionSpacing.
    error: The function to call with any errors found.
  """

  raw_lines = clean_lines.raw_lines
  line = raw_lines[linenum]

  if line.find('\t') != -1:
    error(filename, linenum, 'whitespace/tab', 1,
          'Tab found; better to use spaces')

  # One or three blank spaces at the beginning of the line is weird; it's
  # hard to reconcile that with 2-space indents.
  # NOTE: here are the conditions rob pike used for his tests.  Mine aren't
  # as sophisticated, but it may be worth becoming so:  RLENGTH==initial_spaces
  # if(RLENGTH > 20) complain = 0;
  # if(match($0, " +(error|private|public|protected):")) complain = 0;
  # if(match(prev, "&& *$")) complain = 0;
  # if(match(prev, "\\|\\| *$")) complain = 0;
  # if(match(prev, "[\",=><] *$")) complain = 0;
  # if(match($0, " <<")) complain = 0;
  # if(match(prev, " +for \\(")) complain = 0;
  # if(prevodd && match(prevprev, " +for \\(")) complain = 0;
  initial_spaces = 0
  cleansed_line = clean_lines.elided[linenum]
  while initial_spaces < len(line) and line[initial_spaces] == ' ':
    initial_spaces += 1
  if line and line[-1].isspace():
    error(filename, linenum, 'whitespace/end_of_line', 4,
          'Line ends in whitespace. Consider deleting these extra spaces.')
  # There are certain situations we allow one space, notably for labels
  elif ((initial_spaces == 1 or initial_spaces == 3) and
        not Match(r'\s*\w+\s*:\s*$', cleansed_line)):
    error(filename, linenum, 'whitespace/indent', 3,
          'Weird number of spaces at line-start. '
          'Are you using a 2-space indent?')
  # Labels should always be indented at least one space.
  elif not initial_spaces and line[:2] != '//' and Search(r'[^:]:\s*$',
                                                          line):
    error(filename, linenum, 'whitespace/labels', 4,
          'Labels should always be indented at least one space. '
          'If this is a member-initializer list in a constructor or '
          'the base class list in a class definition, the colon should '
          'be on the following line.')

  # Check if the line is a header guard.
  is_header_guard = False
  if file_extension == 'h':
    cppvar = GetHeaderGuardCPPVariable(filename)
    # The #endif form uses two spaces before the trailing comment, which is
    # how an end-of-line comment is laid out in the style being checked.
    if (line.startswith('#ifndef %s' % cppvar) or
        line.startswith('#define %s' % cppvar) or
        line.startswith('#endif  // %s' % cppvar)):
      is_header_guard = True
  # #include lines and header guards can be long, since there's no clean way to
  # split them.
  #
  # URLs can be long too.  It's possible to split these, but it makes them
  # harder to cut&paste.
  #
  # The "$Id:...$" comment may also get very long without it being the
  # developers fault.
  if (not line.startswith('#include') and not is_header_guard and
      not Match(r'^\s*//.*http(s?)://\S*$', line) and
      not Match(r'^// \$Id:.*#[0-9]+ \$$', line)):
    line_width = GetLineWidth(line)
    if line_width > 100:
      error(filename, linenum, 'whitespace/line_length', 4,
            'Lines should very rarely be longer than 100 characters')
    elif line_width > 80:
      error(filename, linenum, 'whitespace/line_length', 2,
            'Lines should be <= 80 characters long')

  if (cleansed_line.count(';') > 1 and
      # for loops are allowed two ;'s (and may run over two lines).
      cleansed_line.find('for') == -1 and
      (GetPreviousNonBlankLine(clean_lines, linenum)[0].find('for') == -1 or
       GetPreviousNonBlankLine(clean_lines, linenum)[0].find(';') != -1) and
      # It's ok to have many commands in a switch case that fits in 1 line
      not ((cleansed_line.find('case ') != -1 or
            cleansed_line.find('default:') != -1) and
           cleansed_line.find('break;') != -1)):
    error(filename, linenum, 'whitespace/newline', 4,
          'More than one command on the same line')

  # Some more style checks
  CheckBraces(filename, clean_lines, linenum, error)
  CheckSpacing(filename, clean_lines, linenum, error)
  CheckCheck(filename, clean_lines, linenum, error)
  if class_state and class_state.classinfo_stack:
    CheckSectionSpacing(filename, clean_lines,
                        class_state.classinfo_stack[-1], linenum, error)
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	2249
				2250
				2251	_RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"')
				2252	_RE_PATTERN_INCLUDE = re.compile(r'^\s#\sinclude\s([<"])([^>"])[>"].*$')
				2253	# Matches the first component of a filename delimited by -s and _s. That is:
				2254	# _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
				2255	# _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo'
				2256	# _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo'
				2257	# _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo'
				2258	_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
				2259
				2260
				2261	def _DropCommonSuffixes(filename):
				2262	"""Drops common suffixes like _test.cc or -inl.h from filename.
				2263
				2264	For example:
				2265	>>> _DropCommonSuffixes('foo/foo-inl.h')
				2266	'foo/foo'
				2267	>>> _DropCommonSuffixes('foo/bar/foo.cc')
				2268	'foo/bar/foo'
				2269	>>> _DropCommonSuffixes('foo/foo_internal.h')
				2270	'foo/foo'
				2271	>>> _DropCommonSuffixes('foo/foo_unusualinternal.h')
				2272	'foo/foo_unusualinternal'
				2273
				2274	Args:
				2275	filename: The input filename.
				2276
				2277	Returns:
				2278	The filename with the common suffix removed.
				2279	"""
				2280	for suffix in ('test.cc', 'regtest.cc', 'unittest.cc',
				2281	'inl.h', 'impl.h', 'internal.h'):
				2282	if (filename.endswith(suffix) and len(filename) > len(suffix) and
				2283	filename[-len(suffix) - 1] in ('-', '_')):
				2284	return filename[:-len(suffix) - 1]
				2285	return os.path.splitext(filename)[0]
				2286
				2287
				2288	def _IsTestFilename(filename):
				2289	"""Determines if the given filename has a suffix that identifies it as a test.
				2290
				2291	Args:
				2292	filename: The input filename.
				2293
				2294	Returns:
				2295	True if 'filename' looks like a test, False otherwise.
				2296	"""
				2297	if (filename.endswith('_test.cc') or
				2298	filename.endswith('_unittest.cc') or
				2299	filename.endswith('_regtest.cc')):
				2300	return True
				2301	else:
				2302	return False
				2303
				2304
				2305	def _ClassifyInclude(fileinfo, include, is_system):
				2306	"""Figures out what kind of header 'include' is.
				2307
				2308	Args:
				2309	fileinfo: The current file cpplint is running over. A FileInfo instance.
				2310	include: The path to a #included file.
				2311	is_system: True if the #include used <> rather than "".
				2312
				2313	Returns:
				2314	One of the _XXX_HEADER constants.
				2315
				2316	For example:
				2317	>>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True)
				2318	_C_SYS_HEADER
				2319	>>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True)
				2320	_CPP_SYS_HEADER
				2321	>>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False)
				2322	_LIKELY_MY_HEADER
				2323	>>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'),
				2324	... 'bar/foo_other_ext.h', False)
				2325	_POSSIBLE_MY_HEADER
				2326	>>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False)
				2327	_OTHER_HEADER
				2328	"""
				2329	# This is a list of all standard c++ header files, except
				2330	# those already checked for above.
				2331	is_stl_h = include in _STL_HEADERS
				2332	is_cpp_h = is_stl_h or include in _CPP_HEADERS
				2333
				2334	if is_system:
				2335	if is_cpp_h:
				2336	return _CPP_SYS_HEADER
				2337	else:
				2338	return _C_SYS_HEADER
				2339
				2340	# If the target file and the include we're checking share a
				2341	# basename when we drop common extensions, and the include
				2342	# lives in . , then it's likely to be owned by the target file.
				2343	target_dir, target_base = (
				2344	os.path.split(_DropCommonSuffixes(fileinfo.RepositoryName())))
				2345	include_dir, include_base = os.path.split(_DropCommonSuffixes(include))
				2346	if target_base == include_base and (
				2347	include_dir == target_dir or
				2348	include_dir == os.path.normpath(target_dir + '/../public')):
				2349	return _LIKELY_MY_HEADER
				2350
				2351	# If the target and include share some initial basename
				2352	# component, it's possible the target is implementing the
				2353	# include, so it's allowed to be first, but we'll never
				2354	# complain if it's not there.
				2355	target_first_component = _RE_FIRST_COMPONENT.match(target_base)
				2356	include_first_component = _RE_FIRST_COMPONENT.match(include_base)
				2357	if (target_first_component and include_first_component and
				2358	target_first_component.group(0) ==
				2359	include_first_component.group(0)):
				2360	return _POSSIBLE_MY_HEADER
				2361
				2362	return _OTHER_HEADER
				2363
				2364
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	2365
def CheckIncludeLine(filename, clean_lines, linenum, include_state, error):
  """Check rules that are applicable to #include lines.

  Strings on #include lines are NOT removed from elided line, to make
  certain tasks easier. However, to prevent false positives, checks
  applicable to #include lines in CheckLanguage must be put here.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    include_state: An _IncludeState instance in which the headers are inserted.
    error: The function to call with any errors found.
  """
  fileinfo = FileInfo(filename)

  line = clean_lines.lines[linenum]

  # "include" should use the new style "foo/bar.h" instead of just "bar.h"
  if _RE_PATTERN_INCLUDE_NEW_STYLE.search(line):
    error(filename, linenum, 'build/include', 4,
          'Include the directory when naming .h files')

  # we shouldn't include a file more than once. actually, there are a
  # handful of instances where doing so is okay, but in general it's
  # not.
  match = _RE_PATTERN_INCLUDE.search(line)
  if match:
    include = match.group(2)
    is_system = (match.group(1) == '<')
    if include in include_state:
      error(filename, linenum, 'build/include', 4,
            '"%s" already included at %s:%s' %
            (include, filename, include_state[include]))
    else:
      # Remember where this header was first included.
      include_state[include] = linenum

      # We want to ensure that headers appear in the right order:
      # 1) for foo.cc, foo.h  (preferred location)
      # 2) c system files
      # 3) cpp system files
      # 4) for foo.cc, foo.h  (deprecated location)
      # 5) other google headers
      #
      # We classify each include statement as one of those 5 types
      # using a number of techniques. The include_state object keeps
      # track of the highest type seen, and complains if we see a
      # lower type after that.
      error_message = include_state.CheckNextIncludeOrder(
          _ClassifyInclude(fileinfo, include, is_system))
      if error_message:
        error(filename, linenum, 'build/include_order', 4,
              '%s. Should be: %s.h, c system, c++ system, other.' %
              (error_message, fileinfo.BaseName()))
      if not include_state.IsInAlphabeticalOrder(include):
        error(filename, linenum, 'build/include_alpha', 4,
              'Include "%s" not in alphabetical order' % include)

  # Look for any of the stream classes that are part of standard C++.
  match = _RE_PATTERN_INCLUDE.match(line)
  if match:
    include = match.group(2)
    # The alternatives are separated by a bare '|'; the trailing empty
    # alternative together with '?' also lets plain "stream" match.
    if Match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$', include):
      # Many unit tests use cout, so we exempt them.
      if not _IsTestFilename(filename):
        error(filename, linenum, 'readability/streams', 3,
              'Streams are highly discouraged.')
				2433
asvitkine@chromium.org	8b8d8be	2011-09-08 15:34:45 +0000	[diff] [blame]	2434
				2435	def _GetTextInside(text, start_pattern):
				2436	"""Retrieves all the text between matching open and close parentheses.
				2437
				2438	Given a string of lines and a regular expression string, retrieve all the text
				2439	following the expression and between opening punctuation symbols like
				2440	(, [, or {, and the matching close-punctuation symbol. This properly nested
				2441	occurrences of the punctuations, so for the text like
				2442	printf(a(), b(c()));
				2443	a call to _GetTextInside(text, r'printf\(') will return 'a(), b(c())'.
				2444	start_pattern must match string having an open punctuation symbol at the end.
				2445
				2446	Args:
				2447	text: The lines to extract text. Its comments and strings must be elided.
				2448	It can be single line and can span multiple lines.
				2449	start_pattern: The regexp string indicating where to start extracting
				2450	the text.
				2451	Returns:
				2452	The extracted text.
				2453	None if either the opening string or ending punctuation could not be found.
				2454	"""
				2455	# TODO(sugawarayu): Audit cpplint.py to see what places could be profitably
				2456	# rewritten to use _GetTextInside (and use inferior regexp matching today).
				2457
				2458	# Give opening punctuations to get the matching close-punctuations.
				2459	matching_punctuation = {'(': ')', '{': '}', '[': ']'}
				2460	closing_punctuation = set(matching_punctuation.itervalues())
				2461
				2462	# Find the position to start extracting text.
				2463	match = re.search(start_pattern, text, re.M)
				2464	if not match: # start_pattern not found in text.
				2465	return None
				2466	start_position = match.end(0)
				2467
				2468	assert start_position > 0, (
				2469	'start_pattern must ends with an opening punctuation.')
				2470	assert text[start_position - 1] in matching_punctuation, (
				2471	'start_pattern must ends with an opening punctuation.')
				2472	# Stack of closing punctuations we expect to have in text after position.
				2473	punctuation_stack = [matching_punctuation[text[start_position - 1]]]
				2474	position = start_position
				2475	while punctuation_stack and position < len(text):
				2476	if text[position] == punctuation_stack[-1]:
				2477	punctuation_stack.pop()
				2478	elif text[position] in closing_punctuation:
				2479	# A closing punctuation without matching opening punctuations.
				2480	return None
				2481	elif text[position] in matching_punctuation:
				2482	punctuation_stack.append(matching_punctuation[text[position]])
				2483	position += 1
				2484	if punctuation_stack:
				2485	# Opening punctuations left without matching close-punctuations.
				2486	return None
				2487	# punctuations match.
				2488	return text[start_position:position - 1]
				2489
				2490
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame]	2491	def CheckLanguage(filename, clean_lines, linenum, file_extension, include_state,
				2492	error):
				2493	"""Checks rules from the 'C++ language rules' section of cppguide.html.
				2494
				2495	Some of these rules are hard to test (function overloading, using
				2496	uint32 inappropriately), but we do the best we can.
				2497
				2498	Args:
				2499	filename: The name of the current file.
				2500	clean_lines: A CleansedLines instance containing the file.
				2501	linenum: The number of the line to check.
				2502	file_extension: The extension (without the dot) of the filename.
				2503	include_state: An _IncludeState instance in which the headers are inserted.
				2504	error: The function to call with any errors found.
				2505	"""
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	2506	# If the line is empty or consists of entirely a comment, no need to
				2507	# check it.
				2508	line = clean_lines.elided[linenum]
				2509	if not line:
				2510	return
				2511
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame]	2512	match = _RE_PATTERN_INCLUDE.search(line)
				2513	if match:
				2514	CheckIncludeLine(filename, clean_lines, linenum, include_state, error)
				2515	return
				2516
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	2517	# Create an extended_line, which is the concatenation of the current and
				2518	# next lines, for more effective checking of code that may span more than one
				2519	# line.
				2520	if linenum + 1 < clean_lines.NumLines():
				2521	extended_line = line + clean_lines.elided[linenum + 1]
				2522	else:
				2523	extended_line = line
				2524
				2525	# Make Windows paths like Unix.
				2526	fullname = os.path.abspath(filename).replace('\\', '/')
				2527
				2528	# TODO(unknown): figure out if they're using default arguments in fn proto.
				2529
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	2530	# Check for non-const references in functions. This is tricky because &
				2531	# is also used to take the address of something. We allow <> for templates,
				2532	# (ignoring whatever is between the braces) and : for classes.
				2533	# These are complicated re's. They try to capture the following:
				2534	# paren (for fn-prototype start), typename, &, varname. For the const
				2535	# version, we're willing for const to be before typename or after
asvitkine@chromium.org	8b8d8be	2011-09-08 15:34:45 +0000	[diff] [blame]	2536	# Don't check the implementation on same line.
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	2537	fnline = line.split('{', 1)[0]
				2538	if (len(re.findall(r'\([^()]\b(?:[\w:]\|<[^()]>)+(\s?&\|&\s?)\w+', fnline)) >
				2539	len(re.findall(r'\([^()]*\bconst\s+(?:typename\s+)?(?:struct\s+)?'
				2540	r'(?:[\w:]\|<[^()]*>)+(\s?&\|&\s?)\w+', fnline)) +
				2541	len(re.findall(r'\([^()]\b(?:[\w:]\|<[^()]>)+\s+const(\s?&\|&\s?)[\w]+',
				2542	fnline))):
				2543
				2544	# We allow non-const references in a few standard places, like functions
				2545	# called "swap()" or iostream operators like "<<" or ">>".
				2546	if not Search(
				2547	r'(swap\|Swap\|operator[<>][<>])\s\(\s(?:[\w:]\|<.>)+\s&',
				2548	fnline):
				2549	error(filename, linenum, 'runtime/references', 2,
				2550	'Is this a non-const reference? '
				2551	'If so, make const or use a pointer.')
				2552
				2553	# Check to see if they're using an conversion function cast.
				2554	# I just try to capture the most common basic types, though there are more.
				2555	# Parameterless conversion functions, such as bool(), are allowed as they are
				2556	# probably a member operator declaration or default constructor.
				2557	match = Search(
erg@google.com	26970fa	2009-11-17 18:07:32 +0000	[diff] [blame]	2558	r'(\bnew\s+)?\b' # Grab 'new' operator, if it's there
				2559	r'(int\|float\|double\|bool\|char\|int32\|uint32\|int64\|uint64)\([^)]', line)
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	2560	if match:
				2561	# gMock methods are defined using some variant of MOCK_METHODx(name, type)
				2562	# where type may be float(), int(string), etc. Without context they are
asvitkine@chromium.org	8b8d8be	2011-09-08 15:34:45 +0000	[diff] [blame]	2563	# virtually indistinguishable from int(x) casts. Likewise, gMock's
				2564	# MockCallback takes a template parameter of the form return_type(arg_type),
				2565	# which looks much like the cast we're trying to detect.
erg@google.com	26970fa	2009-11-17 18:07:32 +0000	[diff] [blame]	2566	if (match.group(1) is None and # If new operator, then this isn't a cast
asvitkine@chromium.org	8b8d8be	2011-09-08 15:34:45 +0000	[diff] [blame]	2567	not (Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line) or
				2568	Match(r'^\sMockCallback<.>', line))):
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	2569	error(filename, linenum, 'readability/casting', 4,
				2570	'Using deprecated casting style. '
				2571	'Use static_cast<%s>(...) instead' %
erg@google.com	26970fa	2009-11-17 18:07:32 +0000	[diff] [blame]	2572	match.group(2))
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	2573
				2574	CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
				2575	'static_cast',
asvitkine@chromium.org	8b8d8be	2011-09-08 15:34:45 +0000	[diff] [blame]	2576	r'$(int\|float\|double\|bool\|char\|u?int(16\|32\|64))$', error)
				2577
				2578	# This doesn't catch all cases. Consider (const char * const)"hello".
				2579	#
				2580	# (char *) "foo" should always be a const_cast (reinterpret_cast won't
				2581	# compile).
				2582	if CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
				2583	'const_cast', r'$(char\s?\+\s?)$\s"', error):
				2584	pass
				2585	else:
				2586	# Check pointer casts for other than string constants
				2587	CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
				2588	'reinterpret_cast', r'$(\w+\s?\*+\s?)$', error)
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	2589
				2590	# In addition, we look for people taking the address of a cast. This
				2591	# is dangerous -- casts can assign to temporaries, so the pointer doesn't
				2592	# point where you think.
				2593	if Search(
				2594	r'(&$[^)]+$[\w(])\|(&(static\|dynamic\|reinterpret)_cast\b)', line):
				2595	error(filename, linenum, 'runtime/casting', 4,
				2596	('Are you taking an address of a cast? '
				2597	'This is dangerous: could be a temp var. '
				2598	'Take the address before doing the cast, rather than after'))
				2599
				2600	# Check for people declaring static/global STL strings at the top level.
				2601	# This is dangerous because the C++ language does not guarantee that
				2602	# globals with constructors are initialized before the first access.
				2603	match = Match(
				2604	r'((?:\|static +)(?:\|const +))string +([a-zA-Z0-9_:]+)\b(.*)',
				2605	line)
				2606	# Make sure it's not a function.
				2607	# Function template specialization looks like: "string foo<Type>(...".
				2608	# Class template definitions look like: "string Foo<Type>::Method(...".
				2609	if match and not Match(r'\s(<.>)?(::[a-zA-Z0-9_]+)?\s*\(([^"]\|$)',
				2610	match.group(3)):
				2611	error(filename, linenum, 'runtime/string', 4,
				2612	'For a static/global string constant, use a C style string instead: '
				2613	'"%schar %s[]".' %
				2614	(match.group(1), match.group(2)))
				2615
				2616	# Check that we're not using RTTI outside of testing code.
				2617	if Search(r'\bdynamic_cast<', line) and not _IsTestFilename(filename):
				2618	error(filename, linenum, 'runtime/rtti', 5,
				2619	'Do not use dynamic_cast<>. If you need to cast within a class '
				2620	"hierarchy, use static_cast<> to upcast. Google doesn't support "
				2621	'RTTI.')
				2622
				2623	if Search(r'\b([A-Za-z0-9_]*_)$\1$', line):
				2624	error(filename, linenum, 'runtime/init', 4,
				2625	'You seem to be initializing a member variable with itself.')
				2626
				2627	if file_extension == 'h':
				2628	# TODO(unknown): check that 1-arg constructors are explicit.
				2629	# How to tell it's a constructor?
				2630	# (handled in CheckForNonStandardConstructs for now)
				2631	# TODO(unknown): check that classes have DISALLOW_EVIL_CONSTRUCTORS
				2632	# (level 1 error)
				2633	pass
				2634
				2635	# Check if people are using the verboten C basic types. The only exception
				2636	# we regularly allow is "unsigned short port" for port.
				2637	if Search(r'\bshort port\b', line):
				2638	if not Search(r'\bunsigned short port\b', line):
				2639	error(filename, linenum, 'runtime/int', 4,
				2640	'Use "unsigned short" for ports, not "short"')
				2641	else:
				2642	match = Search(r'\b(short\|long(?! +double)\|long long)\b', line)
				2643	if match:
				2644	error(filename, linenum, 'runtime/int', 4,
				2645	'Use int16/int64/etc, rather than the C type %s' % match.group(1))
				2646
				2647	# When snprintf is used, the second argument shouldn't be a literal.
				2648	match = Search(r'snprintf\s\(([^,]),\s([0-9])\s*,', line)
erg@google.com	35589e6	2010-11-17 18:58:16 +0000	[diff] [blame]	2649	if match and match.group(2) != '0':
				2650	# If 2nd arg is zero, snprintf is used to calculate size.
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	2651	error(filename, linenum, 'runtime/printf', 3,
				2652	'If you can, use sizeof(%s) instead of %s as the 2nd arg '
				2653	'to snprintf.' % (match.group(1), match.group(2)))
				2654
				2655	# Check if some verboten C functions are being used.
				2656	if Search(r'\bsprintf\b', line):
				2657	error(filename, linenum, 'runtime/printf', 5,
				2658	'Never use sprintf. Use snprintf instead.')
				2659	match = Search(r'\b(strcpy\|strcat)\b', line)
				2660	if match:
				2661	error(filename, linenum, 'runtime/printf', 4,
				2662	'Almost always, snprintf is better than %s' % match.group(1))
				2663
				2664	if Search(r'\bsscanf\b', line):
				2665	error(filename, linenum, 'runtime/printf', 1,
				2666	'sscanf can be ok, but is slow and can overflow buffers.')
				2667
erg@google.com	26970fa	2009-11-17 18:07:32 +0000	[diff] [blame]	2668	# Check if some verboten operator overloading is going on
				2669	# TODO(unknown): catch out-of-line unary operator&:
				2670	# class X {};
				2671	# int operator&(const X& x) { return 42; } // unary operator&
				2672	# The trick is it's hard to tell apart from binary operator&:
				2673	# class Y { int operator&(const Y& x) { return 23; } }; // binary operator&
				2674	if Search(r'\boperator\s&\s$\s*$', line):
				2675	error(filename, linenum, 'runtime/operator', 4,
				2676	'Unary operator& is dangerous. Do not use it.')
				2677
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	2678	# Check for suspicious usage of "if" like
				2679	# } if (a == b) {
				2680	if Search(r'\}\sif\s\(', line):
				2681	error(filename, linenum, 'readability/braces', 4,
				2682	'Did you mean "else if"? If not, start a new line for "if".')
				2683
				2684	# Check for potential format string bugs like printf(foo).
				2685	# We constrain the pattern not to pick things like DocidForPrintf(foo).
				2686	# Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
asvitkine@chromium.org	8b8d8be	2011-09-08 15:34:45 +0000	[diff] [blame]	2687	# TODO(sugawarayu): Catch the following case. Need to change the calling
				2688	# convention of the whole function to process multiple line to handle it.
				2689	# printf(
				2690	# boy_this_is_a_really_long_variable_that_cannot_fit_on_the_prev_line);
				2691	printf_args = _GetTextInside(line, r'(?i)\b(string)?printf\s*\(')
				2692	if printf_args:
				2693	match = Match(r'([\w.\->()]+)$', printf_args)
				2694	if match:
				2695	function_name = re.search(r'\b((?:string)?printf)\s*\(',
				2696	line, re.I).group(1)
				2697	error(filename, linenum, 'runtime/printf', 4,
				2698	'Potential format string bug. Do %s("%%s", %s) instead.'
				2699	% (function_name, match.group(1)))
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	2700
				2701	# Check for potential memset bugs like memset(buf, sizeof(buf), 0).
				2702	match = Search(r'memset\s$([^,]),\s([^,]),\s0\s$', line)
				2703	if match and not Match(r"^''\|-?[0-9]+\|0x[0-9A-Fa-f]$", match.group(2)):
				2704	error(filename, linenum, 'runtime/memset', 4,
				2705	'Did you mean "memset(%s, 0, %s)"?'
				2706	% (match.group(1), match.group(2)))
				2707
				2708	if Search(r'\busing namespace\b', line):
				2709	error(filename, linenum, 'build/namespaces', 5,
				2710	'Do not use namespace using-directives. '
				2711	'Use using-declarations instead.')
				2712
				2713	# Detect variable-length arrays.
				2714	match = Match(r'\s(.+::)?(\w+) [a-z]\w\[(.+)];', line)
				2715	if (match and match.group(2) != 'return' and match.group(2) != 'delete' and
				2716	match.group(3).find(']') == -1):
				2717	# Split the size using space and arithmetic operators as delimiters.
				2718	# If any of the resulting tokens are not compile time constants then
				2719	# report the error.
				2720	tokens = re.split(r'\s\|\+\|\-\|\*\|\/\|<<\|>>]', match.group(3))
				2721	is_const = True
				2722	skip_next = False
				2723	for tok in tokens:
				2724	if skip_next:
				2725	skip_next = False
				2726	continue
				2727
				2728	if Search(r'sizeof$.+$', tok): continue
				2729	if Search(r'arraysize$\w+$', tok): continue
				2730
				2731	tok = tok.lstrip('(')
				2732	tok = tok.rstrip(')')
				2733	if not tok: continue
				2734	if Match(r'\d+', tok): continue
				2735	if Match(r'0[xX][0-9a-fA-F]+', tok): continue
				2736	if Match(r'k[A-Z0-9]\w*', tok): continue
				2737	if Match(r'(.+::)?k[A-Z0-9]\w*', tok): continue
				2738	if Match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue
				2739	# A catch all for tricky sizeof cases, including 'sizeof expression',
				2740	# 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
asvitkine@chromium.org	8b8d8be	2011-09-08 15:34:45 +0000	[diff] [blame]	2741	# requires skipping the next token because we split on ' ' and '*'.
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	2742	if tok.startswith('sizeof'):
				2743	skip_next = True
				2744	continue
				2745	is_const = False
				2746	break
				2747	if not is_const:
				2748	error(filename, linenum, 'runtime/arrays', 1,
				2749	'Do not use variable-length arrays. Use an appropriately named '
				2750	"('k' followed by CamelCase) compile-time constant for the size.")
				2751
				2752	# If DISALLOW_EVIL_CONSTRUCTORS, DISALLOW_COPY_AND_ASSIGN, or
				2753	# DISALLOW_IMPLICIT_CONSTRUCTORS is present, then it should be the last thing
				2754	# in the class declaration.
				2755	match = Match(
				2756	(r'\s*'
				2757	r'(DISALLOW_(EVIL_CONSTRUCTORS\|COPY_AND_ASSIGN\|IMPLICIT_CONSTRUCTORS))'
				2758	r'$.*$;$'),
				2759	line)
				2760	if match and linenum + 1 < clean_lines.NumLines():
				2761	next_line = clean_lines.elided[linenum + 1]
asvitkine@chromium.org	8b8d8be	2011-09-08 15:34:45 +0000	[diff] [blame]	2762	# We allow some, but not all, declarations of variables to be present
				2763	# in the statement that defines the class. The [\w\,\s] fragment of
				2764	# the regular expression below allows users to declare instances of
				2765	# the class or pointers to instances, but not less common types such
				2766	# as function pointers or arrays. It's a tradeoff between allowing
				2767	# reasonable code and avoiding trying to parse more C++ using regexps.
				2768	if not Search(r'^\s}[\w\,\s]*;', next_line):
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	2769	error(filename, linenum, 'readability/constructors', 3,
				2770	match.group(1) + ' should be the last thing in the class')
				2771
				2772	# Check for use of unnamed namespaces in header files. Registration
				2773	# macros are typically OK, so we allow use of "namespace {" on lines
				2774	# that end with backslashes.
				2775	if (file_extension == 'h'
				2776	and Search(r'\bnamespace\s*{', line)
				2777	and line[-1] != '\\'):
				2778	error(filename, linenum, 'build/namespaces', 4,
				2779	'Do not use unnamed namespaces in header files. See '
				2780	'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
				2781	' for more information.')
				2782
				2783
				2784	def CheckCStyleCast(filename, linenum, line, raw_line, cast_type, pattern,
				2785	error):
				2786	"""Checks for a C-style cast by looking for the pattern.
				2787
				2788	This also handles sizeof(type) warnings, due to similarity of content.
				2789
				2790	Args:
				2791	filename: The name of the current file.
				2792	linenum: The number of the line to check.
				2793	line: The line of code to check.
				2794	raw_line: The raw line of code to check, with comments.
				2795	cast_type: The string for the C++ cast to recommend. This is either
asvitkine@chromium.org	8b8d8be	2011-09-08 15:34:45 +0000	[diff] [blame]	2796	reinterpret_cast, static_cast, or const_cast, depending.
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	2797	pattern: The regular expression used to find C-style casts.
				2798	error: The function to call with any errors found.
asvitkine@chromium.org	8b8d8be	2011-09-08 15:34:45 +0000	[diff] [blame]	2799
				2800	Returns:
				2801	True if an error was emitted.
				2802	False otherwise.
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	2803	"""
				2804	match = Search(pattern, line)
				2805	if not match:
asvitkine@chromium.org	8b8d8be	2011-09-08 15:34:45 +0000	[diff] [blame]	2806	return False
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	2807
				2808	# e.g., sizeof(int)
				2809	sizeof_match = Match(r'.sizeof\s$', line[0:match.start(1) - 1])
				2810	if sizeof_match:
				2811	error(filename, linenum, 'runtime/sizeof', 1,
				2812	'Using sizeof(type). Use sizeof(varname) instead if possible')
asvitkine@chromium.org	8b8d8be	2011-09-08 15:34:45 +0000	[diff] [blame]	2813	return True
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	2814
				2815	remainder = line[match.end(0):]
				2816
				2817	# The close paren is for function pointers as arguments to a function.
				2818	# eg, void foo(void (*bar)(int));
				2819	# The semicolon check is a more basic function check; also possibly a
				2820	# function pointer typedef.
				2821	# eg, void foo(int); or void foo(int) const;
				2822	# The equals check is for function pointer assignment.
				2823	# eg, void (foo)(int) = ...
asvitkine@chromium.org	8b8d8be	2011-09-08 15:34:45 +0000	[diff] [blame]	2824	# The > is for MockCallback<...> ...
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	2825	#
				2826	# Right now, this will only catch cases where there's a single argument, and
				2827	# it's unnamed. It should probably be expanded to check for multiple
				2828	# arguments with some unnamed.
asvitkine@chromium.org	8b8d8be	2011-09-08 15:34:45 +0000	[diff] [blame]	2829	function_match = Match(r'\s(\)\|=\|(const)?\s(;\|\{\|throw\|>))', remainder)
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	2830	if function_match:
				2831	if (not function_match.group(3) or
				2832	function_match.group(3) == ';' or
asvitkine@chromium.org	8b8d8be	2011-09-08 15:34:45 +0000	[diff] [blame]	2833	('MockCallback<' not in raw_line and
				2834	'/*' not in raw_line)):
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	2835	error(filename, linenum, 'readability/function', 3,
				2836	'All parameters should be named in a function')
asvitkine@chromium.org	8b8d8be	2011-09-08 15:34:45 +0000	[diff] [blame]	2837	return True
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	2838
				2839	# At this point, all that should be left is actual casts.
				2840	error(filename, linenum, 'readability/casting', 4,
				2841	'Using C-style cast. Use %s<%s>(...) instead' %
				2842	(cast_type, match.group(1)))
				2843
asvitkine@chromium.org	8b8d8be	2011-09-08 15:34:45 +0000	[diff] [blame]	2844	return True
				2845
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	2846
				2847	_HEADERS_CONTAINING_TEMPLATES = (
				2848	('<deque>', ('deque',)),
				2849	('<functional>', ('unary_function', 'binary_function',
				2850	'plus', 'minus', 'multiplies', 'divides', 'modulus',
				2851	'negate',
				2852	'equal_to', 'not_equal_to', 'greater', 'less',
				2853	'greater_equal', 'less_equal',
				2854	'logical_and', 'logical_or', 'logical_not',
				2855	'unary_negate', 'not1', 'binary_negate', 'not2',
				2856	'bind1st', 'bind2nd',
				2857	'pointer_to_unary_function',
				2858	'pointer_to_binary_function',
				2859	'ptr_fun',
				2860	'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
				2861	'mem_fun_ref_t',
				2862	'const_mem_fun_t', 'const_mem_fun1_t',
				2863	'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
				2864	'mem_fun_ref',
				2865	)),
				2866	('<limits>', ('numeric_limits',)),
				2867	('<list>', ('list',)),
				2868	('<map>', ('map', 'multimap',)),
				2869	('<memory>', ('allocator',)),
				2870	('<queue>', ('queue', 'priority_queue',)),
				2871	('<set>', ('set', 'multiset',)),
				2872	('<stack>', ('stack',)),
				2873	('<string>', ('char_traits', 'basic_string',)),
				2874	('<utility>', ('pair',)),
				2875	('<vector>', ('vector',)),
				2876
				2877	# gcc extensions.
				2878	# Note: std::hash is their hash, ::hash is our hash
				2879	('<hash_map>', ('hash_map', 'hash_multimap',)),
				2880	('<hash_set>', ('hash_set', 'hash_multiset',)),
				2881	('<slist>', ('slist',)),
				2882	)
				2883
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	2884	_RE_PATTERN_STRING = re.compile(r'\bstring\b')
				2885
				2886	_re_pattern_algorithm_header = []
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame]	2887	for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap',
				2888	'transform'):
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	2889	# Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or
				2890	# type::max().
				2891	_re_pattern_algorithm_header.append(
				2892	(re.compile(r'[^>.]\b' + _template + r'(<.*?>)?$[^$]'),
				2893	_template,
				2894	'<algorithm>'))
				2895
				2896	_re_pattern_templates = []
				2897	for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
				2898	for _template in _templates:
				2899	_re_pattern_templates.append(
				2900	(re.compile(r'(\<\|\b)' + _template + r'\s*\<'),
				2901	_template + '<>',
				2902	_header))
				2903
				2904
def FilesBelongToSameModule(filename_cc, filename_h):
  """Decide whether a .cc file and a .h file are part of one module.

  Two files form a 'module' when, after normalization, the source path ends
  with the header path. Normalization strips the '.cc' / '.h' extension, a
  trailing '_unittest' or '_test' on the source, a trailing '-inl' on the
  header, and collapses '/public/' and '/internal/' path components to '/'.
  So foo.h, foo-inl.h, foo.cc, foo_test.cc and foo_unittest.cc in the same
  directory belong together, as do some/path/public/xyzzy and
  some/path/internal/xyzzy.

  When filename_cc carries a longer path than filename_h (for example
  '/absolute/path/to/base/sysinfo.cc' versus 'base/sysinfo.h'), the leading
  part that the header lacks is returned as well, so the caller can prepend
  it to open the header. The real include paths are unknown here, hence this
  guesswork.

  Known bugs: tools/base/bar.cc and base/bar.h are reported as the same
  module. This yields occasional false positives, expected to be rare.

  Args:
    filename_cc: is the path for the .cc file
    filename_h: is the path for the header path

  Returns:
    Tuple with a bool and a string:
    bool: True if filename_cc and filename_h belong to the same module.
    string: the additional prefix needed to open the header file.
  """
  # Anything that is not a .cc/.h pair can never match.
  if not (filename_cc.endswith('.cc') and filename_h.endswith('.h')):
    return (False, '')

  # Source side: drop the extension, then at most one test suffix.
  module_cc = filename_cc[:-len('.cc')]
  for test_suffix in ('_unittest', '_test'):
    if module_cc.endswith(test_suffix):
      module_cc = module_cc[:-len(test_suffix)]
      break

  # Header side: drop the extension, then an optional '-inl'.
  module_h = filename_h[:-len('.h')]
  if module_h.endswith('-inl'):
    module_h = module_h[:-len('-inl')]

  # Visibility directories do not distinguish modules.
  for visibility_dir in ('/public/', '/internal/'):
    module_cc = module_cc.replace(visibility_dir, '/')
    module_h = module_h.replace(visibility_dir, '/')

  if not module_cc.endswith(module_h):
    return (False, '')
  return (True, module_cc[:-len(module_h)])
				2958
				2959
				2960	def UpdateIncludeState(filename, include_state, io=codecs):
				2961	"""Fill up the include_state with new includes found from the file.
				2962
				2963	Args:
				2964	filename: the name of the header to read.
				2965	include_state: an _IncludeState instance in which the headers are inserted.
				2966	io: The io factory to use to read the file. Provided for testability.
				2967
				2968	Returns:
				2969	True if a header was succesfully added. False otherwise.
				2970	"""
				2971	headerfile = None
				2972	try:
				2973	headerfile = io.open(filename, 'r', 'utf8', 'replace')
				2974	except IOError:
				2975	return False
				2976	linenum = 0
				2977	for line in headerfile:
				2978	linenum += 1
				2979	clean_line = CleanseComments(line)
				2980	match = _RE_PATTERN_INCLUDE.search(clean_line)
				2981	if match:
				2982	include = match.group(2)
				2983	# The value formatting is cute, but not really used right now.
				2984	# What matters here is that the key is in include_state.
				2985	include_state.setdefault(include, '%s:%d' % (filename, linenum))
				2986	return True
				2987
				2988
				2989	def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error,
				2990	io=codecs):
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	2991	"""Reports for missing stl includes.
				2992
				2993	This function will output warnings to make sure you are including the headers
				2994	necessary for the stl containers and functions that you use. We only give one
				2995	reason to include a header. For example, if you use both equal_to<> and
				2996	less<> in a .h file, only one (the latter in the file) of these will be
				2997	reported as a reason to include the <functional>.
				2998
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	2999	Args:
				3000	filename: The name of the current file.
				3001	clean_lines: A CleansedLines instance containing the file.
				3002	include_state: An _IncludeState instance.
				3003	error: The function to call with any errors found.
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame]	3004	io: The IO factory to use to read the header file. Provided for unittest
				3005	injection.
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	3006	"""
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	3007	required = {} # A map of header name to linenumber and the template entity.
				3008	# Example of required: { '<functional>': (1219, 'less<>') }
				3009
				3010	for linenum in xrange(clean_lines.NumLines()):
				3011	line = clean_lines.elided[linenum]
				3012	if not line or line[0] == '#':
				3013	continue
				3014
				3015	# String is special -- it is a non-templatized type in STL.
asvitkine@chromium.org	8b8d8be	2011-09-08 15:34:45 +0000	[diff] [blame]	3016	matched = _RE_PATTERN_STRING.search(line)
				3017	if matched:
erg@google.com	35589e6	2010-11-17 18:58:16 +0000	[diff] [blame]	3018	# Don't warn about strings in non-STL namespaces:
				3019	# (We check only the first match per line; good enough.)
asvitkine@chromium.org	8b8d8be	2011-09-08 15:34:45 +0000	[diff] [blame]	3020	prefix = line[:matched.start()]
erg@google.com	35589e6	2010-11-17 18:58:16 +0000	[diff] [blame]	3021	if prefix.endswith('std::') or not prefix.endswith('::'):
				3022	required['<string>'] = (linenum, 'string')
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	3023
				3024	for pattern, template, header in _re_pattern_algorithm_header:
				3025	if pattern.search(line):
				3026	required[header] = (linenum, template)
				3027
				3028	# The following function is just a speed up, no semantics are changed.
				3029	if not '<' in line: # Reduces the cpu time usage by skipping lines.
				3030	continue
				3031
				3032	for pattern, template, header in _re_pattern_templates:
				3033	if pattern.search(line):
				3034	required[header] = (linenum, template)
				3035
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame]	3036	# The policy is that if you #include something in foo.h you don't need to
				3037	# include it again in foo.cc. Here, we will look at possible includes.
				3038	# Let's copy the include_state so it is only messed up within this function.
				3039	include_state = include_state.copy()
				3040
				3041	# Did we find the header for this file (if any) and succesfully load it?
				3042	header_found = False
				3043
				3044	# Use the absolute path so that matching works properly.
				3045	abs_filename = os.path.abspath(filename)
				3046
				3047	# For Emacs's flymake.
				3048	# If cpplint is invoked from Emacs's flymake, a temporary file is generated
				3049	# by flymake and that file name might end with '_flymake.cc'. In that case,
				3050	# restore original file name here so that the corresponding header file can be
				3051	# found.
				3052	# e.g. If the file name is 'foo_flymake.cc', we should search for 'foo.h'
				3053	# instead of 'foo_flymake.h'
erg@google.com	35589e6	2010-11-17 18:58:16 +0000	[diff] [blame]	3054	abs_filename = re.sub(r'_flymake\.cc$', '.cc', abs_filename)
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame]	3055
				3056	# include_state is modified during iteration, so we iterate over a copy of
				3057	# the keys.
asvitkine@chromium.org	8b8d8be	2011-09-08 15:34:45 +0000	[diff] [blame]	3058	header_keys = include_state.keys()
				3059	for header in header_keys:
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame]	3060	(same_module, common_path) = FilesBelongToSameModule(abs_filename, header)
				3061	fullpath = common_path + header
				3062	if same_module and UpdateIncludeState(fullpath, include_state, io):
				3063	header_found = True
				3064
				3065	# If we can't find the header file for a .cc, assume it's because we don't
				3066	# know where to look. In that case we'll give up as we're not sure they
				3067	# didn't include it in the .h file.
				3068	# TODO(unknown): Do a better job of finding .h files so we are confident that
				3069	# not having the .h file means there isn't one.
				3070	if filename.endswith('.cc') and not header_found:
				3071	return
				3072
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	3073	# All the lines have been processed, report the errors found.
				3074	for required_header_unstripped in required:
				3075	template = required[required_header_unstripped][1]
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	3076	if required_header_unstripped.strip('<>"') not in include_state:
				3077	error(filename, required[required_header_unstripped][0],
				3078	'build/include_what_you_use', 4,
				3079	'Add #include ' + required_header_unstripped + ' for ' + template)
				3080
				3081
# Matches make_pair followed (after optional whitespace) by an explicit
# template argument list.
_RE_PATTERN_EXPLICIT_MAKEPAIR = re.compile(r'\bmake_pair\s*<')


def CheckMakePairUsesDeduction(filename, clean_lines, linenum, error):
  """Flag calls to make_pair that spell out their template arguments.

  G++ 4.6 in C++0x mode fails badly if make_pair's template arguments are
  specified explicitly, and such use isn't intended in any case.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # The raw (uncleansed) line is inspected here.
  raw_line = clean_lines.raw_lines[linenum]
  if _RE_PATTERN_EXPLICIT_MAKEPAIR.search(raw_line) is None:
    return
  error(filename, linenum, 'build/explicit_make_pair',
        4,  # 4 = high confidence
        'Omit template arguments from make_pair OR use pair directly OR'
        ' if appropriate, construct a pair directly')
				3105
				3106
def ProcessLine(filename, file_extension,
                clean_lines, line, include_state, function_state,
                class_state, error, extra_check_functions=None):
  """Processes a single line in the file.

  Runs every per-line check in a fixed order, followed by any caller-supplied
  extra checks.

  Args:
    filename: Filename of the file that is being processed.
    file_extension: The extension (dot not included) of the file.
    clean_lines: An array of strings, each representing a line of the file,
                 with comments stripped.
    line: Number of line being processed.
    include_state: An _IncludeState instance in which the headers are inserted.
    function_state: A _FunctionState instance which counts function lines, etc.
    class_state: A _ClassState instance which maintains information about
                 the current stack of nested class declarations being parsed.
    error: A callable to which errors are reported, which takes 5 arguments:
           filename, line number, category, confidence level, and message
    extra_check_functions: An optional sequence of additional check functions
                           that will be run on each source line. Each function
                           takes 4 arguments: filename, clean_lines, line,
                           error. None (the default) means no extra checks.
  """
  raw_lines = clean_lines.raw_lines
  # NOLINT markers live in comments, so they are parsed from the raw line.
  ParseNolintSuppressions(filename, raw_lines[line], line, error)
  CheckForFunctionLengths(filename, clean_lines, line, function_state, error)
  CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error)
  CheckStyle(filename, clean_lines, line, file_extension, class_state, error)
  CheckLanguage(filename, clean_lines, line, file_extension, include_state,
                error)
  CheckForNonStandardConstructs(filename, clean_lines, line,
                                class_state, error)
  CheckPosixThreading(filename, clean_lines, line, error)
  CheckInvalidIncrement(filename, clean_lines, line, error)
  CheckMakePairUsesDeduction(filename, clean_lines, line, error)
  # A None default avoids sharing one mutable list object across calls.
  for check_fn in extra_check_functions or ():
    check_fn(filename, clean_lines, line, error)
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	3142
def ProcessFileData(filename, file_extension, lines, error,
                    extra_check_functions=None):
  """Performs lint checks and reports any errors to the given error function.

  Runs the whole-file checks (copyright, header guard for 'h' files,
  include-what-you-use, unicode replacement characters, newline at EOF) and
  calls ProcessLine once for every cleansed line.

  Args:
    filename: Filename of the file that is being processed.
    file_extension: The extension (dot not included) of the file.
    lines: An array of strings, each representing a line of the file, with the
           last element being empty if the file is terminated with a newline.
    error: A callable to which errors are reported, which takes 4 arguments:
           filename, line number, error level, and message
    extra_check_functions: An optional array of additional check functions
                           that will be run on each source line. Each function
                           takes 4 arguments: filename, clean_lines, line,
                           error. None (the default) means no extra checks.
  """
  # A None default is used instead of a literal [] so that no single list
  # object is shared between calls; ProcessLine still receives an iterable.
  if extra_check_functions is None:
    extra_check_functions = []

  # Builds a new list (the caller's list object is not rebound) with one
  # sentinel line on each side, so index N corresponds to line number N.
  lines = (['// marker so line numbers and indices both start at 1'] + lines +
           ['// marker so line numbers end in a known way'])

  include_state = _IncludeState()
  function_state = _FunctionState()
  class_state = _ClassState()

  ResetNolintSuppressions()

  CheckForCopyright(filename, lines, error)

  # The header guard check is only applied to files whose extension is 'h'.
  if file_extension == 'h':
    CheckForHeaderGuard(filename, lines, error)

  # NOTE(review): presumably rewrites `lines` in place before cleansing;
  # confirm against RemoveMultiLineComments.
  RemoveMultiLineComments(filename, lines, error)
  clean_lines = CleansedLines(lines)
  for line in xrange(clean_lines.NumLines()):
    ProcessLine(filename, file_extension, clean_lines, line,
                include_state, function_state, class_state, error,
                extra_check_functions)
  class_state.CheckFinished(filename, error)

  CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error)

  # We check here rather than inside ProcessLine so that we see raw
  # lines rather than "cleaned" lines.
  CheckForUnicodeReplacementCharacters(filename, lines, error)

  CheckForNewlineAtEOF(filename, lines, error)
				3187
def ProcessFile(filename, vlevel, extra_check_functions=None):
  """Does google-lint on a single file.

  Reads the file (or stdin when filename is '-'), strips trailing carriage
  returns, and hands the lines to ProcessFileData unless the extension is
  not one of cc, h or cpp. Progress and skip messages go to sys.stderr.

  Args:
    filename: The name of the file to parse.

    vlevel: The level of errors to report.  Every error of confidence
    >= verbose_level will be reported.  0 is a good default.

    extra_check_functions: An optional array of additional check functions
                           that will be run on each source line. Each function
                           takes 4 arguments: filename, clean_lines, line,
                           error. None (the default) means no extra checks.
  """
  # A None default is used instead of a literal [] so that no single list
  # object is shared between calls; ProcessFileData still receives a list.
  if extra_check_functions is None:
    extra_check_functions = []

  _SetVerboseLevel(vlevel)

  try:
    # Support the UNIX convention of using "-" for stdin.  Note that
    # we are not opening the file with universal newline support
    # (which codecs doesn't support anyway), so the resulting lines do
    # contain trailing '\r' characters if we are reading a file that
    # has CRLF endings.
    # If after the split a trailing '\r' is present, it is removed
    # below. If it is not expected to be present (i.e. os.linesep !=
    # '\r\n' as in Windows), a warning is issued below if this file
    # is processed.
    if filename == '-':
      lines = codecs.StreamReaderWriter(sys.stdin,
                                        codecs.getreader('utf8'),
                                        codecs.getwriter('utf8'),
                                        'replace').read().split('\n')
    else:
      source_file = codecs.open(filename, 'r', 'utf8', 'replace')
      # try/finally (rather than `with`) closes the handle even when read()
      # raises, while staying valid on the old interpreters this file targets.
      try:
        lines = source_file.read().split('\n')
      finally:
        source_file.close()
  except IOError:
    sys.stderr.write(
        "Skipping input '%s': Can't open for reading\n" % filename)
    return

  carriage_return_found = False
  # Remove trailing '\r'.
  for linenum, line in enumerate(lines):
    if line.endswith('\r'):
      lines[linenum] = line.rstrip('\r')
      carriage_return_found = True

  # Note, if no dot is found, this will give the entire filename as the ext.
  file_extension = filename[filename.rfind('.') + 1:]

  # When reading from stdin, the extension is unknown, so no cpplint tests
  # should rely on the extension.
  if filename != '-' and file_extension not in ('cc', 'h', 'cpp'):
    sys.stderr.write('Ignoring %s; not a .cc or .h file\n' % filename)
  else:
    ProcessFileData(filename, file_extension, lines, Error,
                    extra_check_functions)
    if carriage_return_found and os.linesep != '\r\n':
      # Use 0 for linenum since outputting only one error for potentially
      # several lines.
      # The two literals are concatenated, so the space after ';' is needed
      # to keep the message from reading "found;better".
      Error(filename, 0, 'whitespace/newline', 1,
            'One or more unexpected \\r (^M) found; '
            'better to use only a \\n')

  sys.stderr.write('Done processing %s\n' % filename)
				3254
				3255
				3256	def PrintUsage(message):
				3257	"""Prints a brief usage string and exits, optionally with an error message.
				3258
				3259	Args:
				3260	message: The optional error message.
				3261	"""
				3262	sys.stderr.write(_USAGE)
				3263	if message:
				3264	sys.exit('\nFATAL ERROR: ' + message)
				3265	else:
				3266	sys.exit(1)
				3267
				3268
				3269	def PrintCategories():
				3270	"""Prints a list of all the error-categories used by error messages.
				3271
				3272	These are the categories used to filter messages via --filter.
				3273	"""
erg@google.com	35589e6	2010-11-17 18:58:16 +0000	[diff] [blame]	3274	sys.stderr.write(''.join(' %s\n' % cat for cat in _ERROR_CATEGORIES))
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	3275	sys.exit(0)
				3276
				3277
				3278	def ParseArguments(args):
				3279	"""Parses the command line arguments.
				3280
				3281	This may set the output format and verbosity level as side-effects.
				3282
				3283	Args:
				3284	args: The command line arguments:
				3285
				3286	Returns:
				3287	The list of filenames to lint.
				3288	"""
				3289	try:
				3290	(opts, filenames) = getopt.getopt(args, '', ['help', 'output=', 'verbose=',
erg@google.com	26970fa	2009-11-17 18:07:32 +0000	[diff] [blame]	3291	'counting=',
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	3292	'filter='])
				3293	except getopt.GetoptError:
				3294	PrintUsage('Invalid arguments.')
				3295
				3296	verbosity = _VerboseLevel()
				3297	output_format = _OutputFormat()
				3298	filters = ''
erg@google.com	26970fa	2009-11-17 18:07:32 +0000	[diff] [blame]	3299	counting_style = ''
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	3300
				3301	for (opt, val) in opts:
				3302	if opt == '--help':
				3303	PrintUsage(None)
				3304	elif opt == '--output':
				3305	if not val in ('emacs', 'vs7'):
				3306	PrintUsage('The only allowed output formats are emacs and vs7.')
				3307	output_format = val
				3308	elif opt == '--verbose':
				3309	verbosity = int(val)
				3310	elif opt == '--filter':
				3311	filters = val
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame]	3312	if not filters:
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	3313	PrintCategories()
erg@google.com	26970fa	2009-11-17 18:07:32 +0000	[diff] [blame]	3314	elif opt == '--counting':
				3315	if val not in ('total', 'toplevel', 'detailed'):
				3316	PrintUsage('Valid counting options are total, toplevel, and detailed')
				3317	counting_style = val
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	3318
				3319	if not filenames:
				3320	PrintUsage('No files were specified.')
				3321
				3322	_SetOutputFormat(output_format)
				3323	_SetVerboseLevel(verbosity)
				3324	_SetFilters(filters)
erg@google.com	26970fa	2009-11-17 18:07:32 +0000	[diff] [blame]	3325	_SetCountingStyle(counting_style)
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	3326
				3327	return filenames
				3328
				3329
				3330	def main():
				3331	filenames = ParseArguments(sys.argv[1:])
				3332
				3333	# Change stderr to write with replacement characters so we don't die
				3334	# if we try to print something containing non-ASCII characters.
				3335	sys.stderr = codecs.StreamReaderWriter(sys.stderr,
				3336	codecs.getreader('utf8'),
				3337	codecs.getwriter('utf8'),
				3338	'replace')
				3339
erg@google.com	26970fa	2009-11-17 18:07:32 +0000	[diff] [blame]	3340	_cpplint_state.ResetErrorCounts()
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	3341	for filename in filenames:
				3342	ProcessFile(filename, _cpplint_state.verbose_level)
erg@google.com	26970fa	2009-11-17 18:07:32 +0000	[diff] [blame]	3343	_cpplint_state.PrintErrorCounts()
				3344
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	3345	sys.exit(_cpplint_state.error_count > 0)
				3346
				3347
				3348	if __name__ == '__main__':
				3349	main()