Blame - cpplint.py - chromium.googlesource.com/chromium/tools/depot_tools

blob: f7adef7ddc0cc98d06665d7607e397258b028d54 [file] [log] [blame]

maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1	#!/usr/bin/python2.4
				2	#
				3	# cpplint.py is Copyright (C) 2009 Google Inc.
				4	#
				5	# It is free software; you can redistribute it and/or modify it under the
				6	# terms of either:
				7	#
				8	# a) the GNU General Public License as published by the Free Software
				9	# Foundation; either version 1, or (at your option) any later version, or
				10	#
				11	# b) the "Artistic License".
				12
				13	# Here are some issues that I've had people identify in my code during reviews,
				14	# that I think are possible to flag automatically in a lint tool. If these were
				15	# caught by lint, it would save time both for myself and that of my reviewers.
				16	# Most likely, some of these are beyond the scope of the current lint framework,
				17	# but I think it is valuable to retain these wish-list items even if they cannot
				18	# be immediately implemented.
				19	#
				20	# Suggestions
				21	# -----------
				22	# - Check for no 'explicit' for multi-arg ctor
				23	# - Check for boolean assign RHS in parens
				24	# - Check for ctor initializer-list colon position and spacing
				25	# - Check that if there's a ctor, there should be a dtor
				26	# - Check accessors that return non-pointer member variables are
				27	# declared const
				28	# - Check accessors that return non-const pointer member vars are
				29	# not declared const
				30	# - Check for using public includes for testing
				31	# - Check for spaces between brackets in one-line inline method
				32	# - Check for no assert()
				33	# - Check for spaces surrounding operators
				34	# - Check for 0 in pointer context (should be NULL)
				35	# - Check for 0 in char context (should be '\0')
				36	# - Check for camel-case method name conventions for methods
				37	# that are not simple inline getters and setters
				38	# - Check that base classes have virtual destructors
				39	# put " // namespace" after } that closes a namespace, with
				40	# namespace's name after 'namespace' if it is named.
				41	# - Do not indent namespace contents
				42	# - Avoid inlining non-trivial constructors in header files
				43	# include base/basictypes.h if DISALLOW_EVIL_CONSTRUCTORS is used
				44	# - Check for old-school (void) cast for call-sites of functions
				45	# ignored return value
				46	# - Check gUnit usage of anonymous namespace
				47	# - Check for class declaration order (typedefs, consts, enums,
				48	# ctor(s?), dtor, friend declarations, methods, member vars)
				49	#
				50
				51	"""Does google-lint on c++ files.
				52
				53	The goal of this script is to identify places in the code that may
				54	be in non-compliance with google style. It does not attempt to fix
				55	up these problems -- the point is to educate. It does also not
				56	attempt to find all problems, or to ensure that everything it does
				57	find is legitimately a problem.
				58
				59	In particular, we can get very confused by /* and // inside strings!
				60	We do a small hack, which is to ignore //'s with "'s after them on the
				61	same line, but it is far from perfect (in either direction).
				62	"""
				63
				64	import codecs
				65	import getopt
				66	import math # for log
				67	import os
				68	import re
				69	import sre_compile
				70	import string
				71	import sys
				72	import unicodedata
				73
				74
# Help text describing the command-line syntax and the supported flags
# (--verbose, --output, --filter).
# NOTE(review): the text below says a bare "FOO" filter is accepted, but
# _CppLintState.SetFilters raises ValueError for any filter that does not
# start with '+' or '-' -- confirm which behavior is intended.
_USAGE = """
Syntax: cpplint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...]
<file> [file] ...

The style guidelines this tries to follow are those in
http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml

Every problem is given a confidence score from 1-5, with 5 meaning we are
certain of the problem, and 1 meaning it could be a legitimate construct.
This will miss some errors, and is not a substitute for a code review.

To prevent specific lines from being linted, add a '// NOLINT' comment to the
end of the line.

The files passed in will be linted; at least one file must be provided.
Linted extensions are .cc, .cpp, and .h. Other file types will be ignored.

Flags:

output=vs7
By default, the output is formatted to ease emacs parsing. Visual Studio
compatible output (vs7) may also be used. Other formats are unsupported.

verbose=#
Specify a number 0-5 to restrict errors to certain verbosity levels.

filter=-x,+y,...
Specify a comma-separated list of category-filters to apply: only
error messages whose category names pass the filters will be printed.
(Category names are printed with the message and look like
"[whitespace/indent]".) Filters are evaluated left to right.
"-FOO" and "FOO" means "do not print categories that start with FOO".
"+FOO" means "do print categories that start with FOO".

Examples: --filter=-whitespace,+whitespace/braces
--filter=whitespace,runtime/printf,+runtime/printf_format
--filter=-,+build/include_what_you_use

To see a list of all the categories used in cpplint, pass no arg:
--filter=
"""
				116
# We categorize each error message we print. Here are the categories.
# We want an explicit list so we can list them all in cpplint --filter=.
# If you add a new error message with a new category, add it to the list
# here! cpplint_unittest.py should tell you if you forget to do this.
#
# The value is one newline-separated string with a single category per line;
# the backslash right after the opening quotes suppresses the leading newline.
# \ used for clearer layout -- pylint: disable-msg=C6013
_ERROR_CATEGORIES = '''\
build/class
build/deprecated
build/endif_comment
build/forward_decl
build/header_guard
build/include
build/include_order
build/include_what_you_use
build/namespaces
build/printf_format
build/storage_class
legal/copyright
readability/braces
readability/casting
readability/check
readability/constructors
readability/fn_size
readability/function
readability/multiline_comment
readability/multiline_string
readability/streams
readability/todo
readability/utf8
runtime/arrays
runtime/casting
runtime/explicit
runtime/int
runtime/init
runtime/invalid_increment
runtime/memset
runtime/printf
runtime/printf_format
runtime/references
runtime/rtti
runtime/sizeof
runtime/string
runtime/threadsafe_fn
runtime/virtual
whitespace/blank_line
whitespace/braces
whitespace/comma
whitespace/comments
whitespace/end_of_line
whitespace/ending_newline
whitespace/indent
whitespace/labels
whitespace/line_length
whitespace/newline
whitespace/operators
whitespace/parens
whitespace/semicolon
whitespace/tab
whitespace/todo
'''
				177
# The default state of the category filter. This is overridden by the
# --filter= flag. By default all errors are on, so only add here categories
# that should be off by default (i.e., categories that must be enabled by the
# --filter= flags).
# All entries here should start with a '-' or '+', as in the --filter= flag.
_DEFAULT_FILTERS = []
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	183
# We used to check for high-bit characters, but after much discussion we
# decided those were OK, as long as they were in UTF-8 and didn't represent
# hard-coded international strings, which belong in a separate i18n file.
				187
				188	# Headers that we consider STL headers.
				189	_STL_HEADERS = frozenset([
				190	'algobase.h', 'algorithm', 'alloc.h', 'bitset', 'deque', 'exception',
				191	'function.h', 'functional', 'hash_map', 'hash_map.h', 'hash_set',
				192	'hash_set.h', 'iterator', 'list', 'list.h', 'map', 'memory', 'pair.h',
				193	'pthread_alloc', 'queue', 'set', 'set.h', 'sstream', 'stack',
				194	'stl_alloc.h', 'stl_relops.h', 'type_traits.h',
				195	'utility', 'vector', 'vector.h',
				196	])
				197
				198
				199	# Non-STL C++ system headers.
				200	_CPP_HEADERS = frozenset([
				201	'algo.h', 'builtinbuf.h', 'bvector.h', 'cassert', 'cctype',
				202	'cerrno', 'cfloat', 'ciso646', 'climits', 'clocale', 'cmath',
				203	'complex', 'complex.h', 'csetjmp', 'csignal', 'cstdarg', 'cstddef',
				204	'cstdio', 'cstdlib', 'cstring', 'ctime', 'cwchar', 'cwctype',
				205	'defalloc.h', 'deque.h', 'editbuf.h', 'exception', 'fstream',
				206	'fstream.h', 'hashtable.h', 'heap.h', 'indstream.h', 'iomanip',
				207	'iomanip.h', 'ios', 'iosfwd', 'iostream', 'iostream.h', 'istream.h',
				208	'iterator.h', 'limits', 'map.h', 'multimap.h', 'multiset.h',
				209	'numeric', 'ostream.h', 'parsestream.h', 'pfstream.h', 'PlotFile.h',
				210	'procbuf.h', 'pthread_alloc.h', 'rope', 'rope.h', 'ropeimpl.h',
				211	'SFile.h', 'slist', 'slist.h', 'stack.h', 'stdexcept',
				212	'stdiostream.h', 'streambuf.h', 'stream.h', 'strfile.h', 'string',
				213	'strstream', 'strstream.h', 'tempbuf.h', 'tree.h', 'typeinfo', 'valarray',
				214	])
				215
				216
				217	# Assertion macros. These are defined in base/logging.h and
				218	# testing/base/gunit.h. Note that the _M versions need to come first
				219	# for substring matching to work.
				220	_CHECK_MACROS = [
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame^]	221	'DCHECK', 'CHECK',
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	222	'EXPECT_TRUE_M', 'EXPECT_TRUE',
				223	'ASSERT_TRUE_M', 'ASSERT_TRUE',
				224	'EXPECT_FALSE_M', 'EXPECT_FALSE',
				225	'ASSERT_FALSE_M', 'ASSERT_FALSE',
				226	]
				227
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame^]	228	# Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	229	_CHECK_REPLACEMENT = dict([(m, {}) for m in _CHECK_MACROS])
				230
				231	for op, replacement in [('==', 'EQ'), ('!=', 'NE'),
				232	('>=', 'GE'), ('>', 'GT'),
				233	('<=', 'LE'), ('<', 'LT')]:
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame^]	234	_CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_%s' % replacement
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	235	_CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_%s' % replacement
				236	_CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_%s' % replacement
				237	_CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_%s' % replacement
				238	_CHECK_REPLACEMENT['EXPECT_TRUE_M'][op] = 'EXPECT_%s_M' % replacement
				239	_CHECK_REPLACEMENT['ASSERT_TRUE_M'][op] = 'ASSERT_%s_M' % replacement
				240
				241	for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'),
				242	('>=', 'LT'), ('>', 'LE'),
				243	('<=', 'GT'), ('<', 'GE')]:
				244	_CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_%s' % inv_replacement
				245	_CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_%s' % inv_replacement
				246	_CHECK_REPLACEMENT['EXPECT_FALSE_M'][op] = 'EXPECT_%s_M' % inv_replacement
				247	_CHECK_REPLACEMENT['ASSERT_FALSE_M'][op] = 'ASSERT_%s_M' % inv_replacement
				248
				249
# These constants define types of headers for use with
# _IncludeState.CheckNextIncludeOrder().
_C_SYS_HEADER = 1        # described as 'C system header'
_CPP_SYS_HEADER = 2      # described as 'C++ system header'
_LIKELY_MY_HEADER = 3    # described as 'header this file implements'
_POSSIBLE_MY_HEADER = 4  # described as 'header this file may implement'
_OTHER_HEADER = 5        # described as 'other header'


# Maps a regexp pattern string to its compiled pattern object; filled lazily
# by Match() and Search().
_regexp_compile_cache = {}
				260
				261
				262	def Match(pattern, s):
				263	"""Matches the string with the pattern, caching the compiled regexp."""
				264	# The regexp compilation caching is inlined in both Match and Search for
				265	# performance reasons; factoring it out into a separate function turns out
				266	# to be noticeably expensive.
				267	if not pattern in _regexp_compile_cache:
				268	_regexp_compile_cache[pattern] = sre_compile.compile(pattern)
				269	return _regexp_compile_cache[pattern].match(s)
				270
				271
				272	def Search(pattern, s):
				273	"""Searches the string for the pattern, caching the compiled regexp."""
				274	if not pattern in _regexp_compile_cache:
				275	_regexp_compile_cache[pattern] = sre_compile.compile(pattern)
				276	return _regexp_compile_cache[pattern].search(s)
				277
				278
				279	class _IncludeState(dict):
				280	"""Tracks line numbers for includes, and the order in which includes appear.
				281
				282	As a dict, an _IncludeState object serves as a mapping between include
				283	filename and line number on which that file was included.
				284
				285	Call CheckNextIncludeOrder() once for each header in the file, passing
				286	in the type constants defined above. Calls in an illegal order will
				287	raise an _IncludeError with an appropriate error message.
				288
				289	"""
				290	# self._section will move monotonically through this set. If it ever
				291	# needs to move backwards, CheckNextIncludeOrder will raise an error.
				292	_INITIAL_SECTION = 0
				293	_MY_H_SECTION = 1
				294	_C_SECTION = 2
				295	_CPP_SECTION = 3
				296	_OTHER_H_SECTION = 4
				297
				298	_TYPE_NAMES = {
				299	_C_SYS_HEADER: 'C system header',
				300	_CPP_SYS_HEADER: 'C++ system header',
				301	_LIKELY_MY_HEADER: 'header this file implements',
				302	_POSSIBLE_MY_HEADER: 'header this file may implement',
				303	_OTHER_HEADER: 'other header',
				304	}
				305	_SECTION_NAMES = {
				306	_INITIAL_SECTION: "... nothing. (This can't be an error.)",
				307	_MY_H_SECTION: 'a header this file implements',
				308	_C_SECTION: 'C system header',
				309	_CPP_SECTION: 'C++ system header',
				310	_OTHER_H_SECTION: 'other header',
				311	}
				312
				313	def __init__(self):
				314	dict.__init__(self)
				315	self._section = self._INITIAL_SECTION
				316
				317	def CheckNextIncludeOrder(self, header_type):
				318	"""Returns a non-empty error message if the next header is out of order.
				319
				320	This function also updates the internal state to be ready to check
				321	the next include.
				322
				323	Args:
				324	header_type: One of the _XXX_HEADER constants defined above.
				325
				326	Returns:
				327	The empty string if the header is in the right order, or an
				328	error message describing what's wrong.
				329
				330	"""
				331	error_message = ('Found %s after %s' %
				332	(self._TYPE_NAMES[header_type],
				333	self._SECTION_NAMES[self._section]))
				334
				335	if header_type == _C_SYS_HEADER:
				336	if self._section <= self._C_SECTION:
				337	self._section = self._C_SECTION
				338	else:
				339	return error_message
				340	elif header_type == _CPP_SYS_HEADER:
				341	if self._section <= self._CPP_SECTION:
				342	self._section = self._CPP_SECTION
				343	else:
				344	return error_message
				345	elif header_type == _LIKELY_MY_HEADER:
				346	if self._section <= self._MY_H_SECTION:
				347	self._section = self._MY_H_SECTION
				348	else:
				349	self._section = self._OTHER_H_SECTION
				350	elif header_type == _POSSIBLE_MY_HEADER:
				351	if self._section <= self._MY_H_SECTION:
				352	self._section = self._MY_H_SECTION
				353	else:
				354	# This will always be the fallback because we're not sure
				355	# enough that the header is associated with this file.
				356	self._section = self._OTHER_H_SECTION
				357	else:
				358	assert header_type == _OTHER_HEADER
				359	self._section = self._OTHER_H_SECTION
				360
				361	return ''
				362
				363
				364	class _CppLintState(object):
				365	"""Maintains module-wide state.."""
				366
				367	def __init__(self):
				368	self.verbose_level = 1 # global setting.
				369	self.error_count = 0 # global count of reported errors
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame^]	370	# filters to apply when emitting error messages
				371	self.filters = _DEFAULT_FILTERS[:]
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	372
				373	# output format:
				374	# "emacs" - format that emacs can parse (default)
				375	# "vs7" - format that Microsoft Visual Studio 7 can parse
				376	self.output_format = 'emacs'
				377
				378	def SetOutputFormat(self, output_format):
				379	"""Sets the output format for errors."""
				380	self.output_format = output_format
				381
				382	def SetVerboseLevel(self, level):
				383	"""Sets the module's verbosity, and returns the previous setting."""
				384	last_verbose_level = self.verbose_level
				385	self.verbose_level = level
				386	return last_verbose_level
				387
				388	def SetFilters(self, filters):
				389	"""Sets the error-message filters.
				390
				391	These filters are applied when deciding whether to emit a given
				392	error message.
				393
				394	Args:
				395	filters: A string of comma-separated filters (eg "+whitespace/indent").
				396	Each filter should start with + or -; else we die.
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame^]	397
				398	Raises:
				399	ValueError: The comma-separated filters did not all start with '+' or '-'.
				400	E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter"
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	401	"""
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame^]	402	# Default filters always have less priority than the flag ones.
				403	self.filters = _DEFAULT_FILTERS[:]
				404	for filt in filters.split(','):
				405	clean_filt = filt.strip()
				406	if clean_filt:
				407	self.filters.append(clean_filt)
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	408	for filt in self.filters:
				409	if not (filt.startswith('+') or filt.startswith('-')):
				410	raise ValueError('Every filter in --filters must start with + or -'
				411	' (%s does not)' % filt)
				412
				413	def ResetErrorCount(self):
				414	"""Sets the module's error statistic back to zero."""
				415	self.error_count = 0
				416
				417	def IncrementErrorCount(self):
				418	"""Bumps the module's error statistic."""
				419	self.error_count += 1
				420
				421
# The single shared state object; the module-level accessor functions below
# read and update it.
_cpplint_state = _CppLintState()
				423
				424
def _OutputFormat():
  """Gets the module's output format.

  Returns:
    The output_format value currently stored on _cpplint_state.
  """
  return _cpplint_state.output_format
				428
				429
def _SetOutputFormat(output_format):
  """Sets the module's output format.

  Args:
    output_format: The new format name; it is passed unchanged to
      _cpplint_state.SetOutputFormat().
  """
  _cpplint_state.SetOutputFormat(output_format)
				433
				434
def _VerboseLevel():
  """Returns the module's verbosity setting.

  Returns:
    The verbose_level value currently stored on _cpplint_state.
  """
  return _cpplint_state.verbose_level
				438
				439
def _SetVerboseLevel(level):
  """Sets the module's verbosity, and returns the previous setting.

  Args:
    level: The new verbosity level.

  Returns:
    Whatever _cpplint_state.SetVerboseLevel() returns, i.e. the level that
    was in effect before this call.
  """
  return _cpplint_state.SetVerboseLevel(level)
				443
				444
def _Filters():
  """Returns the module's list of output filters, as a list.

  The list object held by _cpplint_state is returned directly, not a copy.
  """
  return _cpplint_state.filters
				448
				449
def _SetFilters(filters):
  """Sets the module's error-message filters.

  These filters are applied when deciding whether to emit a given
  error message.

  Args:
    filters: A string of comma-separated filters (eg "+whitespace/indent").
      Each filter should start with + or -; else we die.
  """
  _cpplint_state.SetFilters(filters)
				461
				462
				463	class _FunctionState(object):
				464	"""Tracks current function name and the number of lines in its body."""
				465
				466	_NORMAL_TRIGGER = 250 # for --v=0, 500 for --v=1, etc.
				467	_TEST_TRIGGER = 400 # about 50% more than _NORMAL_TRIGGER.
				468
				469	def __init__(self):
				470	self.in_a_function = False
				471	self.lines_in_function = 0
				472	self.current_function = ''
				473
				474	def Begin(self, function_name):
				475	"""Start analyzing function body.
				476
				477	Args:
				478	function_name: The name of the function being tracked.
				479	"""
				480	self.in_a_function = True
				481	self.lines_in_function = 0
				482	self.current_function = function_name
				483
				484	def Count(self):
				485	"""Count line in current function body."""
				486	if self.in_a_function:
				487	self.lines_in_function += 1
				488
				489	def Check(self, error, filename, linenum):
				490	"""Report if too many lines in function body.
				491
				492	Args:
				493	error: The function to call with any errors found.
				494	filename: The name of the current file.
				495	linenum: The number of the line to check.
				496	"""
				497	if Match(r'T(EST\|est)', self.current_function):
				498	base_trigger = self._TEST_TRIGGER
				499	else:
				500	base_trigger = self._NORMAL_TRIGGER
				501	trigger = base_trigger * 2**_VerboseLevel()
				502
				503	if self.lines_in_function > trigger:
				504	error_level = int(math.log(self.lines_in_function / base_trigger, 2))
				505	# 50 => 0, 100 => 1, 200 => 2, 400 => 3, 800 => 4, 1600 => 5, ...
				506	if error_level > 5:
				507	error_level = 5
				508	error(filename, linenum, 'readability/fn_size', error_level,
				509	'Small and focused functions are preferred:'
				510	' %s has %d non-comment lines'
				511	' (error triggered by exceeding %d lines).' % (
				512	self.current_function, self.lines_in_function, trigger))
				513
				514	def End(self):
				515	"""Stop analizing function body."""
				516	self.in_a_function = False
				517
				518
				519	class _IncludeError(Exception):
				520	"""Indicates a problem with the include order in a file."""
				521	pass
				522
				523
				524	class FileInfo:
				525	"""Provides utility functions for filenames.
				526
				527	FileInfo provides easy access to the components of a file's path
				528	relative to the project root.
				529	"""
				530
				531	def __init__(self, filename):
				532	self._filename = filename
				533
				534	def FullName(self):
				535	"""Make Windows paths like Unix."""
				536	return os.path.abspath(self._filename).replace('\\', '/')
				537
				538	def RepositoryName(self):
				539	"""FullName after removing the local path to the repository.
				540
				541	If we have a real absolute path name here we can try to do something smart:
				542	detecting the root of the checkout and truncating /path/to/checkout from
				543	the name so that we get header guards that don't include things like
				544	"C:\Documents and Settings\..." or "/home/username/..." in them and thus
				545	people on different computers who have checked the source out to different
				546	locations won't see bogus errors.
				547	"""
				548	fullname = self.FullName()
				549
				550	if os.path.exists(fullname):
				551	project_dir = os.path.dirname(fullname)
				552
				553	if os.path.exists(os.path.join(project_dir, ".svn")):
				554	# If there's a .svn file in the current directory, we recursively look
				555	# up the directory tree for the top of the SVN checkout
				556	root_dir = project_dir
				557	one_up_dir = os.path.dirname(root_dir)
				558	while os.path.exists(os.path.join(one_up_dir, ".svn")):
				559	root_dir = os.path.dirname(root_dir)
				560	one_up_dir = os.path.dirname(one_up_dir)
				561
				562	prefix = os.path.commonprefix([root_dir, project_dir])
				563	return fullname[len(prefix) + 1:]
				564
				565	# Not SVN? Try to find a git top level directory by searching up from the
				566	# current path.
				567	root_dir = os.path.dirname(fullname)
				568	while (root_dir != os.path.dirname(root_dir) and
				569	not os.path.exists(os.path.join(root_dir, ".git"))):
				570	root_dir = os.path.dirname(root_dir)
				571	if os.path.exists(os.path.join(root_dir, ".git")):
				572	prefix = os.path.commonprefix([root_dir, project_dir])
				573	return fullname[len(prefix) + 1:]
				574
				575	# Don't know what to do; header guard warnings may be wrong...
				576	return fullname
				577
				578	def Split(self):
				579	"""Splits the file into the directory, basename, and extension.
				580
				581	For 'chrome/browser/browser.cc', Split() would
				582	return ('chrome/browser', 'browser', '.cc')
				583
				584	Returns:
				585	A tuple of (directory, basename, extension).
				586	"""
				587
				588	googlename = self.RepositoryName()
				589	project, rest = os.path.split(googlename)
				590	return (project,) + os.path.splitext(rest)
				591
				592	def BaseName(self):
				593	"""File base name - text after the final slash, before the final period."""
				594	return self.Split()[1]
				595
				596	def Extension(self):
				597	"""File extension - text following the final period."""
				598	return self.Split()[2]
				599
				600	def NoExtension(self):
				601	"""File has no source file extension."""
				602	return '/'.join(self.Split()[0:2])
				603
				604	def IsSource(self):
				605	"""File has a source file extension."""
				606	return self.Extension()[1:] in ('c', 'cc', 'cpp', 'cxx')
				607
				608
				609	def _ShouldPrintError(category, confidence):
				610	"""Returns true iff confidence >= verbose, and category passes filter."""
				611	# There are two ways we might decide not to print an error message:
				612	# the verbosity level isn't high enough, or the filters filter it out.
				613	if confidence < _cpplint_state.verbose_level:
				614	return False
				615
				616	is_filtered = False
				617	for one_filter in _Filters():
				618	if one_filter.startswith('-'):
				619	if category.startswith(one_filter[1:]):
				620	is_filtered = True
				621	elif one_filter.startswith('+'):
				622	if category.startswith(one_filter[1:]):
				623	is_filtered = False
				624	else:
				625	assert False # should have been checked for in SetFilter.
				626	if is_filtered:
				627	return False
				628
				629	return True
				630
				631
				632	def Error(filename, linenum, category, confidence, message):
				633	"""Logs the fact we've found a lint error.
				634
				635	We log where the error was found, and also our confidence in the error,
				636	that is, how certain we are this is a legitimate style regression, and
				637	not a misidentification or a use that's sometimes justified.
				638
				639	Args:
				640	filename: The name of the file containing the error.
				641	linenum: The number of the line containing the error.
				642	category: A string used to describe the "category" this bug
				643	falls under: "whitespace", say, or "runtime". Categories
				644	may have a hierarchy separated by slashes: "whitespace/indent".
				645	confidence: A number from 1-5 representing a confidence score for
				646	the error, with 5 meaning that we are certain of the problem,
				647	and 1 meaning that it could be a legitimate construct.
				648	message: The error message.
				649	"""
				650	# There are two ways we might decide not to print an error message:
				651	# the verbosity level isn't high enough, or the filters filter it out.
				652	if _ShouldPrintError(category, confidence):
				653	_cpplint_state.IncrementErrorCount()
				654	if _cpplint_state.output_format == 'vs7':
				655	sys.stderr.write('%s(%s): %s [%s] [%d]\n' % (
				656	filename, linenum, message, category, confidence))
				657	else:
				658	sys.stderr.write('%s:%s: %s [%s] [%d]\n' % (
				659	filename, linenum, message, category, confidence))
				660
				661
				662	# Matches standard C++ escape esequences per 2.13.2.3 of the C++ standard.
				663	_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
				664	r'\\([abfnrtv?"\\\']\|\d+\|x[0-9a-fA-F]+)')
				665	# Matches strings. Escape codes should already be removed by ESCAPES.
				666	_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES = re.compile(r'"[^"]*"')
				667	# Matches characters. Escape codes should already be removed by ESCAPES.
				668	_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES = re.compile(r"'.'")
				669	# Matches multi-line C++ comments.
				670	# This RE is a little bit more complicated than one might expect, because we
				671	# have to take care of space removals tools so we can handle comments inside
				672	# statements better.
				673	# The current rule is: We only clear spaces from both sides when we're at the
				674	# end of the line. Otherwise, we try to remove spaces from the right side,
				675	# if this doesn't work we try on left side but only if there's a non-character
				676	# on the right.
				677	_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
				678	r"""(\s/\.\/\s*$\|
				679	/\.\*/\s+\|
				680	\s+/\.\*/(?=\W)\|
				681	/\.\*/)""", re.VERBOSE)
				682
				683
				684	def IsCppString(line):
				685	"""Does line terminate so, that the next symbol is in string constant.
				686
				687	This function does not consider single-line nor multi-line comments.
				688
				689	Args:
				690	line: is a partial line of code starting from the 0..n.
				691
				692	Returns:
				693	True, if next character appended to 'line' is inside a
				694	string constant.
				695	"""
				696
				697	line = line.replace(r'\\', 'XX') # after this, \\" does not match to \"
				698	return ((line.count('"') - line.count(r'\"') - line.count("'\"'")) & 1) == 1
				699
				700
				701	def FindNextMultiLineCommentStart(lines, lineix):
				702	"""Find the beginning marker for a multiline comment."""
				703	while lineix < len(lines):
				704	if lines[lineix].strip().startswith('/*'):
				705	# Only return this marker if the comment goes beyond this line
				706	if lines[lineix].strip().find('*/', 2) < 0:
				707	return lineix
				708	lineix += 1
				709	return len(lines)
				710
				711
				712	def FindNextMultiLineCommentEnd(lines, lineix):
				713	"""We are inside a comment, find the end marker."""
				714	while lineix < len(lines):
				715	if lines[lineix].strip().endswith('*/'):
				716	return lineix
				717	lineix += 1
				718	return len(lines)
				719
				720
				721	def RemoveMultiLineCommentsFromRange(lines, begin, end):
				722	"""Clears a range of lines for multi-line comments."""
				723	# Having // dummy comments makes the lines non-empty, so we will not get
				724	# unnecessary blank line warnings later in the code.
				725	for i in range(begin, end):
				726	lines[i] = '// dummy'
				727
				728
				729	def RemoveMultiLineComments(filename, lines, error):
				730	"""Removes multiline (c-style) comments from lines."""
				731	lineix = 0
				732	while lineix < len(lines):
				733	lineix_begin = FindNextMultiLineCommentStart(lines, lineix)
				734	if lineix_begin >= len(lines):
				735	return
				736	lineix_end = FindNextMultiLineCommentEnd(lines, lineix_begin)
				737	if lineix_end >= len(lines):
				738	error(filename, lineix_begin + 1, 'readability/multiline_comment', 5,
				739	'Could not find end of multi-line comment')
				740	return
				741	RemoveMultiLineCommentsFromRange(lines, lineix_begin, lineix_end + 1)
				742	lineix = lineix_end + 1
				743
				744
				745	def CleanseComments(line):
				746	"""Removes //-comments and single-line C-style /* */ comments.
				747
				748	Args:
				749	line: A line of C++ source.
				750
				751	Returns:
				752	The line with single-line comments removed.
				753	"""
				754	commentpos = line.find('//')
				755	if commentpos != -1 and not IsCppString(line[:commentpos]):
				756	line = line[:commentpos]
				757	# get rid of /* ... */
				758	return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
				759
				760
class CleansedLines(object):
  """Keeps three views of the file's lines, each preprocessed differently.

  1) elided holds the lines with comments removed and strings collapsed,
  2) lines holds the lines with comments removed, and
  3) raw_lines holds the lines exactly as they were passed in.
  All three are lists of the same length.
  """

  def __init__(self, lines):
    self.elided = []
    self.lines = []
    self.raw_lines = lines
    self.num_lines = len(lines)
    for raw_line in lines:
      self.lines.append(CleanseComments(raw_line))
      # Strings are collapsed before comments are stripped, so a '//' inside
      # a string literal is not mistaken for a comment.
      self.elided.append(CleanseComments(self._CollapseStrings(raw_line)))

  def NumLines(self):
    """Returns the number of lines represented."""
    return self.num_lines

  @staticmethod
  def _CollapseStrings(elided):
    """Collapses strings and chars on a line to simple "" or '' blocks.

    We nix strings first so we're not fooled by text like '"http://"'

    Args:
      elided: The line being processed.

    Returns:
      The line with collapsed strings.
    """
    # Lines matching the include pattern are returned untouched.
    if _RE_PATTERN_INCLUDE.match(elided):
      return elided
    # Remove escaped characters first to make quote/single quote collapsing
    # basic. Things that look like escaped characters shouldn't occur
    # outside of strings and chars.
    collapsed = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)
    collapsed = _RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES.sub("''", collapsed)
    return _RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES.sub('""', collapsed)
				804
				805
				806	def CloseExpression(clean_lines, linenum, pos):
				807	"""If input points to ( or { or [, finds the position that closes it.
				808
				809	If lines[linenum][pos] points to a '(' or '{' or '[', finds the the
				810	linenum/pos that correspond to the closing of the expression.
				811
				812	Args:
				813	clean_lines: A CleansedLines instance containing the file.
				814	linenum: The number of the line to check.
				815	pos: A position on the line.
				816
				817	Returns:
				818	A tuple (line, linenum, pos) pointer past the closing brace, or
				819	(line, len(lines), -1) if we never find a close. Note we ignore
				820	strings and comments when matching; and the line we return is the
				821	'cleansed' line at linenum.
				822	"""
				823
				824	line = clean_lines.elided[linenum]
				825	startchar = line[pos]
				826	if startchar not in '({[':
				827	return (line, clean_lines.NumLines(), -1)
				828	if startchar == '(': endchar = ')'
				829	if startchar == '[': endchar = ']'
				830	if startchar == '{': endchar = '}'
				831
				832	num_open = line.count(startchar) - line.count(endchar)
				833	while linenum < clean_lines.NumLines() and num_open > 0:
				834	linenum += 1
				835	line = clean_lines.elided[linenum]
				836	num_open += line.count(startchar) - line.count(endchar)
				837	# OK, now find the endchar that actually got us back to even
				838	endpos = len(line)
				839	while num_open >= 0:
				840	endpos = line.rfind(')', 0, endpos)
				841	num_open -= 1 # chopped off another )
				842	return (line, linenum, endpos + 1)
				843
				844
				845	def CheckForCopyright(filename, lines, error):
				846	"""Logs an error if no Copyright message appears at the top of the file."""
				847
				848	# We'll say it should occur by line 10. Don't forget there's a
				849	# dummy line at the front.
				850	for line in xrange(1, min(len(lines), 11)):
				851	if re.search(r'Copyright', lines[line], re.I): break
				852	else: # means no copyright line was found
				853	error(filename, 0, 'legal/copyright', 5,
				854	'No copyright message found. '
				855	'You should have a line: "Copyright [year] <Copyright Owner>"')
				856
				857
				858	def GetHeaderGuardCPPVariable(filename):
				859	"""Returns the CPP variable that should be used as a header guard.
				860
				861	Args:
				862	filename: The name of a C++ header file.
				863
				864	Returns:
				865	The CPP variable that should be used as a header guard in the
				866	named file.
				867
				868	"""
				869
				870	fileinfo = FileInfo(filename)
				871	return re.sub(r'[-./\s]', '_', fileinfo.RepositoryName()).upper() + '_'
				872
				873
				874	def CheckForHeaderGuard(filename, lines, error):
				875	"""Checks that the file contains a header guard.
				876
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame^]	877	Logs an error if no #ifndef header guard is present. For other
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	878	headers, checks that the full pathname is used.
				879
				880	Args:
				881	filename: The name of the C++ header file.
				882	lines: An array of strings, each representing a line of the file.
				883	error: The function to call with any errors found.
				884	"""
				885
				886	cppvar = GetHeaderGuardCPPVariable(filename)
				887
				888	ifndef = None
				889	ifndef_linenum = 0
				890	define = None
				891	endif = None
				892	endif_linenum = 0
				893	for linenum, line in enumerate(lines):
				894	linesplit = line.split()
				895	if len(linesplit) >= 2:
				896	# find the first occurrence of #ifndef and #define, save arg
				897	if not ifndef and linesplit[0] == '#ifndef':
				898	# set ifndef to the header guard presented on the #ifndef line.
				899	ifndef = linesplit[1]
				900	ifndef_linenum = linenum
				901	if not define and linesplit[0] == '#define':
				902	define = linesplit[1]
				903	# find the last occurrence of #endif, save entire line
				904	if line.startswith('#endif'):
				905	endif = line
				906	endif_linenum = linenum
				907
				908	if not ifndef or not define or ifndef != define:
				909	error(filename, 0, 'build/header_guard', 5,
				910	'No #ifndef header guard found, suggested CPP variable is: %s' %
				911	cppvar)
				912	return
				913
				914	# The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__
				915	# for backward compatibility.
				916	if ifndef != cppvar:
				917	error_level = 0
				918	if ifndef != cppvar + '_':
				919	error_level = 5
				920
				921	error(filename, ifndef_linenum, 'build/header_guard', error_level,
				922	'#ifndef header guard has wrong style, please use: %s' % cppvar)
				923
				924	if endif != ('#endif // %s' % cppvar):
				925	error_level = 0
				926	if endif != ('#endif // %s' % (cppvar + '_')):
				927	error_level = 5
				928
				929	error(filename, endif_linenum, 'build/header_guard', error_level,
				930	'#endif line should be "#endif // %s"' % cppvar)
				931
				932
				933	def CheckForUnicodeReplacementCharacters(filename, lines, error):
				934	"""Logs an error for each line containing Unicode replacement characters.
				935
				936	These indicate that either the file contained invalid UTF-8 (likely)
				937	or Unicode replacement characters (which it shouldn't). Note that
				938	it's possible for this to throw off line numbering if the invalid
				939	UTF-8 occurred adjacent to a newline.
				940
				941	Args:
				942	filename: The name of the current file.
				943	lines: An array of strings, each representing a line of the file.
				944	error: The function to call with any errors found.
				945	"""
				946	for linenum, line in enumerate(lines):
				947	if u'\ufffd' in line:
				948	error(filename, linenum, 'readability/utf8', 5,
				949	'Line contains invalid UTF-8 (or Unicode replacement character).')
				950
				951
				952	def CheckForNewlineAtEOF(filename, lines, error):
				953	"""Logs an error if there is no newline char at the end of the file.
				954
				955	Args:
				956	filename: The name of the current file.
				957	lines: An array of strings, each representing a line of the file.
				958	error: The function to call with any errors found.
				959	"""
				960
				961	# The array lines() was created by adding two newlines to the
				962	# original file (go figure), then splitting on \n.
				963	# To verify that the file ends in \n, we just have to make sure the
				964	# last-but-two element of lines() exists and is empty.
				965	if len(lines) < 3 or lines[-2]:
				966	error(filename, len(lines) - 2, 'whitespace/ending_newline', 5,
				967	'Could not find a newline character at the end of the file.')
				968
				969
				970	def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error):
				971	"""Logs an error if we see /* ... */ or "..." that extend past one line.
				972
				973	/* ... */ comments are legit inside macros, for one line.
				974	Otherwise, we prefer // comments, so it's ok to warn about the
				975	other. Likewise, it's ok for strings to extend across multiple
				976	lines, as long as a line continuation character (backslash)
				977	terminates each line. Although not currently prohibited by the C++
				978	style guide, it's ugly and unnecessary. We don't do well with either
				979	in this lint program, so we warn about both.
				980
				981	Args:
				982	filename: The name of the current file.
				983	clean_lines: A CleansedLines instance containing the file.
				984	linenum: The number of the line to check.
				985	error: The function to call with any errors found.
				986	"""
				987	line = clean_lines.elided[linenum]
				988
				989	# Remove all \\ (escaped backslashes) from the line. They are OK, and the
				990	# second (escaped) slash may trigger later \" detection erroneously.
				991	line = line.replace('\\\\', '')
				992
				993	if line.count('/') > line.count('/'):
				994	error(filename, linenum, 'readability/multiline_comment', 5,
				995	'Complex multi-line /.../-style comment found. '
				996	'Lint may give bogus warnings. '
				997	'Consider replacing these with //-style comments, '
				998	'with #if 0...#endif, '
				999	'or with more clearly structured multi-line comments.')
				1000
				1001	if (line.count('"') - line.count('\\"')) % 2:
				1002	error(filename, linenum, 'readability/multiline_string', 5,
				1003	'Multi-line string ("...") found. This lint script doesn\'t '
				1004	'do well with such strings, and may give bogus warnings. They\'re '
				1005	'ugly and unnecessary, and you should use concatenation instead".')
				1006
				1007
				1008	threading_list = (
				1009	('asctime(', 'asctime_r('),
				1010	('ctime(', 'ctime_r('),
				1011	('getgrgid(', 'getgrgid_r('),
				1012	('getgrnam(', 'getgrnam_r('),
				1013	('getlogin(', 'getlogin_r('),
				1014	('getpwnam(', 'getpwnam_r('),
				1015	('getpwuid(', 'getpwuid_r('),
				1016	('gmtime(', 'gmtime_r('),
				1017	('localtime(', 'localtime_r('),
				1018	('rand(', 'rand_r('),
				1019	('readdir(', 'readdir_r('),
				1020	('strtok(', 'strtok_r('),
				1021	('ttyname(', 'ttyname_r('),
				1022	)
				1023
				1024
				1025	def CheckPosixThreading(filename, clean_lines, linenum, error):
				1026	"""Checks for calls to thread-unsafe functions.
				1027
				1028	Much code has been originally written without consideration of
				1029	multi-threading. Also, engineers are relying on their old experience;
				1030	they have learned posix before threading extensions were added. These
				1031	tests guide the engineers to use thread-safe functions (when using
				1032	posix directly).
				1033
				1034	Args:
				1035	filename: The name of the current file.
				1036	clean_lines: A CleansedLines instance containing the file.
				1037	linenum: The number of the line to check.
				1038	error: The function to call with any errors found.
				1039	"""
				1040	line = clean_lines.elided[linenum]
				1041	for single_thread_function, multithread_safe_function in threading_list:
				1042	ix = line.find(single_thread_function)
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame^]	1043	# Comparisons made explicit for clarity -- pylint: disable-msg=C6403
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1044	if ix >= 0 and (ix == 0 or (not line[ix - 1].isalnum() and
				1045	line[ix - 1] not in ('_', '.', '>'))):
				1046	error(filename, linenum, 'runtime/threadsafe_fn', 2,
				1047	'Consider using ' + multithread_safe_function +
				1048	'...) instead of ' + single_thread_function +
				1049	'...) for improved thread safety.')
				1050
				1051
# Matches invalid increment: *count++, which moves pointer instead of
# incrementing a value.
_RE_PATTERN_IVALID_INCREMENT = re.compile(
    r'^\s*\*\w+(\+\+|--);')


def CheckInvalidIncrement(filename, clean_lines, linenum, error):
  """Checks for invalid increment *count++.

  For example following function:
  void increment_counter(int* count) {
    *count++;
  }
  is invalid, because it effectively does count++, moving pointer, and should
  be replaced with ++*count, (*count)++ or *count += 1.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]
  # The pattern is anchored at the start of the line, so only a statement
  # that begins with the dereference is reported.
  if _RE_PATTERN_IVALID_INCREMENT.match(line):
    error(filename, linenum, 'runtime/invalid_increment', 5,
          'Changing pointer instead of value (or unused value of operator*).')
				1078
				1079
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1080	class _ClassInfo(object):
				1081	"""Stores information about a class."""
				1082
				1083	def __init__(self, name, linenum):
				1084	self.name = name
				1085	self.linenum = linenum
				1086	self.seen_open_brace = False
				1087	self.is_derived = False
				1088	self.virtual_method_linenumber = None
				1089	self.has_virtual_destructor = False
				1090	self.brace_depth = 0
				1091
				1092
				1093	class _ClassState(object):
				1094	"""Holds the current state of the parse relating to class declarations.
				1095
				1096	It maintains a stack of _ClassInfos representing the parser's guess
				1097	as to the current nesting of class declarations. The innermost class
				1098	is at the top (back) of the stack. Typically, the stack will either
				1099	be empty or have exactly one entry.
				1100	"""
				1101
				1102	def __init__(self):
				1103	self.classinfo_stack = []
				1104
				1105	def CheckFinished(self, filename, error):
				1106	"""Checks that all classes have been completely parsed.
				1107
				1108	Call this when all lines in a file have been processed.
				1109	Args:
				1110	filename: The name of the current file.
				1111	error: The function to call with any errors found.
				1112	"""
				1113	if self.classinfo_stack:
				1114	# Note: This test can result in false positives if #ifdef constructs
				1115	# get in the way of brace matching. See the testBuildClass test in
				1116	# cpplint_unittest.py for an example of this.
				1117	error(filename, self.classinfo_stack[0].linenum, 'build/class', 5,
				1118	'Failed to find complete declaration of class %s' %
				1119	self.classinfo_stack[0].name)
				1120
				1121
				1122	def CheckForNonStandardConstructs(filename, clean_lines, linenum,
				1123	class_state, error):
				1124	"""Logs an error if we see certain non-ANSI constructs ignored by gcc-2.
				1125
				1126	Complain about several constructs which gcc-2 accepts, but which are
				1127	not standard C++. Warning about these in lint is one way to ease the
				1128	transition to new compilers.
				1129	- put storage class first (e.g. "static const" instead of "const static").
				1130	- "%lld" instead of %qd" in printf-type functions.
				1131	- "%1$d" is non-standard in printf-type functions.
				1132	- "\%" is an undefined character escape sequence.
				1133	- text after #endif is not allowed.
				1134	- invalid inner-style forward declaration.
				1135	- >? and <? operators, and their >?= and <?= cousins.
				1136	- classes with virtual methods need virtual destructors (compiler warning
				1137	available, but not turned on yet.)
				1138
				1139	Additionally, check for constructor/destructor style violations as it
				1140	is very convenient to do so while checking for gcc-2 compliance.
				1141
				1142	Args:
				1143	filename: The name of the current file.
				1144	clean_lines: A CleansedLines instance containing the file.
				1145	linenum: The number of the line to check.
				1146	class_state: A _ClassState instance which maintains information about
				1147	the current stack of nested class declarations being parsed.
				1148	error: A callable to which errors are reported, which takes 4 arguments:
				1149	filename, line number, error level, and message
				1150	"""
				1151
				1152	# Remove comments from the line, but leave in strings for now.
				1153	line = clean_lines.lines[linenum]
				1154
				1155	if Search(r'printf\s\(.".%[-+ ]?\dq', line):
				1156	error(filename, linenum, 'runtime/printf_format', 3,
				1157	'%q in format strings is deprecated. Use %ll instead.')
				1158
				1159	if Search(r'printf\s\(.".*%\d+\$', line):
				1160	error(filename, linenum, 'runtime/printf_format', 2,
				1161	'%N$ formats are unconventional. Try rewriting to avoid them.')
				1162
				1163	# Remove escaped backslashes before looking for undefined escapes.
				1164	line = line.replace('\\\\', '')
				1165
				1166	if Search(r'("\|\').*\\(%\|\[\|\(\|{)', line):
				1167	error(filename, linenum, 'build/printf_format', 3,
				1168	'%, [, (, and { are undefined character escapes. Unescape them.')
				1169
				1170	# For the rest, work with both comments and strings removed.
				1171	line = clean_lines.elided[linenum]
				1172
				1173	if Search(r'\b(const\|volatile\|void\|char\|short\|int\|long'
				1174	r'\|float\|double\|signed\|unsigned'
				1175	r'\|schar\|u?int8\|u?int16\|u?int32\|u?int64)'
				1176	r'\s+(auto\|register\|static\|extern\|typedef)\b',
				1177	line):
				1178	error(filename, linenum, 'build/storage_class', 5,
				1179	'Storage class (static, extern, typedef, etc) should be first.')
				1180
				1181	if Match(r'\s#\sendif\s*[^/\s]+', line):
				1182	error(filename, linenum, 'build/endif_comment', 5,
				1183	'Uncommented text after #endif is non-standard. Use a comment.')
				1184
				1185	if Match(r'\sclass\s+(\w+\s::\s)+\w+\s;', line):
				1186	error(filename, linenum, 'build/forward_decl', 5,
				1187	'Inner-style forward declarations are invalid. Remove this line.')
				1188
				1189	if Search(r'(\w+\|[+-]?\d+(\.\d)?)\s(<\|>)\?=?\s(\w+\|[+-]?\d+)(\.\d)?',
				1190	line):
				1191	error(filename, linenum, 'build/deprecated', 3,
				1192	'>? and <? (max and min) operators are non-standard and deprecated.')
				1193
				1194	# Track class entry and exit, and attempt to find cases within the
				1195	# class declaration that don't meet the C++ style
				1196	# guidelines. Tracking is very dependent on the code matching Google
				1197	# style guidelines, but it seems to perform well enough in testing
				1198	# to be a worthwhile addition to the checks.
				1199	classinfo_stack = class_state.classinfo_stack
				1200	# Look for a class declaration
				1201	class_decl_match = Match(
				1202	r'\s(template\s<[\w\s<>,:]>\s)?(class\|struct)\s+(\w+(::\w+)*)', line)
				1203	if class_decl_match:
				1204	classinfo_stack.append(_ClassInfo(class_decl_match.group(3), linenum))
				1205
				1206	# Everything else in this function uses the top of the stack if it's
				1207	# not empty.
				1208	if not classinfo_stack:
				1209	return
				1210
				1211	classinfo = classinfo_stack[-1]
				1212
				1213	# If the opening brace hasn't been seen look for it and also
				1214	# parent class declarations.
				1215	if not classinfo.seen_open_brace:
				1216	# If the line has a ';' in it, assume it's a forward declaration or
				1217	# a single-line class declaration, which we won't process.
				1218	if line.find(';') != -1:
				1219	classinfo_stack.pop()
				1220	return
				1221	classinfo.seen_open_brace = (line.find('{') != -1)
				1222	# Look for a bare ':'
				1223	if Search('(^\|[^:]):($\|[^:])', line):
				1224	classinfo.is_derived = True
				1225	if not classinfo.seen_open_brace:
				1226	return # Everything else in this function is for after open brace
				1227
				1228	# The class may have been declared with namespace or classname qualifiers.
				1229	# The constructor and destructor will not have those qualifiers.
				1230	base_classname = classinfo.name.split('::')[-1]
				1231
				1232	# Look for single-argument constructors that aren't marked explicit.
				1233	# Technically a valid construct, but against style.
				1234	args = Match(r'(?<!explicit)\s+%s\s*$([^,()]+)$'
				1235	% re.escape(base_classname),
				1236	line)
				1237	if (args and
				1238	args.group(1) != 'void' and
				1239	not Match(r'(const\s+)?%s\s*&' % re.escape(base_classname),
				1240	args.group(1).strip())):
				1241	error(filename, linenum, 'runtime/explicit', 5,
				1242	'Single-argument constructors should be marked explicit.')
				1243
				1244	# Look for methods declared virtual.
				1245	if Search(r'\bvirtual\b', line):
				1246	classinfo.virtual_method_linenumber = linenum
				1247	# Only look for a destructor declaration on the same line. It would
				1248	# be extremely unlikely for the destructor declaration to occupy
				1249	# more than one line.
				1250	if Search(r'~%s\s*\(' % base_classname, line):
				1251	classinfo.has_virtual_destructor = True
				1252
				1253	# Look for class end.
				1254	brace_depth = classinfo.brace_depth
				1255	brace_depth = brace_depth + line.count('{') - line.count('}')
				1256	if brace_depth <= 0:
				1257	classinfo = classinfo_stack.pop()
				1258	# Try to detect missing virtual destructor declarations.
				1259	# For now, only warn if a non-derived class with virtual methods lacks
				1260	# a virtual destructor. This is to make it less likely that people will
				1261	# declare derived virtual destructors without declaring the base
				1262	# destructor virtual.
				1263	if ((classinfo.virtual_method_linenumber is not None) and
				1264	(not classinfo.has_virtual_destructor) and
				1265	(not classinfo.is_derived)): # Only warn for base classes
				1266	error(filename, classinfo.linenum, 'runtime/virtual', 4,
				1267	'The class %s probably needs a virtual destructor due to '
				1268	'having virtual method(s), one declared at line %d.'
				1269	% (classinfo.name, classinfo.virtual_method_linenumber))
				1270	else:
				1271	classinfo.brace_depth = brace_depth
				1272
				1273
				1274	def CheckSpacingForFunctionCall(filename, line, linenum, error):
				1275	"""Checks for the correctness of various spacing around function calls.
				1276
				1277	Args:
				1278	filename: The name of the current file.
				1279	line: The text of the line to check.
				1280	linenum: The number of the line to check.
				1281	error: The function to call with any errors found.
				1282	"""
				1283
				1284	# Since function calls often occur inside if/for/while/switch
				1285	# expressions - which have their own, more liberal conventions - we
				1286	# first see if we should be looking inside such an expression for a
				1287	# function call, to which we can apply more strict standards.
				1288	fncall = line # if there's no control flow construct, look at whole line
				1289	for pattern in (r'\bif\s$(.)$\s*{',
				1290	r'\bfor\s$(.)$\s*{',
				1291	r'\bwhile\s$(.)$\s*[{;]',
				1292	r'\bswitch\s$(.)$\s*{'):
				1293	match = Search(pattern, line)
				1294	if match:
				1295	fncall = match.group(1) # look inside the parens for function calls
				1296	break
				1297
				1298	# Except in if/for/while/switch, there should never be space
				1299	# immediately inside parens (eg "f( 3, 4 )"). We make an exception
				1300	# for nested parens ( (a+b) + c ). Likewise, there should never be
				1301	# a space before a ( when it's a function argument. I assume it's a
				1302	# function argument when the char before the whitespace is legal in
				1303	# a function name (alnum + _) and we're not starting a macro. Also ignore
				1304	# pointers and references to arrays and functions coz they're too tricky:
				1305	# we use a very simple way to recognize these:
				1306	# " (something)(maybe-something)" or
				1307	# " (something)(maybe-something," or
				1308	# " (something)[something]"
				1309	# Note that we assume the contents of [] to be short enough that
				1310	# they'll never need to wrap.
				1311	if ( # Ignore control structures.
				1312	not Search(r'\b(if\|for\|while\|switch\|return\|delete)\b', fncall) and
				1313	# Ignore pointers/references to functions.
				1314	not Search(r' $[^)]+$$[^)]*($\|,$)', fncall) and
				1315	# Ignore pointers/references to arrays.
				1316	not Search(r' $[^)]+$\[[^\]]+\]', fncall)):
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame^]	1317	if Search(r'\w\s\(\s(?!\s\\$)', fncall): # a ( used for a fn call
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1318	error(filename, linenum, 'whitespace/parens', 4,
				1319	'Extra space after ( in function call')
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame^]	1320	elif Search(r'$\s+(?!(\s*\$\|\()', fncall):
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1321	error(filename, linenum, 'whitespace/parens', 2,
				1322	'Extra space after (')
				1323	if (Search(r'\w\s+\(', fncall) and
				1324	not Search(r'#\s*define\|typedef', fncall)):
				1325	error(filename, linenum, 'whitespace/parens', 4,
				1326	'Extra space before ( in function call')
				1327	# If the ) is followed only by a newline or a { + newline, assume it's
				1328	# part of a control statement (if/while/etc), and don't complain
				1329	if Search(r'[^)]\s+\)\s*[^{\s]', fncall):
				1330	error(filename, linenum, 'whitespace/parens', 2,
				1331	'Extra space before )')
				1332
				1333
				1334	def IsBlankLine(line):
				1335	"""Returns true if the given line is blank.
				1336
				1337	We consider a line to be blank if the line is empty or consists of
				1338	only white spaces.
				1339
				1340	Args:
				1341	line: A line of a string.
				1342
				1343	Returns:
				1344	True, if the given line is blank.
				1345	"""
				1346	return not line or line.isspace()
				1347
				1348
				1349	def CheckForFunctionLengths(filename, clean_lines, linenum,
				1350	function_state, error):
				1351	"""Reports for long function bodies.
				1352
				1353	For an overview why this is done, see:
				1354	http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions
				1355
				1356	Uses a simplistic algorithm assuming other style guidelines
				1357	(especially spacing) are followed.
				1358	Only checks unindented functions, so class members are unchecked.
				1359	Trivial bodies are unchecked, so constructors with huge initializer lists
				1360	may be missed.
				1361	Blank/comment lines are not counted so as to avoid encouraging the removal
				1362	of vertical space and commments just to get through a lint check.
				1363	NOLINT on the last line of a function disables this check.
				1364
				1365	Args:
				1366	filename: The name of the current file.
				1367	clean_lines: A CleansedLines instance containing the file.
				1368	linenum: The number of the line to check.
				1369	function_state: Current function name and lines in body so far.
				1370	error: The function to call with any errors found.
				1371	"""
				1372	lines = clean_lines.lines
				1373	line = lines[linenum]
				1374	raw = clean_lines.raw_lines
				1375	raw_line = raw[linenum]
				1376	joined_line = ''
				1377
				1378	starting_func = False
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame^]	1379	regexp = r'(\w(\w\|::\|\\|\&\|\s))\(' # decls * & space::name( ...
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1380	match_result = Match(regexp, line)
				1381	if match_result:
				1382	# If the name is all caps and underscores, figure it's a macro and
				1383	# ignore it, unless it's TEST or TEST_F.
				1384	function_name = match_result.group(1).split()[-1]
				1385	if function_name == 'TEST' or function_name == 'TEST_F' or (
				1386	not Match(r'[A-Z_]+$', function_name)):
				1387	starting_func = True
				1388
				1389	if starting_func:
				1390	body_found = False
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame^]	1391	for start_linenum in xrange(linenum, clean_lines.NumLines()):
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1392	start_line = lines[start_linenum]
				1393	joined_line += ' ' + start_line.lstrip()
				1394	if Search(r'(;\|})', start_line): # Declarations and trivial functions
				1395	body_found = True
				1396	break # ... ignore
				1397	elif Search(r'{', start_line):
				1398	body_found = True
				1399	function = Search(r'((\w\|:)*)\(', line).group(1)
				1400	if Match(r'TEST', function): # Handle TEST... macros
				1401	parameter_regexp = Search(r'($.*$)', joined_line)
				1402	if parameter_regexp: # Ignore bad syntax
				1403	function += parameter_regexp.group(1)
				1404	else:
				1405	function += '()'
				1406	function_state.Begin(function)
				1407	break
				1408	if not body_found:
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame^]	1409	# No body for the function (or evidence of a non-function) was found.
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1410	error(filename, linenum, 'readability/fn_size', 5,
				1411	'Lint failed to find start of function body.')
				1412	elif Match(r'^\}\s*$', line): # function end
				1413	if not Search(r'\bNOLINT\b', raw_line):
				1414	function_state.Check(error, filename, linenum)
				1415	function_state.End()
				1416	elif not Match(r'^\s*$', line):
				1417	function_state.Count() # Count non-blank/non-comment lines.
				1418
				1419
				1420	_RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO($.+?$)?:?(\s\|$)?')
				1421
				1422
				1423	def CheckComment(comment, filename, linenum, error):
				1424	"""Checks for common mistakes in TODO comments.
				1425
				1426	Args:
				1427	comment: The text of the comment from the line in question.
				1428	filename: The name of the current file.
				1429	linenum: The number of the line to check.
				1430	error: The function to call with any errors found.
				1431	"""
				1432	match = _RE_PATTERN_TODO.match(comment)
				1433	if match:
				1434	# One whitespace is correct; zero whitespace is handled elsewhere.
				1435	leading_whitespace = match.group(1)
				1436	if len(leading_whitespace) > 1:
				1437	error(filename, linenum, 'whitespace/todo', 2,
				1438	'Too many spaces before TODO')
				1439
				1440	username = match.group(2)
				1441	if not username:
				1442	error(filename, linenum, 'readability/todo', 2,
				1443	'Missing username in TODO; it should look like '
				1444	'"// TODO(my_username): Stuff."')
				1445
				1446	middle_whitespace = match.group(3)
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame^]	1447	# Comparisons made explicit for correctness -- pylint: disable-msg=C6403
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1448	if middle_whitespace != ' ' and middle_whitespace != '':
				1449	error(filename, linenum, 'whitespace/todo', 2,
				1450	'TODO(my_username) should be followed by a space')
				1451
				1452
				1453	def CheckSpacing(filename, clean_lines, linenum, error):
				1454	"""Checks for the correctness of various spacing issues in the code.
				1455
				1456	Things we check for: spaces around operators, spaces after
				1457	if/for/while/switch, no spaces around parens in function calls, two
				1458	spaces between code and comment, don't start a block with a blank
				1459	line, don't end a function with a blank line, don't have too many
				1460	blank lines in a row.
				1461
				1462	Args:
				1463	filename: The name of the current file.
				1464	clean_lines: A CleansedLines instance containing the file.
				1465	linenum: The number of the line to check.
				1466	error: The function to call with any errors found.
				1467	"""
				1468
				1469	raw = clean_lines.raw_lines
				1470	line = raw[linenum]
				1471
				1472	# Before nixing comments, check if the line is blank for no good
				1473	# reason. This includes the first line after a block is opened, and
				1474	# blank lines at the end of a function (ie, right before a line like '}'
				1475	if IsBlankLine(line):
				1476	elided = clean_lines.elided
				1477	prev_line = elided[linenum - 1]
				1478	prevbrace = prev_line.rfind('{')
				1479	# TODO(unknown): Don't complain if line before blank line, and line after,
				1480	# both start with alnums and are indented the same amount.
				1481	# This ignores whitespace at the start of a namespace block
				1482	# because those are not usually indented.
				1483	if (prevbrace != -1 and prev_line[prevbrace:].find('}') == -1
				1484	and prev_line[:prevbrace].find('namespace') == -1):
				1485	# OK, we have a blank line at the start of a code block. Before we
				1486	# complain, we check if it is an exception to the rule: The previous
				1487	# non-empty line has the paramters of a function header that are indented
				1488	# 4 spaces (because they did not fit in a 80 column line when placed on
				1489	# the same line as the function name). We also check for the case where
				1490	# the previous line is indented 6 spaces, which may happen when the
				1491	# initializers of a constructor do not fit into a 80 column line.
				1492	exception = False
				1493	if Match(r' {6}\w', prev_line): # Initializer list?
				1494	# We are looking for the opening column of initializer list, which
				1495	# should be indented 4 spaces to cause 6 space indentation afterwards.
				1496	search_position = linenum-2
				1497	while (search_position >= 0
				1498	and Match(r' {6}\w', elided[search_position])):
				1499	search_position -= 1
				1500	exception = (search_position >= 0
				1501	and elided[search_position][:5] == ' :')
				1502	else:
				1503	# Search for the function arguments or an initializer list. We use a
				1504	# simple heuristic here: If the line is indented 4 spaces; and we have a
				1505	# closing paren, without the opening paren, followed by an opening brace
				1506	# or colon (for initializer lists) we assume that it is the last line of
				1507	# a function header. If we have a colon indented 4 spaces, it is an
				1508	# initializer list.
				1509	exception = (Match(r' {4}\w[^$]$\s(const\s)?(\{\s$\|:)',
				1510	prev_line)
				1511	or Match(r' {4}:', prev_line))
				1512
				1513	if not exception:
				1514	error(filename, linenum, 'whitespace/blank_line', 2,
				1515	'Blank line at the start of a code block. Is this needed?')
				1516	# This doesn't ignore whitespace at the end of a namespace block
				1517	# because that is too hard without pairing open/close braces;
				1518	# however, a special exception is made for namespace closing
				1519	# brackets which have a comment containing "namespace".
				1520	#
				1521	# Also, ignore blank lines at the end of a block in a long if-else
				1522	# chain, like this:
				1523	# if (condition1) {
				1524	# // Something followed by a blank line
				1525	#
				1526	# } else if (condition2) {
				1527	# // Something else
				1528	# }
				1529	if linenum + 1 < clean_lines.NumLines():
				1530	next_line = raw[linenum + 1]
				1531	if (next_line
				1532	and Match(r'\s*}', next_line)
				1533	and next_line.find('namespace') == -1
				1534	and next_line.find('} else ') == -1):
				1535	error(filename, linenum, 'whitespace/blank_line', 3,
				1536	'Blank line at the end of a code block. Is this needed?')
				1537
				1538	# Next, we complain if there's a comment too near the text
				1539	commentpos = line.find('//')
				1540	if commentpos != -1:
				1541	# Check if the // may be in quotes. If so, ignore it
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame^]	1542	# Comparisons made explicit for clarity -- pylint: disable-msg=C6403
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1543	if (line.count('"', 0, commentpos) -
				1544	line.count('\\"', 0, commentpos)) % 2 == 0: # not in quotes
				1545	# Allow one space for new scopes, two spaces otherwise:
				1546	if (not Match(r'^\s*{ //', line) and
				1547	((commentpos >= 1 and
				1548	line[commentpos-1] not in string.whitespace) or
				1549	(commentpos >= 2 and
				1550	line[commentpos-2] not in string.whitespace))):
				1551	error(filename, linenum, 'whitespace/comments', 2,
				1552	'At least two spaces is best between code and comments')
				1553	# There should always be a space between the // and the comment
				1554	commentend = commentpos + 2
				1555	if commentend < len(line) and not line[commentend] == ' ':
				1556	# but some lines are exceptions -- e.g. if they're big
				1557	# comment delimiters like:
				1558	# //----------------------------------------------------------
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame^]	1559	# or they begin with multiple slashes followed by a space:
				1560	# //////// Header comment
				1561	match = (Search(r'[=/-]{4,}\s*$', line[commentend:]) or
				1562	Search(r'^/+ ', line[commentend:]))
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1563	if not match:
				1564	error(filename, linenum, 'whitespace/comments', 4,
				1565	'Should have a space between // and comment')
				1566	CheckComment(line[commentpos:], filename, linenum, error)
				1567
				1568	line = clean_lines.elided[linenum] # get rid of comments and strings
				1569
				1570	# Don't try to do spacing checks for operator methods
				1571	line = re.sub(r'operator(==\|!=\|<\|<<\|<=\|>=\|>>\|>)\(', 'operator\(', line)
				1572
				1573	# We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )".
				1574	# Otherwise not. Note we only check for non-spaces on both sides;
				1575	# sometimes people put non-spaces on one side when aligning ='s among
				1576	# many lines (not that this is behavior that I approve of...)
				1577	if Search(r'[\w.]=[\w.]', line) and not Search(r'\b(if\|while) ', line):
				1578	error(filename, linenum, 'whitespace/operators', 4,
				1579	'Missing spaces around =')
				1580
				1581	# It's ok not to have spaces around binary operators like + - * /, but if
				1582	# there's too little whitespace, we get concerned. It's hard to tell,
				1583	# though, so we punt on this one for now. TODO.
				1584
				1585	# You should always have whitespace around binary operators.
				1586	# Alas, we can't test < or > because they're legitimately used sans spaces
				1587	# (a->b, vector<int> a). The only time we can tell is a < with no >, and
				1588	# only if it's not template params list spilling into the next line.
				1589	match = Search(r'[^<>=!\s](==\|!=\|<=\|>=)[^<>=!\s]', line)
				1590	if not match:
				1591	# Note that while it seems that the '<[^<]*' term in the following
				1592	# regexp could be simplified to '<.*', which would indeed match
				1593	# the same class of strings, the [^<] means that searching for the
				1594	# regexp takes linear rather than quadratic time.
				1595	if not Search(r'<[^<],\s$', line): # template params spill
				1596	match = Search(r'[^<>=!\s](<)[^<>=!\s]([^>]\|->)*$', line)
				1597	if match:
				1598	error(filename, linenum, 'whitespace/operators', 3,
				1599	'Missing spaces around %s' % match.group(1))
				1600	# We allow no-spaces around << and >> when used like this: 10<<20, but
				1601	# not otherwise (particularly, not when used as streams)
				1602	match = Search(r'[^0-9\s](<<\|>>)[^0-9\s]', line)
				1603	if match:
				1604	error(filename, linenum, 'whitespace/operators', 3,
				1605	'Missing spaces around %s' % match.group(1))
				1606
				1607	# There shouldn't be space around unary operators
				1608	match = Search(r'(!\s\|~\s\|[\s]--[\s;]\|[\s]\+\+[\s;])', line)
				1609	if match:
				1610	error(filename, linenum, 'whitespace/operators', 4,
				1611	'Extra space for operator %s' % match.group(1))
				1612
				1613	# A pet peeve of mine: no spaces after an if, while, switch, or for
				1614	match = Search(r' (if\(\|for\(\|while\(\|switch\()', line)
				1615	if match:
				1616	error(filename, linenum, 'whitespace/parens', 5,
				1617	'Missing space before ( in %s' % match.group(1))
				1618
				1619	# For if/for/while/switch, the left and right parens should be
				1620	# consistent about how many spaces are inside the parens, and
				1621	# there should either be zero or one spaces inside the parens.
				1622	# We don't want: "if ( foo)" or "if ( foo )".
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame^]	1623	# Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed.
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1624	match = Search(r'\b(if\|for\|while\|switch)\s*'
				1625	r'$([ ])(.).[^ ]+([ ])$\s{\s*$',
				1626	line)
				1627	if match:
				1628	if len(match.group(2)) != len(match.group(4)):
				1629	if not (match.group(3) == ';' and
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame^]	1630	len(match.group(2)) == 1 + len(match.group(4)) or
				1631	not match.group(2) and Search(r'\bfor\s$.; $', line)):
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1632	error(filename, linenum, 'whitespace/parens', 5,
				1633	'Mismatching spaces inside () in %s' % match.group(1))
				1634	if not len(match.group(2)) in [0, 1]:
				1635	error(filename, linenum, 'whitespace/parens', 5,
				1636	'Should have zero or one spaces inside ( and ) in %s' %
				1637	match.group(1))
				1638
				1639	# You should always have a space after a comma (either as fn arg or operator)
				1640	if Search(r',[^\s]', line):
				1641	error(filename, linenum, 'whitespace/comma', 3,
				1642	'Missing space after ,')
				1643
				1644	# Next we will look for issues with function calls.
				1645	CheckSpacingForFunctionCall(filename, line, linenum, error)
				1646
				1647	# Except after an opening paren, you should have spaces before your braces.
				1648	# And since you should never have braces at the beginning of a line, this is
				1649	# an easy test.
				1650	if Search(r'[^ (]{', line):
				1651	error(filename, linenum, 'whitespace/braces', 5,
				1652	'Missing space before {')
				1653
				1654	# Make sure '} else {' has spaces.
				1655	if Search(r'}else', line):
				1656	error(filename, linenum, 'whitespace/braces', 5,
				1657	'Missing space before else')
				1658
				1659	# You shouldn't have spaces before your brackets, except maybe after
				1660	# 'delete []' or 'new char * []'.
				1661	if Search(r'\w\s+\[', line) and not Search(r'delete\s+\[', line):
				1662	error(filename, linenum, 'whitespace/braces', 5,
				1663	'Extra space before [')
				1664
				1665	# You shouldn't have a space before a semicolon at the end of the line.
				1666	# There's a special case for "for" since the style guide allows space before
				1667	# the semicolon there.
				1668	if Search(r':\s;\s$', line):
				1669	error(filename, linenum, 'whitespace/semicolon', 5,
				1670	'Semicolon defining empty statement. Use { } instead.')
				1671	elif Search(r'^\s;\s$', line):
				1672	error(filename, linenum, 'whitespace/semicolon', 5,
				1673	'Line contains only semicolon. If this should be an empty statement, '
				1674	'use { } instead.')
				1675	elif (Search(r'\s+;\s*$', line) and
				1676	not Search(r'\bfor\b', line)):
				1677	error(filename, linenum, 'whitespace/semicolon', 5,
				1678	'Extra space before last semicolon. If this should be an empty '
				1679	'statement, use { } instead.')
				1680
				1681
				1682	def GetPreviousNonBlankLine(clean_lines, linenum):
				1683	"""Return the most recent non-blank line and its line number.
				1684
				1685	Args:
				1686	clean_lines: A CleansedLines instance containing the file contents.
				1687	linenum: The number of the line to check.
				1688
				1689	Returns:
				1690	A tuple with two elements. The first element is the contents of the last
				1691	non-blank line before the current line, or the empty string if this is the
				1692	first non-blank line. The second is the line number of that line, or -1
				1693	if this is the first non-blank line.
				1694	"""
				1695
				1696	prevlinenum = linenum - 1
				1697	while prevlinenum >= 0:
				1698	prevline = clean_lines.elided[prevlinenum]
				1699	if not IsBlankLine(prevline): # if not a blank line...
				1700	return (prevline, prevlinenum)
				1701	prevlinenum -= 1
				1702	return ('', -1)
				1703
				1704
				1705	def CheckBraces(filename, clean_lines, linenum, error):
				1706	"""Looks for misplaced braces (e.g. at the end of line).
				1707
				1708	Args:
				1709	filename: The name of the current file.
				1710	clean_lines: A CleansedLines instance containing the file.
				1711	linenum: The number of the line to check.
				1712	error: The function to call with any errors found.
				1713	"""
				1714
				1715	line = clean_lines.elided[linenum] # get rid of comments and strings
				1716
				1717	if Match(r'\s{\s$', line):
				1718	# We allow an open brace to start a line in the case where someone
				1719	# is using braces in a block to explicitly create a new scope,
				1720	# which is commonly used to control the lifetime of
				1721	# stack-allocated variables. We don't detect this perfectly: we
				1722	# just don't complain if the last non-whitespace character on the
				1723	# previous non-blank line is ';', ':', '{', or '}'.
				1724	prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
				1725	if not Search(r'[;:}{]\s*$', prevline):
				1726	error(filename, linenum, 'whitespace/braces', 4,
				1727	'{ should almost always be at the end of the previous line')
				1728
				1729	# An else clause should be on the same line as the preceding closing brace.
				1730	if Match(r'\selse\s', line):
				1731	prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
				1732	if Match(r'\s}\s$', prevline):
				1733	error(filename, linenum, 'whitespace/newline', 4,
				1734	'An else should appear on the same line as the preceding }')
				1735
				1736	# If braces come on one side of an else, they should be on both.
				1737	# However, we have to worry about "else if" that spans multiple lines!
				1738	if Search(r'}\selse[^{]$', line) or Match(r'[^}]else\s{', line):
				1739	if Search(r'}\selse if([^{])$', line): # could be multi-line if
				1740	# find the ( after the if
				1741	pos = line.find('else if')
				1742	pos = line.find('(', pos)
				1743	if pos > 0:
				1744	(endline, _, endpos) = CloseExpression(clean_lines, linenum, pos)
				1745	if endline[endpos:].find('{') == -1: # must be brace after if
				1746	error(filename, linenum, 'readability/braces', 5,
				1747	'If an else has a brace on one side, it should have it on both')
				1748	else: # common case: else not followed by a multi-line if
				1749	error(filename, linenum, 'readability/braces', 5,
				1750	'If an else has a brace on one side, it should have it on both')
				1751
				1752	# Likewise, an else should never have the else clause on the same line
				1753	if Search(r'\belse [^\s{]', line) and not Search(r'\belse if\b', line):
				1754	error(filename, linenum, 'whitespace/newline', 4,
				1755	'Else clause should never be on same line as else (use 2 lines)')
				1756
				1757	# In the same way, a do/while should never be on one line
				1758	if Match(r'\s*do [^\s{]', line):
				1759	error(filename, linenum, 'whitespace/newline', 4,
				1760	'do/while clauses should not be on a single line')
				1761
				1762	# Braces shouldn't be followed by a ; unless they're defining a struct
				1763	# or initializing an array.
				1764	# We can't tell in general, but we can for some common cases.
				1765	prevlinenum = linenum
				1766	while True:
				1767	(prevline, prevlinenum) = GetPreviousNonBlankLine(clean_lines, prevlinenum)
				1768	if Match(r'\s+{.}\s;', line) and not prevline.count(';'):
				1769	line = prevline + line
				1770	else:
				1771	break
				1772	if (Search(r'{.}\s;', line) and
				1773	line.count('{') == line.count('}') and
				1774	not Search(r'struct\|class\|enum\|\s=\s{', line)):
				1775	error(filename, linenum, 'readability/braces', 4,
				1776	"You don't need a ; after a }")
				1777
				1778
				1779	def ReplaceableCheck(operator, macro, line):
				1780	"""Determine whether a basic CHECK can be replaced with a more specific one.
				1781
				1782	For example suggest using CHECK_EQ instead of CHECK(a == b) and
				1783	similarly for CHECK_GE, CHECK_GT, CHECK_LE, CHECK_LT, CHECK_NE.
				1784
				1785	Args:
				1786	operator: The C++ operator used in the CHECK.
				1787	macro: The CHECK or EXPECT macro being called.
				1788	line: The current source line.
				1789
				1790	Returns:
				1791	True if the CHECK can be replaced with a more specific one.
				1792	"""
				1793
				1794	# This matches decimal and hex integers, strings, and chars (in that order).
				1795	match_constant = r'([-+]?(\d+\|0[xX][0-9a-fA-F]+)[lLuU]{0,3}\|"."\|\'.\')'
				1796
				1797	# Expression to match two sides of the operator with something that
				1798	# looks like a literal, since CHECK(x == iterator) won't compile.
				1799	# This means we can't catch all the cases where a more specific
				1800	# CHECK is possible, but it's less annoying than dealing with
				1801	# extraneous warnings.
				1802	match_this = (r'\s' + macro + r'\((\s' +
				1803	match_constant + r'\s' + operator + r'[^<>].\|'
				1804	r'.[^<>]' + operator + r'\s' + match_constant +
				1805	r'\s*\))')
				1806
				1807	# Don't complain about CHECK(x == NULL) or similar because
				1808	# CHECK_EQ(x, NULL) won't compile (requires a cast).
				1809	# Also, don't complain about more complex boolean expressions
				1810	# involving && or \|\| such as CHECK(a == b \|\| c == d).
				1811	return Match(match_this, line) and not Search(r'NULL\|&&\|\\|\\|', line)
				1812
				1813
				1814	def CheckCheck(filename, clean_lines, linenum, error):
				1815	"""Checks the use of CHECK and EXPECT macros.
				1816
				1817	Args:
				1818	filename: The name of the current file.
				1819	clean_lines: A CleansedLines instance containing the file.
				1820	linenum: The number of the line to check.
				1821	error: The function to call with any errors found.
				1822	"""
				1823
				1824	# Decide the set of replacement macros that should be suggested
				1825	raw_lines = clean_lines.raw_lines
				1826	current_macro = ''
				1827	for macro in _CHECK_MACROS:
				1828	if raw_lines[linenum].find(macro) >= 0:
				1829	current_macro = macro
				1830	break
				1831	if not current_macro:
				1832	# Don't waste time here if line doesn't contain 'CHECK' or 'EXPECT'
				1833	return
				1834
				1835	line = clean_lines.elided[linenum] # get rid of comments and strings
				1836
				1837	# Encourage replacing plain CHECKs with CHECK_EQ/CHECK_NE/etc.
				1838	for operator in ['==', '!=', '>=', '>', '<=', '<']:
				1839	if ReplaceableCheck(operator, current_macro, line):
				1840	error(filename, linenum, 'readability/check', 2,
				1841	'Consider using %s instead of %s(a %s b)' % (
				1842	_CHECK_REPLACEMENT[current_macro][operator],
				1843	current_macro, operator))
				1844	break
				1845
				1846
				1847	def GetLineWidth(line):
				1848	"""Determines the width of the line in column positions.
				1849
				1850	Args:
				1851	line: A string, which may be a Unicode string.
				1852
				1853	Returns:
				1854	The width of the line in column positions, accounting for Unicode
				1855	combining characters and wide characters.
				1856	"""
				1857	if isinstance(line, unicode):
				1858	width = 0
				1859	for c in unicodedata.normalize('NFC', line):
				1860	if unicodedata.east_asian_width(c) in ('W', 'F'):
				1861	width += 2
				1862	elif not unicodedata.combining(c):
				1863	width += 1
				1864	return width
				1865	else:
				1866	return len(line)
				1867
				1868
				1869	def CheckStyle(filename, clean_lines, linenum, file_extension, error):
				1870	"""Checks rules from the 'C++ style rules' section of cppguide.html.
				1871
				1872	Most of these rules are hard to test (naming, comment style), but we
				1873	do what we can. In particular we check for 2-space indents, line lengths,
				1874	tab usage, spaces inside code, etc.
				1875
				1876	Args:
				1877	filename: The name of the current file.
				1878	clean_lines: A CleansedLines instance containing the file.
				1879	linenum: The number of the line to check.
				1880	file_extension: The extension (without the dot) of the filename.
				1881	error: The function to call with any errors found.
				1882	"""
				1883
				1884	raw_lines = clean_lines.raw_lines
				1885	line = raw_lines[linenum]
				1886
				1887	if line.find('\t') != -1:
				1888	error(filename, linenum, 'whitespace/tab', 1,
				1889	'Tab found; better to use spaces')
				1890
				1891	# One or three blank spaces at the beginning of the line is weird; it's
				1892	# hard to reconcile that with 2-space indents.
				1893	# NOTE: here are the conditions rob pike used for his tests. Mine aren't
				1894	# as sophisticated, but it may be worth becoming so: RLENGTH==initial_spaces
				1895	# if(RLENGTH > 20) complain = 0;
				1896	# if(match($0, " +(error\|private\|public\|protected):")) complain = 0;
				1897	# if(match(prev, "&& *$")) complain = 0;
				1898	# if(match(prev, "\\\|\\\| *$")) complain = 0;
				1899	# if(match(prev, "[\",=><] *$")) complain = 0;
				1900	# if(match($0, " <<")) complain = 0;
				1901	# if(match(prev, " +for \\(")) complain = 0;
				1902	# if(prevodd && match(prevprev, " +for \\(")) complain = 0;
				1903	initial_spaces = 0
				1904	cleansed_line = clean_lines.elided[linenum]
				1905	while initial_spaces < len(line) and line[initial_spaces] == ' ':
				1906	initial_spaces += 1
				1907	if line and line[-1].isspace():
				1908	error(filename, linenum, 'whitespace/end_of_line', 4,
				1909	'Line ends in whitespace. Consider deleting these extra spaces.')
				1910	# There are certain situations we allow one space, notably for labels
				1911	elif ((initial_spaces == 1 or initial_spaces == 3) and
				1912	not Match(r'\s\w+\s:\s*$', cleansed_line)):
				1913	error(filename, linenum, 'whitespace/indent', 3,
				1914	'Weird number of spaces at line-start. '
				1915	'Are you using a 2-space indent?')
				1916	# Labels should always be indented at least one space.
				1917	elif not initial_spaces and line[:2] != '//' and Search(r'[^:]:\s*$',
				1918	line):
				1919	error(filename, linenum, 'whitespace/labels', 4,
				1920	'Labels should always be indented at least one space. '
				1921	'If this is a member-initializer list in a constructor, '
				1922	'the colon should be on the line after the definition header.')
				1923
				1924	# Check if the line is a header guard.
				1925	is_header_guard = False
				1926	if file_extension == 'h':
				1927	cppvar = GetHeaderGuardCPPVariable(filename)
				1928	if (line.startswith('#ifndef %s' % cppvar) or
				1929	line.startswith('#define %s' % cppvar) or
				1930	line.startswith('#endif // %s' % cppvar)):
				1931	is_header_guard = True
				1932	# #include lines and header guards can be long, since there's no clean way to
				1933	# split them.
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame^]	1934	#
				1935	# URLs can be long too. It's possible to split these, but it makes them
				1936	# harder to cut&paste.
				1937	if (not line.startswith('#include') and not is_header_guard and
				1938	not Match(r'^\s//.http(s?)://\S*$', line)):
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	1939	line_width = GetLineWidth(line)
				1940	if line_width > 100:
				1941	error(filename, linenum, 'whitespace/line_length', 4,
				1942	'Lines should very rarely be longer than 100 characters')
				1943	elif line_width > 80:
				1944	error(filename, linenum, 'whitespace/line_length', 2,
				1945	'Lines should be <= 80 characters long')
				1946
				1947	if (cleansed_line.count(';') > 1 and
				1948	# for loops are allowed two ;'s (and may run over two lines).
				1949	cleansed_line.find('for') == -1 and
				1950	(GetPreviousNonBlankLine(clean_lines, linenum)[0].find('for') == -1 or
				1951	GetPreviousNonBlankLine(clean_lines, linenum)[0].find(';') != -1) and
				1952	# It's ok to have many commands in a switch case that fits in 1 line
				1953	not ((cleansed_line.find('case ') != -1 or
				1954	cleansed_line.find('default:') != -1) and
				1955	cleansed_line.find('break;') != -1)):
				1956	error(filename, linenum, 'whitespace/newline', 4,
				1957	'More than one command on the same line')
				1958
				1959	# Some more style checks
				1960	CheckBraces(filename, clean_lines, linenum, error)
				1961	CheckSpacing(filename, clean_lines, linenum, error)
				1962	CheckCheck(filename, clean_lines, linenum, error)
				1963
				1964
				1965	_RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"')
				1966	_RE_PATTERN_INCLUDE = re.compile(r'^\s#\sinclude\s([<"])([^>"])[>"].*$')
				1967	# Matches the first component of a filename delimited by -s and _s. That is:
				1968	# _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
				1969	# _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo'
				1970	# _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo'
				1971	# _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo'
				1972	_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
				1973
				1974
				1975	def _DropCommonSuffixes(filename):
				1976	"""Drops common suffixes like _test.cc or -inl.h from filename.
				1977
				1978	For example:
				1979	>>> _DropCommonSuffixes('foo/foo-inl.h')
				1980	'foo/foo'
				1981	>>> _DropCommonSuffixes('foo/bar/foo.cc')
				1982	'foo/bar/foo'
				1983	>>> _DropCommonSuffixes('foo/foo_internal.h')
				1984	'foo/foo'
				1985	>>> _DropCommonSuffixes('foo/foo_unusualinternal.h')
				1986	'foo/foo_unusualinternal'
				1987
				1988	Args:
				1989	filename: The input filename.
				1990
				1991	Returns:
				1992	The filename with the common suffix removed.
				1993	"""
				1994	for suffix in ('test.cc', 'regtest.cc', 'unittest.cc',
				1995	'inl.h', 'impl.h', 'internal.h'):
				1996	if (filename.endswith(suffix) and len(filename) > len(suffix) and
				1997	filename[-len(suffix) - 1] in ('-', '_')):
				1998	return filename[:-len(suffix) - 1]
				1999	return os.path.splitext(filename)[0]
				2000
				2001
				2002	def _IsTestFilename(filename):
				2003	"""Determines if the given filename has a suffix that identifies it as a test.
				2004
				2005	Args:
				2006	filename: The input filename.
				2007
				2008	Returns:
				2009	True if 'filename' looks like a test, False otherwise.
				2010	"""
				2011	if (filename.endswith('_test.cc') or
				2012	filename.endswith('_unittest.cc') or
				2013	filename.endswith('_regtest.cc')):
				2014	return True
				2015	else:
				2016	return False
				2017
				2018
				2019	def _ClassifyInclude(fileinfo, include, is_system):
				2020	"""Figures out what kind of header 'include' is.
				2021
				2022	Args:
				2023	fileinfo: The current file cpplint is running over. A FileInfo instance.
				2024	include: The path to a #included file.
				2025	is_system: True if the #include used <> rather than "".
				2026
				2027	Returns:
				2028	One of the _XXX_HEADER constants.
				2029
				2030	For example:
				2031	>>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True)
				2032	_C_SYS_HEADER
				2033	>>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True)
				2034	_CPP_SYS_HEADER
				2035	>>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False)
				2036	_LIKELY_MY_HEADER
				2037	>>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'),
				2038	... 'bar/foo_other_ext.h', False)
				2039	_POSSIBLE_MY_HEADER
				2040	>>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False)
				2041	_OTHER_HEADER
				2042	"""
				2043	# This is a list of all standard c++ header files, except
				2044	# those already checked for above.
				2045	is_stl_h = include in _STL_HEADERS
				2046	is_cpp_h = is_stl_h or include in _CPP_HEADERS
				2047
				2048	if is_system:
				2049	if is_cpp_h:
				2050	return _CPP_SYS_HEADER
				2051	else:
				2052	return _C_SYS_HEADER
				2053
				2054	# If the target file and the include we're checking share a
				2055	# basename when we drop common extensions, and the include
				2056	# lives in . , then it's likely to be owned by the target file.
				2057	target_dir, target_base = (
				2058	os.path.split(_DropCommonSuffixes(fileinfo.RepositoryName())))
				2059	include_dir, include_base = os.path.split(_DropCommonSuffixes(include))
				2060	if target_base == include_base and (
				2061	include_dir == target_dir or
				2062	include_dir == os.path.normpath(target_dir + '/../public')):
				2063	return _LIKELY_MY_HEADER
				2064
				2065	# If the target and include share some initial basename
				2066	# component, it's possible the target is implementing the
				2067	# include, so it's allowed to be first, but we'll never
				2068	# complain if it's not there.
				2069	target_first_component = _RE_FIRST_COMPONENT.match(target_base)
				2070	include_first_component = _RE_FIRST_COMPONENT.match(include_base)
				2071	if (target_first_component and include_first_component and
				2072	target_first_component.group(0) ==
				2073	include_first_component.group(0)):
				2074	return _POSSIBLE_MY_HEADER
				2075
				2076	return _OTHER_HEADER
				2077
				2078
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	2079
def CheckIncludeLine(filename, clean_lines, linenum, include_state, error):
  """Check rules that are applicable to #include lines.

  Strings on #include lines are NOT removed from elided line, to make
  certain tasks easier. However, to prevent false positives, checks
  applicable to #include lines in CheckLanguage must be put here.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    include_state: An _IncludeState instance in which the headers are inserted.
    error: The function to call with any errors found.
  """
  fileinfo = FileInfo(filename)

  line = clean_lines.lines[linenum]

  # "include" should use the new style "foo/bar.h" instead of just "bar.h"
  if _RE_PATTERN_INCLUDE_NEW_STYLE.search(line):
    error(filename, linenum, 'build/include', 4,
          'Include the directory when naming .h files')

  # _RE_PATTERN_INCLUDE is anchored at the start of the line, so one match
  # serves every check below.  Nothing else applies to non-#include lines.
  match = _RE_PATTERN_INCLUDE.search(line)
  if not match:
    return

  include = match.group(2)
  is_system = (match.group(1) == '<')

  # we shouldn't include a file more than once. actually, there are a
  # handful of instances where doing so is okay, but in general it's
  # not.
  if include in include_state:
    error(filename, linenum, 'build/include', 4,
          '"%s" already included at %s:%s' %
          (include, filename, include_state[include]))
  else:
    # Remember where this header was first included.
    include_state[include] = linenum

    # We want to ensure that headers appear in the right order:
    # 1) for foo.cc, foo.h  (preferred location)
    # 2) c system files
    # 3) cpp system files
    # 4) for foo.cc, foo.h  (deprecated location)
    # 5) other google headers
    #
    # We classify each include statement as one of those 5 types
    # using a number of techniques. The include_state object keeps
    # track of the highest type seen, and complains if we see a
    # lower type after that.
    error_message = include_state.CheckNextIncludeOrder(
        _ClassifyInclude(fileinfo, include, is_system))
    if error_message:
      error(filename, linenum, 'build/include_order', 4,
            '%s. Should be: %s.h, c system, c++ system, other.' %
            (error_message, fileinfo.BaseName()))

  # Look for any of the stream classes that are part of standard C++.
  if Match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$', include):
    # Many unit tests use cout, so we exempt them.
    if not _IsTestFilename(filename):
      error(filename, linenum, 'readability/streams', 3,
            'Streams are highly discouraged.')
				2144
				2145	def CheckLanguage(filename, clean_lines, linenum, file_extension, include_state,
				2146	error):
				2147	"""Checks rules from the 'C++ language rules' section of cppguide.html.
				2148
				2149	Some of these rules are hard to test (function overloading, using
				2150	uint32 inappropriately), but we do the best we can.
				2151
				2152	Args:
				2153	filename: The name of the current file.
				2154	clean_lines: A CleansedLines instance containing the file.
				2155	linenum: The number of the line to check.
				2156	file_extension: The extension (without the dot) of the filename.
				2157	include_state: An _IncludeState instance in which the headers are inserted.
				2158	error: The function to call with any errors found.
				2159	"""
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	2160	# If the line is empty or consists of entirely a comment, no need to
				2161	# check it.
				2162	line = clean_lines.elided[linenum]
				2163	if not line:
				2164	return
				2165
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame^]	2166	match = _RE_PATTERN_INCLUDE.search(line)
				2167	if match:
				2168	CheckIncludeLine(filename, clean_lines, linenum, include_state, error)
				2169	return
				2170
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	2171	# Create an extended_line, which is the concatenation of the current and
				2172	# next lines, for more effective checking of code that may span more than one
				2173	# line.
				2174	if linenum + 1 < clean_lines.NumLines():
				2175	extended_line = line + clean_lines.elided[linenum + 1]
				2176	else:
				2177	extended_line = line
				2178
				2179	# Make Windows paths like Unix.
				2180	fullname = os.path.abspath(filename).replace('\\', '/')
				2181
				2182	# TODO(unknown): figure out if they're using default arguments in fn proto.
				2183
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	2184	# Check for non-const references in functions. This is tricky because &
				2185	# is also used to take the address of something. We allow <> for templates,
				2186	# (ignoring whatever is between the braces) and : for classes.
				2187	# These are complicated re's. They try to capture the following:
				2188	# paren (for fn-prototype start), typename, &, varname. For the const
				2189	# version, we're willing for const to be before typename or after
				2190	# Don't check the implemention on same line.
				2191	fnline = line.split('{', 1)[0]
				2192	if (len(re.findall(r'\([^()]\b(?:[\w:]\|<[^()]>)+(\s?&\|&\s?)\w+', fnline)) >
				2193	len(re.findall(r'\([^()]*\bconst\s+(?:typename\s+)?(?:struct\s+)?'
				2194	r'(?:[\w:]\|<[^()]*>)+(\s?&\|&\s?)\w+', fnline)) +
				2195	len(re.findall(r'\([^()]\b(?:[\w:]\|<[^()]>)+\s+const(\s?&\|&\s?)[\w]+',
				2196	fnline))):
				2197
				2198	# We allow non-const references in a few standard places, like functions
				2199	# called "swap()" or iostream operators like "<<" or ">>".
				2200	if not Search(
				2201	r'(swap\|Swap\|operator[<>][<>])\s\(\s(?:[\w:]\|<.>)+\s&',
				2202	fnline):
				2203	error(filename, linenum, 'runtime/references', 2,
				2204	'Is this a non-const reference? '
				2205	'If so, make const or use a pointer.')
				2206
				2207	# Check to see if they're using an conversion function cast.
				2208	# I just try to capture the most common basic types, though there are more.
				2209	# Parameterless conversion functions, such as bool(), are allowed as they are
				2210	# probably a member operator declaration or default constructor.
				2211	match = Search(
				2212	r'\b(int\|float\|double\|bool\|char\|int32\|uint32\|int64\|uint64)\([^)]', line)
				2213	if match:
				2214	# gMock methods are defined using some variant of MOCK_METHODx(name, type)
				2215	# where type may be float(), int(string), etc. Without context they are
				2216	# virtually indistinguishable from int(x) casts.
				2217	if not Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line):
				2218	error(filename, linenum, 'readability/casting', 4,
				2219	'Using deprecated casting style. '
				2220	'Use static_cast<%s>(...) instead' %
				2221	match.group(1))
				2222
				2223	CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
				2224	'static_cast',
				2225	r'$(int\|float\|double\|bool\|char\|u?int(16\|32\|64))$',
				2226	error)
				2227	# This doesn't catch all cases. Consider (const char * const)"hello".
				2228	CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
				2229	'reinterpret_cast', r'$(\w+\s?\*+\s?)$', error)
				2230
				2231	# In addition, we look for people taking the address of a cast. This
				2232	# is dangerous -- casts can assign to temporaries, so the pointer doesn't
				2233	# point where you think.
				2234	if Search(
				2235	r'(&$[^)]+$[\w(])\|(&(static\|dynamic\|reinterpret)_cast\b)', line):
				2236	error(filename, linenum, 'runtime/casting', 4,
				2237	('Are you taking an address of a cast? '
				2238	'This is dangerous: could be a temp var. '
				2239	'Take the address before doing the cast, rather than after'))
				2240
				2241	# Check for people declaring static/global STL strings at the top level.
				2242	# This is dangerous because the C++ language does not guarantee that
				2243	# globals with constructors are initialized before the first access.
				2244	match = Match(
				2245	r'((?:\|static +)(?:\|const +))string +([a-zA-Z0-9_:]+)\b(.*)',
				2246	line)
				2247	# Make sure it's not a function.
				2248	# Function template specialization looks like: "string foo<Type>(...".
				2249	# Class template definitions look like: "string Foo<Type>::Method(...".
				2250	if match and not Match(r'\s(<.>)?(::[a-zA-Z0-9_]+)?\s*\(([^"]\|$)',
				2251	match.group(3)):
				2252	error(filename, linenum, 'runtime/string', 4,
				2253	'For a static/global string constant, use a C style string instead: '
				2254	'"%schar %s[]".' %
				2255	(match.group(1), match.group(2)))
				2256
				2257	# Check that we're not using RTTI outside of testing code.
				2258	if Search(r'\bdynamic_cast<', line) and not _IsTestFilename(filename):
				2259	error(filename, linenum, 'runtime/rtti', 5,
				2260	'Do not use dynamic_cast<>. If you need to cast within a class '
				2261	"hierarchy, use static_cast<> to upcast. Google doesn't support "
				2262	'RTTI.')
				2263
				2264	if Search(r'\b([A-Za-z0-9_]*_)$\1$', line):
				2265	error(filename, linenum, 'runtime/init', 4,
				2266	'You seem to be initializing a member variable with itself.')
				2267
				2268	if file_extension == 'h':
				2269	# TODO(unknown): check that 1-arg constructors are explicit.
				2270	# How to tell it's a constructor?
				2271	# (handled in CheckForNonStandardConstructs for now)
				2272	# TODO(unknown): check that classes have DISALLOW_EVIL_CONSTRUCTORS
				2273	# (level 1 error)
				2274	pass
				2275
				2276	# Check if people are using the verboten C basic types. The only exception
				2277	# we regularly allow is "unsigned short port" for port.
				2278	if Search(r'\bshort port\b', line):
				2279	if not Search(r'\bunsigned short port\b', line):
				2280	error(filename, linenum, 'runtime/int', 4,
				2281	'Use "unsigned short" for ports, not "short"')
				2282	else:
				2283	match = Search(r'\b(short\|long(?! +double)\|long long)\b', line)
				2284	if match:
				2285	error(filename, linenum, 'runtime/int', 4,
				2286	'Use int16/int64/etc, rather than the C type %s' % match.group(1))
				2287
				2288	# When snprintf is used, the second argument shouldn't be a literal.
				2289	match = Search(r'snprintf\s\(([^,]),\s([0-9])\s*,', line)
				2290	if match:
				2291	error(filename, linenum, 'runtime/printf', 3,
				2292	'If you can, use sizeof(%s) instead of %s as the 2nd arg '
				2293	'to snprintf.' % (match.group(1), match.group(2)))
				2294
				2295	# Check if some verboten C functions are being used.
				2296	if Search(r'\bsprintf\b', line):
				2297	error(filename, linenum, 'runtime/printf', 5,
				2298	'Never use sprintf. Use snprintf instead.')
				2299	match = Search(r'\b(strcpy\|strcat)\b', line)
				2300	if match:
				2301	error(filename, linenum, 'runtime/printf', 4,
				2302	'Almost always, snprintf is better than %s' % match.group(1))
				2303
				2304	if Search(r'\bsscanf\b', line):
				2305	error(filename, linenum, 'runtime/printf', 1,
				2306	'sscanf can be ok, but is slow and can overflow buffers.')
				2307
				2308	# Check for suspicious usage of "if" like
				2309	# } if (a == b) {
				2310	if Search(r'\}\sif\s\(', line):
				2311	error(filename, linenum, 'readability/braces', 4,
				2312	'Did you mean "else if"? If not, start a new line for "if".')
				2313
				2314	# Check for potential format string bugs like printf(foo).
				2315	# We constrain the pattern not to pick things like DocidForPrintf(foo).
				2316	# Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
				2317	match = re.search(r'\b((?:string)?printf)\s*$([\w.\->()]+)$', line, re.I)
				2318	if match:
				2319	error(filename, linenum, 'runtime/printf', 4,
				2320	'Potential format string bug. Do %s("%%s", %s) instead.'
				2321	% (match.group(1), match.group(2)))
				2322
				2323	# Check for potential memset bugs like memset(buf, sizeof(buf), 0).
				2324	match = Search(r'memset\s$([^,]),\s([^,]),\s0\s$', line)
				2325	if match and not Match(r"^''\|-?[0-9]+\|0x[0-9A-Fa-f]$", match.group(2)):
				2326	error(filename, linenum, 'runtime/memset', 4,
				2327	'Did you mean "memset(%s, 0, %s)"?'
				2328	% (match.group(1), match.group(2)))
				2329
				2330	if Search(r'\busing namespace\b', line):
				2331	error(filename, linenum, 'build/namespaces', 5,
				2332	'Do not use namespace using-directives. '
				2333	'Use using-declarations instead.')
				2334
				2335	# Detect variable-length arrays.
				2336	match = Match(r'\s(.+::)?(\w+) [a-z]\w\[(.+)];', line)
				2337	if (match and match.group(2) != 'return' and match.group(2) != 'delete' and
				2338	match.group(3).find(']') == -1):
				2339	# Split the size using space and arithmetic operators as delimiters.
				2340	# If any of the resulting tokens are not compile time constants then
				2341	# report the error.
				2342	tokens = re.split(r'\s\|\+\|\-\|\*\|\/\|<<\|>>]', match.group(3))
				2343	is_const = True
				2344	skip_next = False
				2345	for tok in tokens:
				2346	if skip_next:
				2347	skip_next = False
				2348	continue
				2349
				2350	if Search(r'sizeof$.+$', tok): continue
				2351	if Search(r'arraysize$\w+$', tok): continue
				2352
				2353	tok = tok.lstrip('(')
				2354	tok = tok.rstrip(')')
				2355	if not tok: continue
				2356	if Match(r'\d+', tok): continue
				2357	if Match(r'0[xX][0-9a-fA-F]+', tok): continue
				2358	if Match(r'k[A-Z0-9]\w*', tok): continue
				2359	if Match(r'(.+::)?k[A-Z0-9]\w*', tok): continue
				2360	if Match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue
				2361	# A catch all for tricky sizeof cases, including 'sizeof expression',
				2362	# 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
				2363	# requires skipping the next token becasue we split on ' ' and '*'.
				2364	if tok.startswith('sizeof'):
				2365	skip_next = True
				2366	continue
				2367	is_const = False
				2368	break
				2369	if not is_const:
				2370	error(filename, linenum, 'runtime/arrays', 1,
				2371	'Do not use variable-length arrays. Use an appropriately named '
				2372	"('k' followed by CamelCase) compile-time constant for the size.")
				2373
				2374	# If DISALLOW_EVIL_CONSTRUCTORS, DISALLOW_COPY_AND_ASSIGN, or
				2375	# DISALLOW_IMPLICIT_CONSTRUCTORS is present, then it should be the last thing
				2376	# in the class declaration.
				2377	match = Match(
				2378	(r'\s*'
				2379	r'(DISALLOW_(EVIL_CONSTRUCTORS\|COPY_AND_ASSIGN\|IMPLICIT_CONSTRUCTORS))'
				2380	r'$.*$;$'),
				2381	line)
				2382	if match and linenum + 1 < clean_lines.NumLines():
				2383	next_line = clean_lines.elided[linenum + 1]
				2384	if not Search(r'^\s*};', next_line):
				2385	error(filename, linenum, 'readability/constructors', 3,
				2386	match.group(1) + ' should be the last thing in the class')
				2387
				2388	# Check for use of unnamed namespaces in header files. Registration
				2389	# macros are typically OK, so we allow use of "namespace {" on lines
				2390	# that end with backslashes.
				2391	if (file_extension == 'h'
				2392	and Search(r'\bnamespace\s*{', line)
				2393	and line[-1] != '\\'):
				2394	error(filename, linenum, 'build/namespaces', 4,
				2395	'Do not use unnamed namespaces in header files. See '
				2396	'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
				2397	' for more information.')
				2398
				2399
				2400	def CheckCStyleCast(filename, linenum, line, raw_line, cast_type, pattern,
				2401	error):
				2402	"""Checks for a C-style cast by looking for the pattern.
				2403
				2404	This also handles sizeof(type) warnings, due to similarity of content.
				2405
				2406	Args:
				2407	filename: The name of the current file.
				2408	linenum: The number of the line to check.
				2409	line: The line of code to check.
				2410	raw_line: The raw line of code to check, with comments.
				2411	cast_type: The string for the C++ cast to recommend. This is either
				2412	reinterpret_cast or static_cast, depending.
				2413	pattern: The regular expression used to find C-style casts.
				2414	error: The function to call with any errors found.
				2415	"""
				2416	match = Search(pattern, line)
				2417	if not match:
				2418	return
				2419
				2420	# e.g., sizeof(int)
				2421	sizeof_match = Match(r'.sizeof\s$', line[0:match.start(1) - 1])
				2422	if sizeof_match:
				2423	error(filename, linenum, 'runtime/sizeof', 1,
				2424	'Using sizeof(type). Use sizeof(varname) instead if possible')
				2425	return
				2426
				2427	remainder = line[match.end(0):]
				2428
				2429	# The close paren is for function pointers as arguments to a function.
				2430	# eg, void foo(void (*bar)(int));
				2431	# The semicolon check is a more basic function check; also possibly a
				2432	# function pointer typedef.
				2433	# eg, void foo(int); or void foo(int) const;
				2434	# The equals check is for function pointer assignment.
				2435	# eg, void (foo)(int) = ...
				2436	#
				2437	# Right now, this will only catch cases where there's a single argument, and
				2438	# it's unnamed. It should probably be expanded to check for multiple
				2439	# arguments with some unnamed.
				2440	function_match = Match(r'\s(\)\|=\|(const)?\s(;\|\{\|throw))', remainder)
				2441	if function_match:
				2442	if (not function_match.group(3) or
				2443	function_match.group(3) == ';' or
				2444	raw_line.find('/*') < 0):
				2445	error(filename, linenum, 'readability/function', 3,
				2446	'All parameters should be named in a function')
				2447	return
				2448
				2449	# At this point, all that should be left is actual casts.
				2450	error(filename, linenum, 'readability/casting', 4,
				2451	'Using C-style cast. Use %s<%s>(...) instead' %
				2452	(cast_type, match.group(1)))
				2453
				2454
				2455	_HEADERS_CONTAINING_TEMPLATES = (
				2456	('<deque>', ('deque',)),
				2457	('<functional>', ('unary_function', 'binary_function',
				2458	'plus', 'minus', 'multiplies', 'divides', 'modulus',
				2459	'negate',
				2460	'equal_to', 'not_equal_to', 'greater', 'less',
				2461	'greater_equal', 'less_equal',
				2462	'logical_and', 'logical_or', 'logical_not',
				2463	'unary_negate', 'not1', 'binary_negate', 'not2',
				2464	'bind1st', 'bind2nd',
				2465	'pointer_to_unary_function',
				2466	'pointer_to_binary_function',
				2467	'ptr_fun',
				2468	'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
				2469	'mem_fun_ref_t',
				2470	'const_mem_fun_t', 'const_mem_fun1_t',
				2471	'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
				2472	'mem_fun_ref',
				2473	)),
				2474	('<limits>', ('numeric_limits',)),
				2475	('<list>', ('list',)),
				2476	('<map>', ('map', 'multimap',)),
				2477	('<memory>', ('allocator',)),
				2478	('<queue>', ('queue', 'priority_queue',)),
				2479	('<set>', ('set', 'multiset',)),
				2480	('<stack>', ('stack',)),
				2481	('<string>', ('char_traits', 'basic_string',)),
				2482	('<utility>', ('pair',)),
				2483	('<vector>', ('vector',)),
				2484
				2485	# gcc extensions.
				2486	# Note: std::hash is their hash, ::hash is our hash
				2487	('<hash_map>', ('hash_map', 'hash_multimap',)),
				2488	('<hash_set>', ('hash_set', 'hash_multiset',)),
				2489	('<slist>', ('slist',)),
				2490	)
				2491
				2492	_HEADERS_ACCEPTED_BUT_NOT_PROMOTED = {
				2493	# We can trust with reasonable confidence that map gives us pair<>, too.
				2494	'pair<>': ('map', 'multimap', 'hash_map', 'hash_multimap')
				2495	}
				2496
				2497	_RE_PATTERN_STRING = re.compile(r'\bstring\b')
				2498
				2499	_re_pattern_algorithm_header = []
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame^]	2500	for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap',
				2501	'transform'):
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	2502	# Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or
				2503	# type::max().
				2504	_re_pattern_algorithm_header.append(
				2505	(re.compile(r'[^>.]\b' + _template + r'(<.*?>)?$[^$]'),
				2506	_template,
				2507	'<algorithm>'))
				2508
				2509	_re_pattern_templates = []
				2510	for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
				2511	for _template in _templates:
				2512	_re_pattern_templates.append(
				2513	(re.compile(r'(\<\|\b)' + _template + r'\s*\<'),
				2514	_template + '<>',
				2515	_header))
				2516
				2517
def FilesBelongToSameModule(filename_cc, filename_h):
  """Decides whether a .cc file and a .h file are part of one 'module'.

  Both names are reduced to a stem before they are compared:
    * the .cc name loses its '.cc' extension and then a trailing '_unittest'
      or '_test', so foo.cc, foo_test.cc and foo_unittest.cc all become 'foo';
    * the .h name loses its '.h' extension and then a trailing '-inl';
    * every '/public/' and '/internal/' path component is collapsed to '/',
      so some/path/public/xyzzy and some/path/internal/xyzzy compare equal.
  The files belong together when the .cc stem ends with the .h stem.

  When filename_cc carries a longer path than filename_h (for example
  '/absolute/path/to/base/sysinfo.cc' versus 'base/sysinfo.h'), the leftover
  leading part of the .cc stem is returned as well.  The caller can prepend it
  to the header name to open the header, since the real include paths are not
  known here.

  Known bugs: tools/base/bar.cc and base/bar.h belong to the same module
  according to this implementation.  Because of this, this function gives
  some false positives.  This should be sufficiently rare in practice.

  Args:
    filename_cc: is the path for the .cc file
    filename_h: is the path for the header path

  Returns:
    Tuple with a bool and a string:
    bool: True if filename_cc and filename_h belong to the same module.
    string: the additional prefix needed to open the header file.
  """
  cc_extension = '.cc'
  h_extension = '.h'
  if not (filename_cc.endswith(cc_extension) and
          filename_h.endswith(h_extension)):
    return (False, '')

  # Reduce the source file to its module stem.
  cc_stem = filename_cc[:-len(cc_extension)]
  for test_suffix in ('_unittest', '_test'):
    if cc_stem.endswith(test_suffix):
      cc_stem = cc_stem[:-len(test_suffix)]
      break  # Only one test suffix is ever removed.

  # Reduce the header to its module stem.
  h_stem = filename_h[:-len(h_extension)]
  inline_suffix = '-inl'
  if h_stem.endswith(inline_suffix):
    h_stem = h_stem[:-len(inline_suffix)]

  # Visibility directories do not separate modules.
  for visibility_dir in ('/public/', '/internal/'):
    cc_stem = cc_stem.replace(visibility_dir, '/')
    h_stem = h_stem.replace(visibility_dir, '/')

  if not cc_stem.endswith(h_stem):
    return False, ''
  return True, cc_stem[:-len(h_stem)]
				2571
				2572
				2573	def UpdateIncludeState(filename, include_state, io=codecs):
				2574	"""Fill up the include_state with new includes found from the file.
				2575
				2576	Args:
				2577	filename: the name of the header to read.
				2578	include_state: an _IncludeState instance in which the headers are inserted.
				2579	io: The io factory to use to read the file. Provided for testability.
				2580
				2581	Returns:
				2582	True if a header was succesfully added. False otherwise.
				2583	"""
				2584	headerfile = None
				2585	try:
				2586	headerfile = io.open(filename, 'r', 'utf8', 'replace')
				2587	except IOError:
				2588	return False
				2589	linenum = 0
				2590	for line in headerfile:
				2591	linenum += 1
				2592	clean_line = CleanseComments(line)
				2593	match = _RE_PATTERN_INCLUDE.search(clean_line)
				2594	if match:
				2595	include = match.group(2)
				2596	# The value formatting is cute, but not really used right now.
				2597	# What matters here is that the key is in include_state.
				2598	include_state.setdefault(include, '%s:%d' % (filename, linenum))
				2599	return True
				2600
				2601
				2602	def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error,
				2603	io=codecs):
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	2604	"""Reports for missing stl includes.
				2605
				2606	This function will output warnings to make sure you are including the headers
				2607	necessary for the stl containers and functions that you use. We only give one
				2608	reason to include a header. For example, if you use both equal_to<> and
				2609	less<> in a .h file, only one (the latter in the file) of these will be
				2610	reported as a reason to include the <functional>.
				2611
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	2612	Args:
				2613	filename: The name of the current file.
				2614	clean_lines: A CleansedLines instance containing the file.
				2615	include_state: An _IncludeState instance.
				2616	error: The function to call with any errors found.
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame^]	2617	io: The IO factory to use to read the header file. Provided for unittest
				2618	injection.
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	2619	"""
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	2620	required = {} # A map of header name to linenumber and the template entity.
				2621	# Example of required: { '<functional>': (1219, 'less<>') }
				2622
				2623	for linenum in xrange(clean_lines.NumLines()):
				2624	line = clean_lines.elided[linenum]
				2625	if not line or line[0] == '#':
				2626	continue
				2627
				2628	# String is special -- it is a non-templatized type in STL.
				2629	if _RE_PATTERN_STRING.search(line):
				2630	required['<string>'] = (linenum, 'string')
				2631
				2632	for pattern, template, header in _re_pattern_algorithm_header:
				2633	if pattern.search(line):
				2634	required[header] = (linenum, template)
				2635
				2636	# The following function is just a speed up, no semantics are changed.
				2637	if not '<' in line: # Reduces the cpu time usage by skipping lines.
				2638	continue
				2639
				2640	for pattern, template, header in _re_pattern_templates:
				2641	if pattern.search(line):
				2642	required[header] = (linenum, template)
				2643
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame^]	2644	# The policy is that if you #include something in foo.h you don't need to
				2645	# include it again in foo.cc. Here, we will look at possible includes.
				2646	# Let's copy the include_state so it is only messed up within this function.
				2647	include_state = include_state.copy()
				2648
				2649	# Did we find the header for this file (if any) and succesfully load it?
				2650	header_found = False
				2651
				2652	# Use the absolute path so that matching works properly.
				2653	abs_filename = os.path.abspath(filename)
				2654
				2655	# For Emacs's flymake.
				2656	# If cpplint is invoked from Emacs's flymake, a temporary file is generated
				2657	# by flymake and that file name might end with '_flymake.cc'. In that case,
				2658	# restore original file name here so that the corresponding header file can be
				2659	# found.
				2660	# e.g. If the file name is 'foo_flymake.cc', we should search for 'foo.h'
				2661	# instead of 'foo_flymake.h'
				2662	emacs_flymake_suffix = '_flymake.cc'
				2663	if abs_filename.endswith(emacs_flymake_suffix):
				2664	abs_filename = abs_filename[:-len(emacs_flymake_suffix)] + '.cc'
				2665
				2666	# include_state is modified during iteration, so we iterate over a copy of
				2667	# the keys.
				2668	for header in include_state.keys(): #NOLINT
				2669	(same_module, common_path) = FilesBelongToSameModule(abs_filename, header)
				2670	fullpath = common_path + header
				2671	if same_module and UpdateIncludeState(fullpath, include_state, io):
				2672	header_found = True
				2673
				2674	# If we can't find the header file for a .cc, assume it's because we don't
				2675	# know where to look. In that case we'll give up as we're not sure they
				2676	# didn't include it in the .h file.
				2677	# TODO(unknown): Do a better job of finding .h files so we are confident that
				2678	# not having the .h file means there isn't one.
				2679	if filename.endswith('.cc') and not header_found:
				2680	return
				2681
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	2682	# All the lines have been processed, report the errors found.
				2683	for required_header_unstripped in required:
				2684	template = required[required_header_unstripped][1]
				2685	if template in _HEADERS_ACCEPTED_BUT_NOT_PROMOTED:
				2686	headers = _HEADERS_ACCEPTED_BUT_NOT_PROMOTED[template]
				2687	if [True for header in headers if header in include_state]:
				2688	continue
				2689	if required_header_unstripped.strip('<>"') not in include_state:
				2690	error(filename, required[required_header_unstripped][0],
				2691	'build/include_what_you_use', 4,
				2692	'Add #include ' + required_header_unstripped + ' for ' + template)
				2693
				2694
				2695	def ProcessLine(filename, file_extension,
				2696	clean_lines, line, include_state, function_state,
				2697	class_state, error):
				2698	"""Processes a single line in the file.
				2699
				2700	Args:
				2701	filename: Filename of the file that is being processed.
				2702	file_extension: The extension (dot not included) of the file.
				2703	clean_lines: An array of strings, each representing a line of the file,
				2704	with comments stripped.
				2705	line: Number of line being processed.
				2706	include_state: An _IncludeState instance in which the headers are inserted.
				2707	function_state: A _FunctionState instance which counts function lines, etc.
				2708	class_state: A _ClassState instance which maintains information about
				2709	the current stack of nested class declarations being parsed.
				2710	error: A callable to which errors are reported, which takes 4 arguments:
				2711	filename, line number, error level, and message
				2712
				2713	"""
				2714	raw_lines = clean_lines.raw_lines
				2715	CheckForFunctionLengths(filename, clean_lines, line, function_state, error)
				2716	if Search(r'\bNOLINT\b', raw_lines[line]): # ignore nolint lines
				2717	return
				2718	CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error)
				2719	CheckStyle(filename, clean_lines, line, file_extension, error)
				2720	CheckLanguage(filename, clean_lines, line, file_extension, include_state,
				2721	error)
				2722	CheckForNonStandardConstructs(filename, clean_lines, line,
				2723	class_state, error)
				2724	CheckPosixThreading(filename, clean_lines, line, error)
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame^]	2725	CheckInvalidIncrement(filename, clean_lines, line, error)
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	2726
				2727
				2728	def ProcessFileData(filename, file_extension, lines, error):
				2729	"""Performs lint checks and reports any errors to the given error function.
				2730
				2731	Args:
				2732	filename: Filename of the file that is being processed.
				2733	file_extension: The extension (dot not included) of the file.
				2734	lines: An array of strings, each representing a line of the file, with the
				2735	last element being empty if the file is termined with a newline.
				2736	error: A callable to which errors are reported, which takes 4 arguments:
				2737	"""
				2738	lines = (['// marker so line numbers and indices both start at 1'] + lines +
				2739	['// marker so line numbers end in a known way'])
				2740
				2741	include_state = _IncludeState()
				2742	function_state = _FunctionState()
				2743	class_state = _ClassState()
				2744
				2745	CheckForCopyright(filename, lines, error)
				2746
				2747	if file_extension == 'h':
				2748	CheckForHeaderGuard(filename, lines, error)
				2749
				2750	RemoveMultiLineComments(filename, lines, error)
				2751	clean_lines = CleansedLines(lines)
				2752	for line in xrange(clean_lines.NumLines()):
				2753	ProcessLine(filename, file_extension, clean_lines, line,
				2754	include_state, function_state, class_state, error)
				2755	class_state.CheckFinished(filename, error)
				2756
				2757	CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error)
				2758
				2759	# We check here rather than inside ProcessLine so that we see raw
				2760	# lines rather than "cleaned" lines.
				2761	CheckForUnicodeReplacementCharacters(filename, lines, error)
				2762
				2763	CheckForNewlineAtEOF(filename, lines, error)
				2764
				2765
				2766	def ProcessFile(filename, vlevel):
				2767	"""Does google-lint on a single file.
				2768
				2769	Args:
				2770	filename: The name of the file to parse.
				2771
				2772	vlevel: The level of errors to report. Every error of confidence
				2773	>= verbose_level will be reported. 0 is a good default.
				2774	"""
				2775
				2776	_SetVerboseLevel(vlevel)
				2777
				2778	try:
				2779	# Support the UNIX convention of using "-" for stdin. Note that
				2780	# we are not opening the file with universal newline support
				2781	# (which codecs doesn't support anyway), so the resulting lines do
				2782	# contain trailing '\r' characters if we are reading a file that
				2783	# has CRLF endings.
				2784	# If after the split a trailing '\r' is present, it is removed
				2785	# below. If it is not expected to be present (i.e. os.linesep !=
				2786	# '\r\n' as in Windows), a warning is issued below if this file
				2787	# is processed.
				2788
				2789	if filename == '-':
				2790	lines = codecs.StreamReaderWriter(sys.stdin,
				2791	codecs.getreader('utf8'),
				2792	codecs.getwriter('utf8'),
				2793	'replace').read().split('\n')
				2794	else:
				2795	lines = codecs.open(filename, 'r', 'utf8', 'replace').read().split('\n')
				2796
				2797	carriage_return_found = False
				2798	# Remove trailing '\r'.
				2799	for linenum in range(len(lines)):
				2800	if lines[linenum].endswith('\r'):
				2801	lines[linenum] = lines[linenum].rstrip('\r')
				2802	carriage_return_found = True
				2803
				2804	except IOError:
				2805	sys.stderr.write(
				2806	"Skipping input '%s': Can't open for reading\n" % filename)
				2807	return
				2808
				2809	# Note, if no dot is found, this will give the entire filename as the ext.
				2810	file_extension = filename[filename.rfind('.') + 1:]
				2811
				2812	# When reading from stdin, the extension is unknown, so no cpplint tests
				2813	# should rely on the extension.
				2814	if (filename != '-' and file_extension != 'cc' and file_extension != 'h'
				2815	and file_extension != 'cpp'):
				2816	sys.stderr.write('Ignoring %s; not a .cc or .h file\n' % filename)
				2817	else:
				2818	ProcessFileData(filename, file_extension, lines, Error)
				2819	if carriage_return_found and os.linesep != '\r\n':
				2820	# Use 0 for linenum since outputing only one error for potentially
				2821	# several lines.
				2822	Error(filename, 0, 'whitespace/newline', 1,
				2823	'One or more unexpected \\r (^M) found;'
				2824	'better to use only a \\n')
				2825
				2826	sys.stderr.write('Done processing %s\n' % filename)
				2827
				2828
				2829	def PrintUsage(message):
				2830	"""Prints a brief usage string and exits, optionally with an error message.
				2831
				2832	Args:
				2833	message: The optional error message.
				2834	"""
				2835	sys.stderr.write(_USAGE)
				2836	if message:
				2837	sys.exit('\nFATAL ERROR: ' + message)
				2838	else:
				2839	sys.exit(1)
				2840
				2841
				2842	def PrintCategories():
				2843	"""Prints a list of all the error-categories used by error messages.
				2844
				2845	These are the categories used to filter messages via --filter.
				2846	"""
				2847	sys.stderr.write(_ERROR_CATEGORIES)
				2848	sys.exit(0)
				2849
				2850
				2851	def ParseArguments(args):
				2852	"""Parses the command line arguments.
				2853
				2854	This may set the output format and verbosity level as side-effects.
				2855
				2856	Args:
				2857	args: The command line arguments:
				2858
				2859	Returns:
				2860	The list of filenames to lint.
				2861	"""
				2862	try:
				2863	(opts, filenames) = getopt.getopt(args, '', ['help', 'output=', 'verbose=',
				2864	'filter='])
				2865	except getopt.GetoptError:
				2866	PrintUsage('Invalid arguments.')
				2867
				2868	verbosity = _VerboseLevel()
				2869	output_format = _OutputFormat()
				2870	filters = ''
				2871
				2872	for (opt, val) in opts:
				2873	if opt == '--help':
				2874	PrintUsage(None)
				2875	elif opt == '--output':
				2876	if not val in ('emacs', 'vs7'):
				2877	PrintUsage('The only allowed output formats are emacs and vs7.')
				2878	output_format = val
				2879	elif opt == '--verbose':
				2880	verbosity = int(val)
				2881	elif opt == '--filter':
				2882	filters = val
erg@google.com	6317a9c	2009-06-25 00:28:19 +0000	[diff] [blame^]	2883	if not filters:
maruel@google.com	fb2b8eb	2009-04-23 21:03:42 +0000	[diff] [blame]	2884	PrintCategories()
				2885
				2886	if not filenames:
				2887	PrintUsage('No files were specified.')
				2888
				2889	_SetOutputFormat(output_format)
				2890	_SetVerboseLevel(verbosity)
				2891	_SetFilters(filters)
				2892
				2893	return filenames
				2894
				2895
				2896	def main():
				2897	filenames = ParseArguments(sys.argv[1:])
				2898
				2899	# Change stderr to write with replacement characters so we don't die
				2900	# if we try to print something containing non-ASCII characters.
				2901	sys.stderr = codecs.StreamReaderWriter(sys.stderr,
				2902	codecs.getreader('utf8'),
				2903	codecs.getwriter('utf8'),
				2904	'replace')
				2905
				2906	_cpplint_state.ResetErrorCount()
				2907	for filename in filenames:
				2908	ProcessFile(filename, _cpplint_state.verbose_level)
				2909	sys.stderr.write('Total errors found: %d\n' % _cpplint_state.error_count)
				2910	sys.exit(_cpplint_state.error_count > 0)
				2911
				2912
# Run the linter only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
  main()