blob: 807da6b14970a9d2b9060515dd00721d1cffe5c8 [file] [log] [blame]
maruel@google.comfb2b8eb2009-04-23 21:03:42 +00001#!/usr/bin/python2.4
2#
erg@google.com26970fa2009-11-17 18:07:32 +00003# Copyright (c) 2009 Google Inc. All rights reserved.
maruel@google.comfb2b8eb2009-04-23 21:03:42 +00004#
erg@google.com26970fa2009-11-17 18:07:32 +00005# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions are
7# met:
maruel@google.comfb2b8eb2009-04-23 21:03:42 +00008#
erg@google.com26970fa2009-11-17 18:07:32 +00009# * Redistributions of source code must retain the above copyright
10# notice, this list of conditions and the following disclaimer.
11# * Redistributions in binary form must reproduce the above
12# copyright notice, this list of conditions and the following disclaimer
13# in the documentation and/or other materials provided with the
14# distribution.
15# * Neither the name of Google Inc. nor the names of its
16# contributors may be used to endorse or promote products derived from
17# this software without specific prior written permission.
maruel@google.comfb2b8eb2009-04-23 21:03:42 +000018#
erg@google.com26970fa2009-11-17 18:07:32 +000019# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
maruel@google.comfb2b8eb2009-04-23 21:03:42 +000030
31# Here are some issues that I've had people identify in my code during reviews,
32# that I think are possible to flag automatically in a lint tool. If these were
33# caught by lint, it would save time both for myself and that of my reviewers.
34# Most likely, some of these are beyond the scope of the current lint framework,
35# but I think it is valuable to retain these wish-list items even if they cannot
36# be immediately implemented.
37#
38# Suggestions
39# -----------
40# - Check for no 'explicit' for multi-arg ctor
41# - Check for boolean assign RHS in parens
42# - Check for ctor initializer-list colon position and spacing
43# - Check that if there's a ctor, there should be a dtor
44# - Check accessors that return non-pointer member variables are
45# declared const
46# - Check accessors that return non-const pointer member vars are
47# *not* declared const
48# - Check for using public includes for testing
49# - Check for spaces between brackets in one-line inline method
50# - Check for no assert()
51# - Check for spaces surrounding operators
52# - Check for 0 in pointer context (should be NULL)
53# - Check for 0 in char context (should be '\0')
54# - Check for camel-case method name conventions for methods
55# that are not simple inline getters and setters
56# - Check that base classes have virtual destructors
57# put " // namespace" after } that closes a namespace, with
58# namespace's name after 'namespace' if it is named.
59# - Do not indent namespace contents
60# - Avoid inlining non-trivial constructors in header files
61# include base/basictypes.h if DISALLOW_EVIL_CONSTRUCTORS is used
62# - Check for old-school (void) cast for call-sites of functions
63# ignored return value
64# - Check gUnit usage of anonymous namespace
65# - Check for class declaration order (typedefs, consts, enums,
66# ctor(s?), dtor, friend declarations, methods, member vars)
67#
68
69"""Does google-lint on c++ files.
70
71The goal of this script is to identify places in the code that *may*
72be in non-compliance with google style. It does not attempt to fix
73up these problems -- the point is to educate. It does also not
74attempt to find all problems, or to ensure that everything it does
75find is legitimately a problem.
76
77In particular, we can get very confused by /* and // inside strings!
78We do a small hack, which is to ignore //'s with "'s after them on the
79same line, but it is far from perfect (in either direction).
80"""
81
82import codecs
83import getopt
84import math # for log
85import os
86import re
87import sre_compile
88import string
89import sys
90import unicodedata
91
92
93_USAGE = """
94Syntax: cpplint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...]
erg@google.com26970fa2009-11-17 18:07:32 +000095 [--counting=total|toplevel|detailed]
maruel@google.comfb2b8eb2009-04-23 21:03:42 +000096 <file> [file] ...
97
98 The style guidelines this tries to follow are those in
99 http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml
100
101 Every problem is given a confidence score from 1-5, with 5 meaning we are
102 certain of the problem, and 1 meaning it could be a legitimate construct.
103 This will miss some errors, and is not a substitute for a code review.
104
105 To prevent specific lines from being linted, add a '// NOLINT' comment to the
106 end of the line.
107
108 The files passed in will be linted; at least one file must be provided.
109 Linted extensions are .cc, .cpp, and .h. Other file types will be ignored.
110
111 Flags:
112
113 output=vs7
114 By default, the output is formatted to ease emacs parsing. Visual Studio
115 compatible output (vs7) may also be used. Other formats are unsupported.
116
117 verbose=#
118 Specify a number 0-5 to restrict errors to certain verbosity levels.
119
120 filter=-x,+y,...
121 Specify a comma-separated list of category-filters to apply: only
122 error messages whose category names pass the filters will be printed.
123 (Category names are printed with the message and look like
124 "[whitespace/indent]".) Filters are evaluated left to right.
125 "-FOO" and "FOO" means "do not print categories that start with FOO".
126 "+FOO" means "do print categories that start with FOO".
127
128 Examples: --filter=-whitespace,+whitespace/braces
129 --filter=whitespace,runtime/printf,+runtime/printf_format
130 --filter=-,+build/include_what_you_use
131
132 To see a list of all the categories used in cpplint, pass no arg:
133 --filter=
erg@google.com26970fa2009-11-17 18:07:32 +0000134
135 counting=total|toplevel|detailed
136 The total number of errors found is always printed. If
137 'toplevel' is provided, then the count of errors in each of
138 the top-level categories like 'build' and 'whitespace' will
139 also be printed. If 'detailed' is provided, then a count
140 is provided for each category like 'build/class'.
maruel@google.comfb2b8eb2009-04-23 21:03:42 +0000141"""
142
143# We categorize each error message we print. Here are the categories.
144# We want an explicit list so we can list them all in cpplint --filter=.
145# If you add a new error message with a new category, add it to the list
146# here! cpplint_unittest.py should tell you if you forget to do this.
erg@google.com6317a9c2009-06-25 00:28:19 +0000147# \ used for clearer layout -- pylint: disable-msg=C6013
148_ERROR_CATEGORIES = '''\
maruel@google.comfb2b8eb2009-04-23 21:03:42 +0000149 build/class
150 build/deprecated
151 build/endif_comment
152 build/forward_decl
153 build/header_guard
154 build/include
erg@google.com26970fa2009-11-17 18:07:32 +0000155 build/include_alpha
maruel@google.comfb2b8eb2009-04-23 21:03:42 +0000156 build/include_order
157 build/include_what_you_use
158 build/namespaces
159 build/printf_format
160 build/storage_class
161 legal/copyright
162 readability/braces
163 readability/casting
164 readability/check
165 readability/constructors
166 readability/fn_size
167 readability/function
168 readability/multiline_comment
169 readability/multiline_string
170 readability/streams
171 readability/todo
172 readability/utf8
173 runtime/arrays
174 runtime/casting
175 runtime/explicit
176 runtime/int
177 runtime/init
erg@google.com6317a9c2009-06-25 00:28:19 +0000178 runtime/invalid_increment
erg@google.com26970fa2009-11-17 18:07:32 +0000179 runtime/member_string_references
maruel@google.comfb2b8eb2009-04-23 21:03:42 +0000180 runtime/memset
erg@google.com26970fa2009-11-17 18:07:32 +0000181 runtime/operator
maruel@google.comfb2b8eb2009-04-23 21:03:42 +0000182 runtime/printf
183 runtime/printf_format
184 runtime/references
185 runtime/rtti
186 runtime/sizeof
187 runtime/string
188 runtime/threadsafe_fn
189 runtime/virtual
190 whitespace/blank_line
191 whitespace/braces
192 whitespace/comma
193 whitespace/comments
194 whitespace/end_of_line
195 whitespace/ending_newline
196 whitespace/indent
197 whitespace/labels
198 whitespace/line_length
199 whitespace/newline
200 whitespace/operators
201 whitespace/parens
202 whitespace/semicolon
203 whitespace/tab
204 whitespace/todo
erg@google.com6317a9c2009-06-25 00:28:19 +0000205'''
206
# Baseline category filters.  Filters given through the --filter= flag are
# appended after these, so the flag takes precedence.  Every category is
# enabled unless it is switched off here, so list only the categories that
# should stay off until explicitly requested with --filter=.
# Each entry must start with '-' or '+', exactly as in the --filter= flag.
_DEFAULT_FILTERS = ['-build/include_alpha']
maruel@google.comfb2b8eb2009-04-23 21:03:42 +0000212
213# We used to check for high-bit characters, but after much discussion we
214# decided those were OK, as long as they were in UTF-8 and didn't represent
# hard-coded international strings, which belong in a separate i18n file.
216
# Header names that are classified as STL headers.
_STL_HEADERS = frozenset([
    'algobase.h',
    'algorithm',
    'alloc.h',
    'bitset',
    'deque',
    'exception',
    'function.h',
    'functional',
    'hash_map',
    'hash_map.h',
    'hash_set',
    'hash_set.h',
    'iterator',
    'list',
    'list.h',
    'map',
    'memory',
    'pair.h',
    'pthread_alloc',
    'queue',
    'set',
    'set.h',
    'sstream',
    'stack',
    'stl_alloc.h',
    'stl_relops.h',
    'type_traits.h',
    'utility',
    'vector',
    'vector.h',
])
226
227
# C++ system header names that are not classified as STL headers.
_CPP_HEADERS = frozenset([
    'algo.h',
    'builtinbuf.h',
    'bvector.h',
    'cassert',
    'cctype',
    'cerrno',
    'cfloat',
    'ciso646',
    'climits',
    'clocale',
    'cmath',
    'complex',
    'complex.h',
    'csetjmp',
    'csignal',
    'cstdarg',
    'cstddef',
    'cstdio',
    'cstdlib',
    'cstring',
    'ctime',
    'cwchar',
    'cwctype',
    'defalloc.h',
    'deque.h',
    'editbuf.h',
    'exception',
    'fstream',
    'fstream.h',
    'hashtable.h',
    'heap.h',
    'indstream.h',
    'iomanip',
    'iomanip.h',
    'ios',
    'iosfwd',
    'iostream',
    'iostream.h',
    'istream.h',
    'iterator.h',
    'limits',
    'map.h',
    'multimap.h',
    'multiset.h',
    'numeric',
    'ostream.h',
    'parsestream.h',
    'pfstream.h',
    'PlotFile.h',
    'procbuf.h',
    'pthread_alloc.h',
    'rope',
    'rope.h',
    'ropeimpl.h',
    'SFile.h',
    'slist',
    'slist.h',
    'stack.h',
    'stdexcept',
    'stdiostream.h',
    'streambuf.h',
    'stream.h',
    'strfile.h',
    'string',
    'strstream',
    'strstream.h',
    'tempbuf.h',
    'tree.h',
    'typeinfo',
    'valarray',
])
244
245
# Assertion macros.  These are defined in base/logging.h and
# testing/base/gunit.h.  The order of this list matters: each _M variant is
# listed before the macro it extends so that substring matching finds the
# longer name first.
_CHECK_MACROS = [
    'DCHECK', 'CHECK',
    'EXPECT_TRUE_M', 'EXPECT_TRUE',
    'ASSERT_TRUE_M', 'ASSERT_TRUE',
    'EXPECT_FALSE_M', 'EXPECT_FALSE',
    'ASSERT_FALSE_M', 'ASSERT_FALSE',
]

# For every macro above, maps a comparison operator to the more specific
# macro that should replace a CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE-style
# call written with that operator.
_CHECK_REPLACEMENT = {}
for _macro_name in _CHECK_MACROS:
    _CHECK_REPLACEMENT[_macro_name] = {}

# Each row is (operator, suffix for the positive macros, suffix for the
# *_FALSE macros).  The *_FALSE macros assert the negation, so they get the
# suffix of the opposite comparison.
for op, replacement, inv_replacement in [
        ('==', 'EQ', 'NE'),
        ('!=', 'NE', 'EQ'),
        ('>=', 'GE', 'LT'),
        ('>', 'GT', 'LE'),
        ('<=', 'LE', 'GT'),
        ('<', 'LT', 'GE')]:
    _CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_%s' % replacement
    _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_%s' % replacement
    _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_%s' % replacement
    _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_%s' % replacement
    _CHECK_REPLACEMENT['EXPECT_TRUE_M'][op] = 'EXPECT_%s_M' % replacement
    _CHECK_REPLACEMENT['ASSERT_TRUE_M'][op] = 'ASSERT_%s_M' % replacement

    _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_%s' % inv_replacement
    _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_%s' % inv_replacement
    _CHECK_REPLACEMENT['EXPECT_FALSE_M'][op] = 'EXPECT_%s_M' % inv_replacement
    _CHECK_REPLACEMENT['ASSERT_FALSE_M'][op] = 'ASSERT_%s_M' % inv_replacement
277
278
# Header classifications accepted by _IncludeState.CheckNextIncludeOrder().
# The values are the consecutive integers 1 through 5.
(_C_SYS_HEADER,
 _CPP_SYS_HEADER,
 _LIKELY_MY_HEADER,
 _POSSIBLE_MY_HEADER,
 _OTHER_HEADER) = range(1, 6)
286
287
# Maps a pattern string to its compiled regular expression object so each
# pattern is compiled at most once.
_regexp_compile_cache = {}


def Match(pattern, s):
    """Matches the string against the pattern, caching the compiled regexp.

    Args:
      pattern: Regular expression source string.
      s: The string to match, anchored at its beginning.

    Returns:
      The match object, or None if the pattern does not match at the start
      of s.
    """
    # The cache handling is deliberately repeated in both Match and Search:
    # factoring it out into a separate function turned out to be noticeably
    # expensive.
    # Compile through the public re API rather than the private sre_compile
    # module, which is an implementation detail of the standard library.
    if pattern not in _regexp_compile_cache:
        _regexp_compile_cache[pattern] = re.compile(pattern)
    return _regexp_compile_cache[pattern].match(s)


def Search(pattern, s):
    """Searches the string for the pattern, caching the compiled regexp.

    Args:
      pattern: Regular expression source string.
      s: The string to scan.

    Returns:
      The match object for the first location where the pattern matches,
      or None if it matches nowhere in s.
    """
    if pattern not in _regexp_compile_cache:
        _regexp_compile_cache[pattern] = re.compile(pattern)
    return _regexp_compile_cache[pattern].search(s)
306
307
class _IncludeState(dict):
    """Tracks line numbers for includes and the order in which they appear.

    As a dict, an _IncludeState object serves as a mapping between an include
    filename and the line number on which that file was included.

    Call CheckNextIncludeOrder() once for each header in the file, passing
    in one of the header type constants defined at module level.  A call made
    in an illegal order returns a non-empty error message; a legal call
    returns the empty string.
    """
    # self._section only ever moves forward through these values.  A C or
    # C++ system header that would move it backwards is reported as an error
    # by CheckNextIncludeOrder.
    _INITIAL_SECTION = 0
    _MY_H_SECTION = 1
    _C_SECTION = 2
    _CPP_SECTION = 3
    _OTHER_H_SECTION = 4

    # Human-readable names used to build the error message.
    _TYPE_NAMES = {
        _C_SYS_HEADER: 'C system header',
        _CPP_SYS_HEADER: 'C++ system header',
        _LIKELY_MY_HEADER: 'header this file implements',
        _POSSIBLE_MY_HEADER: 'header this file may implement',
        _OTHER_HEADER: 'other header',
    }
    _SECTION_NAMES = {
        _INITIAL_SECTION: "... nothing. (This can't be an error.)",
        _MY_H_SECTION: 'a header this file implements',
        _C_SECTION: 'C system header',
        _CPP_SECTION: 'C++ system header',
        _OTHER_H_SECTION: 'other header',
    }

    def __init__(self):
        dict.__init__(self)
        # Section the most recently accepted header belongs to.
        self._section = self._INITIAL_SECTION
        # Canonicalized path of the last header seen in the current section.
        self._last_header = ''

    def CanonicalizeAlphabeticalOrder(self, header_path):
        """Returns a path canonicalized for alphabetical comparison.

        - replaces "-" with "_" so they both compare the same.
        - removes '-inl' since we don't require them to be after the main
          header.
        - lowercases everything, just in case.

        Args:
          header_path: Path to be canonicalized.

        Returns:
          Canonicalized path.
        """
        without_inl = header_path.replace('-inl.h', '.h')
        return without_inl.replace('-', '_').lower()

    def IsInAlphabeticalOrder(self, header_path):
        """Checks that a header does not sort before the previous header.

        When the header is in order it becomes the new "previous header";
        otherwise the remembered header is left unchanged.

        Args:
          header_path: Header to be checked.

        Returns:
          True if the header is in alphabetical order, False otherwise.
        """
        candidate = self.CanonicalizeAlphabeticalOrder(header_path)
        if candidate < self._last_header:
            return False
        self._last_header = candidate
        return True

    def CheckNextIncludeOrder(self, header_type):
        """Returns a non-empty error message if the next header is out of order.

        This function also updates the internal state to be ready to check
        the next include.

        Args:
          header_type: One of the _XXX_HEADER constants defined at module
            level.

        Returns:
          The empty string if the header is in the right order, or an
          error message describing what's wrong.
        """
        # Build the message first: it must name the section we were in
        # before this header changed it.
        complaint = 'Found %s after %s' % (
            self._TYPE_NAMES[header_type],
            self._SECTION_NAMES[self._section])
        previous_section = self._section

        if header_type in (_C_SYS_HEADER, _CPP_SYS_HEADER):
            if header_type == _C_SYS_HEADER:
                target_section = self._C_SECTION
            else:
                target_section = self._CPP_SECTION
            if self._section > target_section:
                # A system header after a later section is an error; the
                # alphabetical tracking starts over.
                self._last_header = ''
                return complaint
            self._section = target_section
        elif header_type in (_LIKELY_MY_HEADER, _POSSIBLE_MY_HEADER):
            # A header this file (possibly) implements counts as "my header"
            # only while nothing else has been included; later on it is
            # treated like any other header.
            if self._section <= self._MY_H_SECTION:
                self._section = self._MY_H_SECTION
            else:
                self._section = self._OTHER_H_SECTION
        else:
            assert header_type == _OTHER_HEADER
            self._section = self._OTHER_H_SECTION

        # Alphabetical order is only tracked within a single section.
        if previous_section != self._section:
            self._last_header = ''

        return ''
431
432
class _CppLintState(object):
    """Maintains module-wide state: settings and error counters."""

    def __init__(self):
        self.verbose_level = 1  # global setting.
        self.error_count = 0    # global count of reported errors
        # Filters to apply when emitting error messages.
        self.filters = _DEFAULT_FILTERS[:]
        self.counting = 'total'  # In what way are we counting errors?
        self.errors_by_category = {}  # string to int dict storing error counts

        # output format:
        # "emacs" - format that emacs can parse (default)
        # "vs7" - format that Microsoft Visual Studio 7 can parse
        self.output_format = 'emacs'

    def SetOutputFormat(self, output_format):
        """Sets the output format for errors."""
        self.output_format = output_format

    def SetVerboseLevel(self, level):
        """Sets the module's verbosity, and returns the previous setting."""
        last_verbose_level = self.verbose_level
        self.verbose_level = level
        return last_verbose_level

    def SetCountingStyle(self, counting_style):
        """Sets the module's counting options."""
        self.counting = counting_style

    def SetFilters(self, filters):
        """Sets the error-message filters.

        These filters are applied when deciding whether to emit a given
        error message.  The new list is validated before it is installed, so
        a rejected filter string leaves the current filters untouched.

        Args:
          filters: A string of comma-separated filters (eg
            "+whitespace/indent").  Each filter should start with + or -.
            Surrounding whitespace and empty entries are ignored.

        Raises:
          ValueError: The comma-separated filters did not all start with
            '+' or '-'.
            E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter"
        """
        # Default filters always have less priority than the flag ones, so
        # they come first in the list.
        new_filters = _DEFAULT_FILTERS[:]
        for filt in filters.split(','):
            clean_filt = filt.strip()
            if clean_filt:
                new_filters.append(clean_filt)
        for filt in new_filters:
            if not (filt.startswith('+') or filt.startswith('-')):
                raise ValueError('Every filter in --filters must start with + or -'
                                 ' (%s does not)' % filt)
        # Only commit once every entry is known to be well formed.
        self.filters = new_filters

    def ResetErrorCounts(self):
        """Sets the module's error statistic back to zero."""
        self.error_count = 0
        self.errors_by_category = {}

    def IncrementErrorCount(self, category):
        """Bumps the module's error statistic.

        Args:
          category: Category of the error being counted.  Per-category counts
            are kept only for the 'toplevel' and 'detailed' counting styles;
            'toplevel' counts under the part of the category before the
            first '/'.
        """
        self.error_count += 1
        if self.counting in ('toplevel', 'detailed'):
            if self.counting != 'detailed':
                category = category.split('/')[0]
            if category not in self.errors_by_category:
                self.errors_by_category[category] = 0
            self.errors_by_category[category] += 1

    def PrintErrorCounts(self):
        """Print a summary of errors by category, and the total."""
        # items() rather than iteritems() so this runs on Python 2 and 3.
        for category, count in self.errors_by_category.items():
            sys.stderr.write('Category \'%s\' errors found: %d\n' %
                             (category, count))
        sys.stderr.write('Total errors found: %d\n' % self.error_count)
maruel@google.comfb2b8eb2009-04-23 21:03:42 +0000509
# The single module-wide state object read and updated by the helpers below.
_cpplint_state = _CppLintState()


def _OutputFormat():
    """Returns the output format currently configured for the module."""
    return _cpplint_state.output_format


def _SetOutputFormat(output_format):
    """Stores output_format as the module's output format."""
    _cpplint_state.SetOutputFormat(output_format)


def _VerboseLevel():
    """Returns the verbosity level currently configured for the module."""
    return _cpplint_state.verbose_level


def _SetVerboseLevel(level):
    """Changes the module's verbosity and returns the level it replaced."""
    previous_level = _cpplint_state.SetVerboseLevel(level)
    return previous_level


def _SetCountingStyle(level):
    """Stores level as the module's error counting style."""
    _cpplint_state.SetCountingStyle(level)


def _Filters():
    """Returns the module's current output filters, as a list."""
    return _cpplint_state.filters


def _SetFilters(filters):
    """Replaces the module's error-message filters.

    The filters decide whether a given error message is emitted.

    Args:
      filters: A string of comma-separated filters (eg "whitespace/indent").
        Each filter should start with + or -; the state object raises
        ValueError otherwise.
    """
    _cpplint_state.SetFilters(filters)
554
555
class _FunctionState(object):
    """Tracks the current function name and the line count of its body."""

    # Line-count thresholds at verbosity 0; Check() doubles them for every
    # verbosity level (250 at --v=0, 500 at --v=1, and so on).
    _NORMAL_TRIGGER = 250
    _TEST_TRIGGER = 400  # used for functions whose name starts with TEST/Test

    def __init__(self):
        self.in_a_function = False
        self.lines_in_function = 0
        self.current_function = ''

    def Begin(self, function_name):
        """Starts tracking a new function body with a zeroed line count.

        Args:
          function_name: The name of the function being tracked.
        """
        self.current_function = function_name
        self.lines_in_function = 0
        self.in_a_function = True

    def Count(self):
        """Adds one line to the count, but only while inside a function."""
        if not self.in_a_function:
            return
        self.lines_in_function += 1

    def Check(self, error, filename, linenum):
        """Reports the function if its body has too many lines.

        Args:
          error: The function to call with any errors found.
          filename: The name of the current file.
          linenum: The number of the line to check.
        """
        if Match(r'T(EST|est)', self.current_function):
            base_trigger = self._TEST_TRIGGER
        else:
            base_trigger = self._NORMAL_TRIGGER
        trigger = base_trigger * 2**_VerboseLevel()

        if self.lines_in_function <= trigger:
            return

        # The confidence grows by one each time the body doubles in size
        # relative to the base trigger, and is capped at 5.
        size_ratio = self.lines_in_function / base_trigger
        error_level = min(5, int(math.log(size_ratio, 2)))
        error(filename, linenum, 'readability/fn_size', error_level,
              'Small and focused functions are preferred:'
              ' %s has %d non-comment lines'
              ' (error triggered by exceeding %d lines).' % (
                  self.current_function, self.lines_in_function, trigger))

    def End(self):
        """Stops analyzing the function body."""
        self.in_a_function = False
610
611
class _IncludeError(Exception):
    """Signals that the includes of a file are in a problematic order."""
615
616
class FileInfo(object):
    """Provides utility functions for filenames.

    FileInfo provides easy access to the components of a file's path
    relative to the project root.
    """

    def __init__(self, filename):
        self._filename = filename

    def FullName(self):
        """Returns the absolute path, with Windows backslashes made Unix-like."""
        return os.path.abspath(self._filename).replace('\\', '/')

    def RepositoryName(self):
        r"""Returns FullName() after removing the local path to the repository.

        If we have a real absolute path name here we can try to do something
        smart: detecting the root of the checkout and truncating
        /path/to/checkout from the name so that we get header guards that
        don't include things like "C:\Documents and Settings\..." or
        "/home/username/..." in them and thus people on different computers
        who have checked the source out to different locations won't see
        bogus errors.

        Returns:
          The path relative to the SVN or git checkout root when the file
          exists and such a root is found; otherwise FullName() unchanged.
        """
        fullname = self.FullName()

        if os.path.exists(fullname):
            project_dir = os.path.dirname(fullname)

            if os.path.exists(os.path.join(project_dir, ".svn")):
                # There is a .svn entry next to the file, so walk up the
                # directory tree for as long as the parent also has one; the
                # last such directory is the top of the SVN checkout.
                root_dir = project_dir
                one_up_dir = os.path.dirname(root_dir)
                while os.path.exists(os.path.join(one_up_dir, ".svn")):
                    root_dir = os.path.dirname(root_dir)
                    one_up_dir = os.path.dirname(one_up_dir)

                prefix = os.path.commonprefix([root_dir, project_dir])
                return fullname[len(prefix) + 1:]

            # Not SVN? Try to find a git top level directory by searching up
            # from the current path.  The loop stops at the filesystem root,
            # where dirname() returns its argument unchanged.
            root_dir = os.path.dirname(fullname)
            while (root_dir != os.path.dirname(root_dir) and
                   not os.path.exists(os.path.join(root_dir, ".git"))):
                root_dir = os.path.dirname(root_dir)
            if os.path.exists(os.path.join(root_dir, ".git")):
                prefix = os.path.commonprefix([root_dir, project_dir])
                return fullname[len(prefix) + 1:]

        # Don't know what to do; header guard warnings may be wrong...
        return fullname

    def Split(self):
        """Splits the file into the directory, basename, and extension.

        For 'chrome/browser/browser.cc', Split() would
        return ('chrome/browser', 'browser', '.cc')

        Returns:
          A tuple of (directory, basename, extension).
        """
        googlename = self.RepositoryName()
        project, rest = os.path.split(googlename)
        return (project,) + os.path.splitext(rest)

    def BaseName(self):
        """File base name - text after the final slash, before the final period."""
        return self.Split()[1]

    def Extension(self):
        """File extension - the final period and the text following it."""
        return self.Split()[2]

    def NoExtension(self):
        """Returns the directory and base name joined by '/', without extension."""
        return '/'.join(self.Split()[0:2])

    def IsSource(self):
        """Returns True if the extension is one of c, cc, cpp or cxx."""
        return self.Extension()[1:] in ('c', 'cc', 'cpp', 'cxx')
700
701
def _ShouldPrintError(category, confidence):
    """Returns true iff confidence >= verbose, and category passes filter.

    Args:
      category: Category string of the error, e.g. "whitespace/indent".
      confidence: Confidence score of the error.

    Returns:
      False when the confidence is below the module's verbosity level or
      when the filters suppress the category; True otherwise.
    """
    # First reason to stay silent: the verbosity level isn't high enough.
    if confidence < _cpplint_state.verbose_level:
        return False

    # Second reason: the filters.  They are applied in order and a filter
    # matches every category it is a prefix of, so the last matching filter
    # decides.
    suppressed = False
    for one_filter in _Filters():
        sign, prefix = one_filter[:1], one_filter[1:]
        if sign == '-':
            if category.startswith(prefix):
                suppressed = True
        elif sign == '+':
            if category.startswith(prefix):
                suppressed = False
        else:
            assert False  # should have been checked for in SetFilter.

    return not suppressed
723
724
def Error(filename, linenum, category, confidence, message):
    """Logs the fact we've found a lint error.

    We log where the error was found, and also our confidence in the error,
    that is, how certain we are this is a legitimate style regression, and
    not a misidentification or a use that's sometimes justified.

    Nothing is counted or written when _ShouldPrintError() rejects the
    error.  Otherwise the error is counted and one line is written to
    stderr in the configured output format.

    Args:
      filename: The name of the file containing the error.
      linenum: The number of the line containing the error.
      category: A string used to describe the "category" this bug
        falls under: "whitespace", say, or "runtime".  Categories
        may have a hierarchy separated by slashes: "whitespace/indent".
      confidence: A number from 1-5 representing a confidence score for
        the error, with 5 meaning that we are certain of the problem,
        and 1 meaning that it could be a legitimate construct.
      message: The error message.
    """
    if not _ShouldPrintError(category, confidence):
        return

    _cpplint_state.IncrementErrorCount(category)
    # Only the way file name and line number are joined differs between
    # the Visual Studio format and the default (emacs) one.
    if _cpplint_state.output_format == 'vs7':
        line_template = '%s(%s): %s [%s] [%d]\n'
    else:
        line_template = '%s:%s: %s [%s] [%d]\n'
    sys.stderr.write(line_template % (
        filename, linenum, message, category, confidence))
753
754
# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard.
_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
    r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)')

# Matches double-quoted strings.  Run ESCAPES first so that no escaped
# quote is left inside the string.
_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES = re.compile(r'"[^"]*"')

# Matches single-character literals.  Run ESCAPES first here as well.
_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES = re.compile(r"'.'")

# Matches a C-style /* ... */ comment that starts and ends on one line.
# The alternatives differ only in how much surrounding whitespace they
# consume, so that removing a comment from the middle of a statement leaves
# sensible spacing:
#   1. at the end of the line, whitespace on both sides goes away;
#   2. otherwise whitespace to the right of the comment is removed;
#   3. failing that, whitespace to the left is removed, but only when a
#      non-word character follows the comment;
#   4. as a last resort only the comment itself is matched.
_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
    r"""(\s*/\*.*\*/\s*$|
         /\*.*\*/\s+|
         \s+/\*.*\*/(?=\W)|
         /\*.*\*/)""", re.VERBOSE)
775
776
def IsCppString(line):
    """Tells whether the text following line would be inside a string constant.

    Comments, single-line or multi-line, are not taken into account.

    Args:
      line: A partial line of code, from column 0 up to some column n.

    Returns:
      True if the next character appended to 'line' would be inside a
      string constant.
    """
    # Neutralize escaped backslashes first, so that the quote in \\" is not
    # mistaken for the escaped quote \".
    unescaped = line.replace(r'\\', 'XX')
    # Count the double quotes that can open or close a string: all of them,
    # minus the escaped ones and the '"' character literals.
    delimiter_count = (unescaped.count('"')
                       - unescaped.count(r'\"')
                       - unescaped.count("'\"'"))
    # An odd number of delimiters means a string is still open.
    return delimiter_count % 2 == 1
792
793
def FindNextMultiLineCommentStart(lines, lineix):
  """Finds the next line that opens a comment spanning several lines.

  Args:
    lines: A list of strings, one per line of the file.
    lineix: Index of the first line to examine.

  Returns:
    The index of the first line at or after lineix that (ignoring
    surrounding whitespace) starts with '/*' and has no '*/' after it, or
    len(lines) when there is no such line.
  """
  total = len(lines)
  for index in range(lineix, total):
    stripped = lines[index].strip()
    # A comment that is also closed on this line is not a multi-line one.
    if stripped.startswith('/*') and stripped.find('*/', 2) < 0:
      return index
  return total
803
804
def FindNextMultiLineCommentEnd(lines, lineix):
  """Finds the line that closes the comment we are currently inside.

  Args:
    lines: A list of strings, one per line of the file.
    lineix: Index of the first line to examine.

  Returns:
    The index of the first line at or after lineix that (ignoring
    surrounding whitespace) ends with '*/', or len(lines) when there is no
    such line.
  """
  total = len(lines)
  for index in range(lineix, total):
    if lines[index].strip().endswith('*/'):
      return index
  return total
812
813
def RemoveMultiLineCommentsFromRange(lines, begin, end):
  """Overwrites lines[begin:end] in place with a placeholder comment.

  Args:
    lines: A list of strings, one per line of the file; modified in place.
    begin: Index of the first line to overwrite.
    end: Index one past the last line to overwrite.
  """
  # A '// dummy' comment keeps each line non-empty, so later checks do not
  # report unnecessary blank lines where the comment used to be.
  index = begin
  while index < end:
    lines[index] = '// dummy'
    index += 1
820
821
def RemoveMultiLineComments(filename, lines, error):
  """Blanks out every multi-line (C-style) comment found in lines.

  Args:
    filename: The name of the current file, used for error reporting.
    lines: A list of strings, one per line of the file; modified in place.
    error: The function to call with any errors found.
  """
  search_from = 0
  while search_from < len(lines):
    first = FindNextMultiLineCommentStart(lines, search_from)
    if first >= len(lines):
      break  # No further multi-line comment.
    last = FindNextMultiLineCommentEnd(lines, first)
    if last >= len(lines):
      # Unterminated comment: report it at its (1-based) opening line and
      # leave the remaining lines untouched.
      error(filename, first + 1, 'readability/multiline_comment', 5,
            'Could not find end of multi-line comment')
      break
    RemoveMultiLineCommentsFromRange(lines, first, last + 1)
    search_from = last + 1
836
837
def CleanseComments(line):
  """Strips a //-comment and any one-line /* ... */ comments from a line.

  Args:
    line: A line of C++ source.

  Returns:
    The line with single-line comments removed.
  """
  slashes_at = line.find('//')
  # Only the first '//' is considered, and it is left alone when it sits
  # inside a string constant.
  starts_comment = slashes_at != -1 and not IsCppString(line[:slashes_at])
  if starts_comment:
    line = line[:slashes_at]
  # Finally drop the /* ... */ comments.
  return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
852
853
class CleansedLines(object):
  """Keeps three parallel views of a file's lines.

  1) raw_lines: the lines exactly as given;
  2) lines: the lines with comments removed;
  3) elided: the lines with strings collapsed and comments removed.
  All three are lists of the same length.
  """

  def __init__(self, lines):
    self.raw_lines = lines
    self.num_lines = len(lines)
    self.lines = []
    self.elided = []
    for raw_line in lines:
      self.lines.append(CleanseComments(raw_line))
      # Strings are collapsed before comments are stripped.
      collapsed = self._CollapseStrings(raw_line)
      self.elided.append(CleanseComments(collapsed))

  def NumLines(self):
    """Returns the number of lines represented."""
    return self.num_lines

  @staticmethod
  def _CollapseStrings(elided):
    """Reduces string and character literals on a line to "" and ''.

    Lines matched by _RE_PATTERN_INCLUDE are returned unchanged.

    Args:
      elided: The line being processed.

    Returns:
      The line with collapsed strings.
    """
    if _RE_PATTERN_INCLUDE.match(elided):
      return elided
    # Drop escape sequences first so that collapsing quotes becomes a simple
    # match.  Things that look like escapes should not occur outside of
    # string and character literals.
    without_escapes = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)
    # Character literals go before strings so that '"' does not get read as
    # the start of a string.
    without_chars = _RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES.sub(
        "''", without_escapes)
    return _RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES.sub('""', without_chars)
897
898
def CloseExpression(clean_lines, linenum, pos):
  """If input points to ( or { or [, finds the position that closes it.

  If lines[linenum][pos] points to a '(' or '{' or '[', finds the
  linenum/pos that correspond to the closing of the expression.  The scan
  starts at pos, so brackets that appear earlier on the starting line, or
  after the matching close, do not influence the result.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    pos: A position on the line.

  Returns:
    A tuple (line, linenum, pos) pointer *past* the closing brace, or
    (line, len(lines), -1) if we never find a close.  Note we ignore
    strings and comments when matching; and the line we return is the
    'cleansed' line at linenum (the last line examined when no close is
    found).
  """
  line = clean_lines.elided[linenum]
  startchar = line[pos]
  num_lines = clean_lines.NumLines()
  if startchar not in '({[':
    return (line, num_lines, -1)
  endchar = {'(': ')', '[': ']', '{': '}'}[startchar]

  depth = 0  # Nesting depth of startchar/endchar pairs seen so far.
  start = pos
  while True:
    for index in range(start, len(line)):
      char = line[index]
      if char == startchar:
        depth += 1
      elif char == endchar:
        depth -= 1
        if depth == 0:
          return (line, linenum, index + 1)
    # Still open at the end of this line: continue on the next one, unless
    # the file is exhausted.
    linenum += 1
    if linenum >= num_lines:
      return (line, num_lines, -1)
    line = clean_lines.elided[linenum]
    start = 0
936
937
def CheckForCopyright(filename, lines, error):
  """Logs an error if no Copyright message appears at the top of the file.

  Args:
    filename: The name of the current file.
    lines: A list of strings, one per line of the file.
    error: The function to call with any errors found.
  """
  # The message should occur by line 10.  Index 0 is skipped because there
  # is a dummy line at the front.
  for candidate in lines[1:11]:
    if re.search(r'Copyright', candidate, re.I):
      return
  # No copyright line was found.
  error(filename, 0, 'legal/copyright', 5,
        'No copyright message found. '
        'You should have a line: "Copyright [year] <Copyright Owner>"')
949
950
def GetHeaderGuardCPPVariable(filename):
  """Builds the CPP variable expected as the header guard of a file.

  Args:
    filename: The name of a C++ header file.

  Returns:
    The CPP variable that should be used as a header guard in the named
    file: the file's repository name with '-', '.', '/' and whitespace
    turned into '_', upper-cased, with a trailing '_'.
  """
  repository_name = FileInfo(filename).RepositoryName()
  guard = re.sub(r'[-./\s]', '_', repository_name)
  return guard.upper() + '_'
965
966
def CheckForHeaderGuard(filename, lines, error):
  """Checks that the file contains a correctly named header guard.

  Logs an error if no matching #ifndef/#define pair is present.  Otherwise
  checks that the guard and the comment on the final #endif use the
  variable derived from the file name.

  Args:
    filename: The name of the C++ header file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """
  cppvar = GetHeaderGuardCPPVariable(filename)

  ifndef = None
  ifndef_linenum = 0
  define = None
  endif = None
  endif_linenum = 0
  for linenum, line in enumerate(lines):
    tokens = line.split()
    if len(tokens) >= 2:
      directive = tokens[0]
      # Remember the argument of the first #ifndef and of the first #define.
      if directive == '#ifndef' and not ifndef:
        ifndef = tokens[1]
        ifndef_linenum = linenum
      if directive == '#define' and not define:
        define = tokens[1]
    # Remember the last #endif, keeping the entire line.
    if line.startswith('#endif'):
      endif = line
      endif_linenum = linenum

  if not (ifndef and define and ifndef == define):
    error(filename, 0, 'build/header_guard', 5,
          'No #ifndef header guard found, suggested CPP variable is: %s' %
          cppvar)
    return

  # The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__
  # for backward compatibility; the latter is reported at level 0.
  if ifndef != cppvar and not Search(r'\bNOLINT\b', lines[ifndef_linenum]):
    if ifndef == cppvar + '_':
      error_level = 0
    else:
      error_level = 5

    error(filename, ifndef_linenum, 'build/header_guard', error_level,
          '#ifndef header guard has wrong style, please use: %s' % cppvar)

  expected_endif = '#endif // %s' % cppvar
  if (endif != expected_endif and
      not Search(r'\bNOLINT\b', lines[endif_linenum])):
    if endif == ('#endif // %s' % (cppvar + '_')):
      error_level = 0
    else:
      error_level = 5

    error(filename, endif_linenum, 'build/header_guard', error_level,
          '#endif line should be "#endif // %s"' % cppvar)
1025
1026
def CheckForUnicodeReplacementCharacters(filename, lines, error):
  """Logs an error for each line containing Unicode replacement characters.

  These indicate that either the file contained invalid UTF-8 (likely)
  or Unicode replacement characters (which it shouldn't).  Note that
  it's possible for this to throw off line numbering if the invalid
  UTF-8 occurred adjacent to a newline.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """
  replacement_char = u'\ufffd'
  for linenum, line in enumerate(lines):
    if replacement_char not in line:
      continue
    error(filename, linenum, 'readability/utf8', 5,
          'Line contains invalid UTF-8 (or Unicode replacement character).')
1044
1045
def CheckForNewlineAtEOF(filename, lines, error):
  """Logs an error if there is no newline char at the end of the file.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """
  # The array 'lines' was created by adding two newlines to the original
  # file (go figure), then splitting on \n.  The file therefore ends in \n
  # exactly when the last-but-two element of 'lines' exists and is empty.
  ends_with_newline = len(lines) >= 3 and not lines[-2]
  if ends_with_newline:
    return
  error(filename, len(lines) - 2, 'whitespace/ending_newline', 5,
        'Could not find a newline character at the end of the file.')
1062
1063
def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error):
  """Logs an error if we see /* ... */ or "..." that extend past one line.

  /* ... */ comments are legit inside macros, for one line.
  Otherwise, we prefer // comments, so it's ok to warn about the
  other.  Likewise, it's ok for strings to extend across multiple
  lines, as long as a line continuation character (backslash)
  terminates each line.  Although not currently prohibited by the C++
  style guide, it's ugly and unnecessary.  We don't do well with either
  in this lint program, so we warn about both.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # Escaped backslashes (\\) are OK; they are removed up front because
  # their second slash could otherwise be taken for the start of a \".
  line = clean_lines.elided[linenum].replace('\\\\', '')

  comment_opens = line.count('/*')
  comment_closes = line.count('*/')
  if comment_opens > comment_closes:
    error(filename, linenum, 'readability/multiline_comment', 5,
          'Complex multi-line /*...*/-style comment found. '
          'Lint may give bogus warnings. '
          'Consider replacing these with //-style comments, '
          'with #if 0...#endif, '
          'or with more clearly structured multi-line comments.')

  # An odd number of unescaped double quotes means a string is left open.
  unescaped_quotes = line.count('"') - line.count('\\"')
  if unescaped_quotes % 2:
    error(filename, linenum, 'readability/multiline_string', 5,
          'Multi-line string ("...") found. This lint script doesn\'t '
          'do well with such strings, and may give bogus warnings. They\'re '
          'ugly and unnecessary, and you should use concatenation instead".')
1100
1101
# Pairs of (thread-unsafe function, thread-safe replacement).  Each name
# includes the opening parenthesis so that only calls are matched.
threading_list = (
    ('asctime(', 'asctime_r('),
    ('ctime(', 'ctime_r('),
    ('getgrgid(', 'getgrgid_r('),
    ('getgrnam(', 'getgrnam_r('),
    ('getlogin(', 'getlogin_r('),
    ('getpwnam(', 'getpwnam_r('),
    ('getpwuid(', 'getpwuid_r('),
    ('gmtime(', 'gmtime_r('),
    ('localtime(', 'localtime_r('),
    ('rand(', 'rand_r('),
    ('readdir(', 'readdir_r('),
    ('strtok(', 'strtok_r('),
    ('ttyname(', 'ttyname_r('),
    )


def CheckPosixThreading(filename, clean_lines, linenum, error):
  """Checks for calls to thread-unsafe functions.

  Much code has been originally written without consideration of
  multi-threading.  Also, engineers are relying on their old experience;
  they have learned posix before threading extensions were added.  These
  tests guide the engineers to use thread-safe functions (when using
  posix directly).

  Every occurrence of a function name on the line is examined, so a
  harmless earlier look-alike (e.g. 'my_rand(') does not hide a real call
  later on the same line.  At most one error is reported per function per
  line.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]
  for single_thread_function, multithread_safe_function in threading_list:
    ix = line.find(single_thread_function)
    while ix >= 0:
      # An occurrence counts as a call only when it is not the tail of a
      # longer identifier and not a member access (preceded by '.' or '->').
      # Comparisons made explicit for clarity -- pylint: disable-msg=C6403
      if ix == 0 or (not line[ix - 1].isalnum() and
                     line[ix - 1] not in ('_', '.', '>')):
        error(filename, linenum, 'runtime/threadsafe_fn', 2,
              'Consider using ' + multithread_safe_function +
              '...) instead of ' + single_thread_function +
              '...) for improved thread safety.')
        break
      ix = line.find(single_thread_function, ix + 1)
1144
1145
# Matches a statement of the form *count++; or *count--; which moves the
# pointer instead of changing the value it points to.
_RE_PATTERN_INVALID_INCREMENT = re.compile(
    r'^\s*\*\w+(\+\+|--);')


def CheckInvalidIncrement(filename, clean_lines, linenum, error):
  """Checks for the invalid increment *count++.

  For example the following function:
    void increment_counter(int* count) {
      *count++;
    }
  is invalid, because it effectively does count++, moving the pointer, and
  should be replaced with ++*count, (*count)++ or *count += 1.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  statement = clean_lines.elided[linenum]
  if _RE_PATTERN_INVALID_INCREMENT.match(statement) is None:
    return
  error(filename, linenum, 'runtime/invalid_increment', 5,
        'Changing pointer instead of value (or unused value of operator*).')
1172
1173
class _ClassInfo(object):
  """Record of what has been learned about one class declaration."""

  def __init__(self, name, linenum):
    # Where and what: the declared name and the line of the declaration.
    self.name = name
    self.linenum = linenum
    # Brace tracking, filled in while the declaration is being scanned.
    self.seen_open_brace = False
    self.brace_depth = 0
    # Facts used for the virtual-destructor check.
    self.is_derived = False
    self.virtual_method_linenumber = None
    self.has_virtual_destructor = False
1185
1186
class _ClassState(object):
  """Holds the current state of the parse relating to class declarations.

  It keeps a stack of _ClassInfo objects representing the parser's guess
  as to the current nesting of class declarations.  The innermost class
  is at the top (back) of the stack.  Typically, the stack will either
  be empty or have exactly one entry.
  """

  def __init__(self):
    self.classinfo_stack = []

  def CheckFinished(self, filename, error):
    """Checks that all classes have been completely parsed.

    Call this when all lines in a file have been processed.

    Args:
      filename: The name of the current file.
      error: The function to call with any errors found.
    """
    if not self.classinfo_stack:
      return
    # Note: This test can result in false positives if #ifdef constructs
    # get in the way of brace matching.  See the testBuildClass test in
    # cpplint_unittest.py for an example of this.
    outermost = self.classinfo_stack[0]
    error(filename, outermost.linenum, 'build/class', 5,
          'Failed to find complete declaration of class %s' %
          outermost.name)
1214
1215
def CheckForNonStandardConstructs(filename, clean_lines, linenum,
                                  class_state, error):
  """Logs an error if we see certain non-ANSI constructs ignored by gcc-2.

  Complains about several constructs which gcc-2 accepts, but which are
  not standard C++.  Warning about these in lint is one way to ease the
  transition to new compilers.
  - put storage class first (e.g. "static const" instead of "const static").
  - "%lld" instead of "%qd" in printf-type functions.
  - "%1$d" is non-standard in printf-type functions.
  - "\%" is an undefined character escape sequence.
  - text after #endif is not allowed.
  - invalid inner-style forward declaration.
  - >? and <? operators, and their >?= and <?= cousins.
  - classes with virtual methods need virtual destructors (compiler warning
    available, but not turned on yet.)

  Additionally, checks for constructor/destructor style violations and
  reference members, as it is very convenient to do so while checking for
  gcc-2 compliance.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    class_state: A _ClassState instance which maintains information about
                 the current stack of nested class declarations being parsed.
    error: A callable to which errors are reported; it is called with
           filename, line number, category, confidence level and message.
  """
  # First pass: comments removed, strings still present.
  line = clean_lines.lines[linenum]

  if Search(r'printf\s*\(.*".*%[-+ ]?\d*q', line):
    error(filename, linenum, 'runtime/printf_format', 3,
          '%q in format strings is deprecated. Use %ll instead.')

  if Search(r'printf\s*\(.*".*%\d+\$', line):
    error(filename, linenum, 'runtime/printf_format', 2,
          '%N$ formats are unconventional. Try rewriting to avoid them.')

  # Escaped backslashes must go before undefined escapes are looked for.
  line = line.replace('\\\\', '')

  if Search(r'("|\').*\\(%|\[|\(|{)', line):
    error(filename, linenum, 'build/printf_format', 3,
          '%, [, (, and { are undefined character escapes. Unescape them.')

  # Second pass: both comments and strings removed.
  line = clean_lines.elided[linenum]

  misplaced_storage_class = (
      r'\b(const|volatile|void|char|short|int|long'
      r'|float|double|signed|unsigned'
      r'|schar|u?int8|u?int16|u?int32|u?int64)'
      r'\s+(auto|register|static|extern|typedef)\b')
  if Search(misplaced_storage_class, line):
    error(filename, linenum, 'build/storage_class', 5,
          'Storage class (static, extern, typedef, etc) should be first.')

  if Match(r'\s*#\s*endif\s*[^/\s]+', line):
    error(filename, linenum, 'build/endif_comment', 5,
          'Uncommented text after #endif is non-standard. Use a comment.')

  if Match(r'\s*class\s+(\w+\s*::\s*)+\w+\s*;', line):
    error(filename, linenum, 'build/forward_decl', 5,
          'Inner-style forward declarations are invalid. Remove this line.')

  if Search(r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?',
            line):
    error(filename, linenum, 'build/deprecated', 3,
          '>? and <? (max and min) operators are non-standard and deprecated.')

  if Search(r'^\s*const\s*string\s*&\s*\w+\s*;', line):
    # TODO(unknown): Could it be expanded safely to arbitrary references,
    # without triggering too many false positives? The first
    # attempt triggered 5 warnings for mostly benign code in the regtest,
    # hence the restriction.
    # Here's the original regexp, for the reference:
    # type_name = r'\w+((\s*::\s*\w+)|(\s*<\s*\w+?\s*>))?'
    # r'\s*const\s*' + type_name + '\s*&\s*\w+\s*;'
    error(filename, linenum, 'runtime/member_string_references', 2,
          'const string& members are dangerous. It is much better to use '
          'alternatives, such as pointers or simple constants.')

  # Track class entry and exit, and attempt to find cases within the
  # class declaration that don't meet the C++ style guidelines.  Tracking
  # is very dependent on the code matching Google style guidelines, but it
  # seems to perform well enough in testing to be a worthwhile addition to
  # the checks.
  classinfo_stack = class_state.classinfo_stack
  class_decl_match = Match(
      r'\s*(template\s*<[\w\s<>,:]*>\s*)?(class|struct)\s+(\w+(::\w+)*)', line)
  if class_decl_match:
    # A class declaration starts on this line.
    classinfo_stack.append(_ClassInfo(class_decl_match.group(3), linenum))

  # Everything below works on the innermost class, if there is one.
  if not classinfo_stack:
    return

  classinfo = classinfo_stack[-1]

  # Until the opening brace shows up, look for it and for parent class
  # declarations.
  if not classinfo.seen_open_brace:
    # A ';' is taken to mean a forward declaration or a single-line class
    # declaration, neither of which is processed.
    if ';' in line:
      classinfo_stack.pop()
      return
    classinfo.seen_open_brace = '{' in line
    # A bare ':' (not part of '::') marks the class as derived.
    if Search('(^|[^:]):($|[^:])', line):
      classinfo.is_derived = True
    if not classinfo.seen_open_brace:
      return  # Everything else in this function is for after open brace

  # The class may have been declared with namespace or classname qualifiers.
  # The constructor and destructor will not have those qualifiers.
  base_classname = classinfo.name.split('::')[-1]
  escaped_classname = re.escape(base_classname)

  # Look for single-argument constructors that aren't marked explicit.
  # Technically a valid construct, but against style.  A lone 'void'
  # argument and copy constructors are exempt.
  args = Match(r'(?<!explicit)\s+%s\s*\(([^,()]+)\)' % escaped_classname,
               line)
  if (args and
      args.group(1) != 'void' and
      not Match(r'(const\s+)?%s\s*&' % escaped_classname,
                args.group(1).strip())):
    error(filename, linenum, 'runtime/explicit', 5,
          'Single-argument constructors should be marked explicit.')

  # Look for methods declared virtual.
  if Search(r'\bvirtual\b', line):
    classinfo.virtual_method_linenumber = linenum
    # Only look for a destructor declaration on the same line. It would
    # be extremely unlikely for the destructor declaration to occupy
    # more than one line.
    if Search(r'~%s\s*\(' % base_classname, line):
      classinfo.has_virtual_destructor = True

  # Look for class end.
  brace_depth = classinfo.brace_depth + line.count('{') - line.count('}')
  if brace_depth > 0:
    classinfo.brace_depth = brace_depth
    return

  finished = classinfo_stack.pop()
  # Try to detect missing virtual destructor declarations.
  # For now, only warn if a non-derived class with virtual methods lacks
  # a virtual destructor. This is to make it less likely that people will
  # declare derived virtual destructors without declaring the base
  # destructor virtual.
  if ((finished.virtual_method_linenumber is not None) and
      (not finished.has_virtual_destructor) and
      (not finished.is_derived)):  # Only warn for base classes
    error(filename, finished.linenum, 'runtime/virtual', 4,
          'The class %s probably needs a virtual destructor due to '
          'having virtual method(s), one declared at line %d.'
          % (finished.name, finished.virtual_method_linenumber))
1379
1380
def CheckSpacingForFunctionCall(filename, line, linenum, error):
  """Checks for the correctness of various spacing around function calls.

  Args:
    filename: The name of the current file.
    line: The text of the line to check.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # Since function calls often occur inside if/for/while/switch
  # expressions - which have their own, more liberal conventions - we
  # first see if we should be looking inside such an expression for a
  # function call, to which we can apply more strict standards.
  control_flow_patterns = (r'\bif\s*\((.*)\)\s*{',
                           r'\bfor\s*\((.*)\)\s*{',
                           r'\bwhile\s*\((.*)\)\s*[{;]',
                           r'\bswitch\s*\((.*)\)\s*{')
  fncall = line  # without a control flow construct, look at the whole line
  for pattern in control_flow_patterns:
    match = Search(pattern, line)
    if match:
      fncall = match.group(1)  # look inside the parens for function calls
      break

  # Except in if/for/while/switch, there should never be space
  # immediately inside parens (eg "f( 3, 4 )").  We make an exception
  # for nested parens ( (a+b) + c ).  Likewise, there should never be
  # a space before a ( when it's a function argument.  I assume it's a
  # function argument when the char before the whitespace is legal in
  # a function name (alnum + _) and we're not starting a macro.  Also ignore
  # pointers and references to arrays and functions coz they're too tricky:
  # we use a very simple way to recognize these:
  # " (something)(maybe-something)" or
  # " (something)(maybe-something," or
  # " (something)[something]"
  # Note that we assume the contents of [] to be short enough that
  # they'll never need to wrap.

  # Ignore control structures.
  if Search(r'\b(if|for|while|switch|return|delete)\b', fncall):
    return
  # Ignore pointers/references to functions.
  if Search(r' \([^)]+\)\([^)]*(\)|,$)', fncall):
    return
  # Ignore pointers/references to arrays.
  if Search(r' \([^)]+\)\[[^\]]+\]', fncall):
    return

  if Search(r'\w\s*\(\s(?!\s*\\$)', fncall):  # a ( used for a fn call
    error(filename, linenum, 'whitespace/parens', 4,
          'Extra space after ( in function call')
  elif Search(r'\(\s+(?!(\s*\\)|\()', fncall):
    error(filename, linenum, 'whitespace/parens', 2,
          'Extra space after (')

  if (Search(r'\w\s+\(', fncall) and
      not Search(r'#\s*define|typedef', fncall)):
    error(filename, linenum, 'whitespace/parens', 4,
          'Extra space before ( in function call')

  # If the ) is followed only by a newline or a { + newline, assume it's
  # part of a control statement (if/while/etc), and don't complain
  if Search(r'[^)]\s+\)\s*[^{\s]', fncall):
    error(filename, linenum, 'whitespace/parens', 2,
          'Extra space before )')
1439
1440
def IsBlankLine(line):
  """Tells whether the given line is blank.

  A line counts as blank when it is empty or consists only of white space.

  Args:
    line: A line of a string.

  Returns:
    True, if the given line is blank.
  """
  if not line:
    return True
  return line.isspace()
1454
1455
def CheckForFunctionLengths(filename, clean_lines, linenum,
                            function_state, error):
  """Reports for long function bodies.

  For an overview why this is done, see:
  http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions

  Uses a simplistic algorithm assuming other style guidelines
  (especially spacing) are followed.
  Only checks unindented functions, so class members are unchecked.
  Trivial bodies are unchecked, so constructors with huge initializer lists
  may be missed.
  Blank/comment lines are not counted so as to avoid encouraging the removal
  of vertical space and comments just to get through a lint check.
  NOLINT *on the last line of a function* disables this check.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    function_state: Current function name and lines in body so far.
    error: The function to call with any errors found.
  """
  lines = clean_lines.lines
  line = lines[linenum]
  raw_line = clean_lines.raw_lines[linenum]

  # Does this line look like the start of a function?
  starting_func = False
  match_result = Match(r'(\w(\w|::|\*|\&|\s)*)\(', line)  # decls * & space::name( ...
  if match_result:
    # If the name is all caps and underscores, figure it's a macro and
    # ignore it, unless it's TEST or TEST_F.
    function_name = match_result.group(1).split()[-1]
    if (function_name == 'TEST' or function_name == 'TEST_F' or
        not Match(r'[A-Z_]+$', function_name)):
      starting_func = True

  if starting_func:
    body_found = False
    joined_line = ''
    for start_linenum in xrange(linenum, clean_lines.NumLines()):
      start_line = lines[start_linenum]
      joined_line += ' ' + start_line.lstrip()
      if Search(r'(;|})', start_line):
        # Declarations and trivial functions are ignored.
        body_found = True
        break
      if Search(r'{', start_line):
        body_found = True
        function = Search(r'((\w|:)*)\(', line).group(1)
        if Match(r'TEST', function):  # Handle TEST... macros
          parameter_regexp = Search(r'(\(.*\))', joined_line)
          if parameter_regexp:  # Ignore bad syntax
            function += parameter_regexp.group(1)
        else:
          function += '()'
        function_state.Begin(function)
        break
    if not body_found:
      # No body for the function (or evidence of a non-function) was found.
      error(filename, linenum, 'readability/fn_size', 5,
            'Lint failed to find start of function body.')
  elif Match(r'^\}\s*$', line):  # function end
    if not Search(r'\bNOLINT\b', raw_line):
      function_state.Check(error, filename, linenum)
    function_state.End()
  elif not Match(r'^\s*$', line):
    function_state.Count()  # Count non-blank/non-comment lines.
1525
1526
# Matches a //-comment that starts with TODO.  The groups capture the
# whitespace before TODO, the optional "(username)" part, and the optional
# whitespace (or end of comment) that follows.
_RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO(\(.+?\))?:?(\s|$)?')


def CheckComment(comment, filename, linenum, error):
  """Checks for common mistakes in TODO comments.

  Args:
    comment: The text of the comment from the line in question.
    filename: The name of the current file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  match = _RE_PATTERN_TODO.match(comment)
  if not match:
    return
  leading_whitespace, username, middle_whitespace = match.groups()

  # One whitespace is correct; zero whitespace is handled elsewhere.
  if len(leading_whitespace) > 1:
    error(filename, linenum, 'whitespace/todo', 2,
          'Too many spaces before TODO')

  if not username:
    error(filename, linenum, 'readability/todo', 2,
          'Missing username in TODO; it should look like '
          '"// TODO(my_username): Stuff."')

  # The third group is None when TODO(...) is directly followed by text,
  # which must be reported; an empty match (end of comment) is fine.
  if middle_whitespace not in (' ', ''):
    error(filename, linenum, 'whitespace/todo', 2,
          'TODO(my_username) should be followed by a space')
1558
1559
def CheckSpacing(filename, clean_lines, linenum, error):
  """Checks for the correctness of various spacing issues in the code.

  Things we check for: spaces around operators, spaces after
  if/for/while/switch, no spaces around parens in function calls, two
  spaces between code and comment, don't start a block with a blank
  line, don't end a function with a blank line, don't have too many
  blank lines in a row.

  The blank-line and comment checks run on the raw line; every later check
  runs on the elided line (comments and strings removed).

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  raw = clean_lines.raw_lines
  line = raw[linenum]

  # Before nixing comments, check if the line is blank for no good
  # reason. This includes the first line after a block is opened, and
  # blank lines at the end of a function (ie, right before a line like '}').
  if IsBlankLine(line):
    elided = clean_lines.elided
    prev_line = elided[linenum - 1]
    prevbrace = prev_line.rfind('{')
    # TODO(unknown): Don't complain if line before blank line, and line after,
    #                both start with alnums and are indented the same amount.
    #                This ignores whitespace at the start of a namespace block
    #                because those are not usually indented.
    if (prevbrace != -1 and prev_line[prevbrace:].find('}') == -1
        and prev_line[:prevbrace].find('namespace') == -1):
      # OK, we have a blank line at the start of a code block. Before we
      # complain, we check if it is an exception to the rule: The previous
      # non-empty line has the parameters of a function header that are
      # indented 4 spaces (because they did not fit in a 80 column line when
      # placed on the same line as the function name). We also check for the
      # case where the previous line is indented 6 spaces, which may happen
      # when the initializers of a constructor do not fit into a 80 column
      # line.
      exception = False
      if Match(r' {6}\w', prev_line):  # Initializer list?
        # We are looking for the opening column of initializer list, which
        # should be indented 4 spaces to cause 6 space indentation afterwards.
        search_position = linenum-2
        while (search_position >= 0
               and Match(r' {6}\w', elided[search_position])):
          search_position -= 1
        # The 5-character prefix must be exactly four spaces and the colon.
        exception = (search_position >= 0
                     and elided[search_position][:5] == '    :')
      else:
        # Search for the function arguments or an initializer list. We use a
        # simple heuristic here: If the line is indented 4 spaces; and we have
        # a closing paren, without the opening paren, followed by an opening
        # brace or colon (for initializer lists) we assume that it is the last
        # line of a function header. If we have a colon indented 4 spaces, it
        # is an initializer list.
        exception = (Match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)',
                           prev_line)
                     or Match(r' {4}:', prev_line))

      if not exception:
        error(filename, linenum, 'whitespace/blank_line', 2,
              'Blank line at the start of a code block. Is this needed?')
    # This doesn't ignore whitespace at the end of a namespace block
    # because that is too hard without pairing open/close braces;
    # however, a special exception is made for namespace closing
    # brackets which have a comment containing "namespace".
    #
    # Also, ignore blank lines at the end of a block in a long if-else
    # chain, like this:
    #   if (condition1) {
    #     // Something followed by a blank line
    #
    #   } else if (condition2) {
    #     // Something else
    #   }
    if linenum + 1 < clean_lines.NumLines():
      next_line = raw[linenum + 1]
      if (next_line
          and Match(r'\s*}', next_line)
          and next_line.find('namespace') == -1
          and next_line.find('} else ') == -1):
        error(filename, linenum, 'whitespace/blank_line', 3,
              'Blank line at the end of a code block. Is this needed?')

  # Next, we complain if there's a comment too near the text
  commentpos = line.find('//')
  if commentpos != -1:
    # Check if the // may be in quotes. If so, ignore it
    # Comparisons made explicit for clarity -- pylint: disable-msg=C6403
    if (line.count('"', 0, commentpos) -
        line.count('\\"', 0, commentpos)) % 2 == 0:   # not in quotes
      # Allow one space for new scopes, two spaces otherwise:
      if (not Match(r'^\s*{ //', line) and
          ((commentpos >= 1 and
            line[commentpos-1] not in string.whitespace) or
           (commentpos >= 2 and
            line[commentpos-2] not in string.whitespace))):
        error(filename, linenum, 'whitespace/comments', 2,
              'At least two spaces is best between code and comments')
      # There should always be a space between the // and the comment
      commentend = commentpos + 2
      if commentend < len(line) and not line[commentend] == ' ':
        # but some lines are exceptions -- e.g. if they're big
        # comment delimiters like:
        # //----------------------------------------------------------
        # or they begin with multiple slashes followed by a space:
        # //////// Header comment
        match = (Search(r'[=/-]{4,}\s*$', line[commentend:]) or
                 Search(r'^/+ ', line[commentend:]))
        if not match:
          error(filename, linenum, 'whitespace/comments', 4,
                'Should have a space between // and comment')
      CheckComment(line[commentpos:], filename, linenum, error)

  line = clean_lines.elided[linenum]  # get rid of comments and strings

  # Don't try to do spacing checks for operator methods. The replacement
  # template spells the backslash as '\\' so the result is the literal text
  # 'operator\(' without depending on the regex engine tolerating an unknown
  # '\(' escape inside a replacement template.
  line = re.sub(r'operator(==|!=|<|<<|<=|>=|>>|>)\(', r'operator\\(', line)

  # We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )".
  # Otherwise not. Note we only check for non-spaces on *both* sides;
  # sometimes people put non-spaces on one side when aligning ='s among
  # many lines (not that this is behavior that I approve of...)
  if Search(r'[\w.]=[\w.]', line) and not Search(r'\b(if|while) ', line):
    error(filename, linenum, 'whitespace/operators', 4,
          'Missing spaces around =')

  # It's ok not to have spaces around binary operators like + - * /, but if
  # there's too little whitespace, we get concerned. It's hard to tell,
  # though, so we punt on this one for now. TODO.

  # You should always have whitespace around binary operators.
  # Alas, we can't test < or > because they're legitimately used sans spaces
  # (a->b, vector<int> a). The only time we can tell is a < with no >, and
  # only if it's not template params list spilling into the next line.
  match = Search(r'[^<>=!\s](==|!=|<=|>=)[^<>=!\s]', line)
  if not match:
    # Note that while it seems that the '<[^<]*' term in the following
    # regexp could be simplified to '<.*', which would indeed match
    # the same class of strings, the [^<] means that searching for the
    # regexp takes linear rather than quadratic time.
    if not Search(r'<[^<]*,\s*$', line):  # template params spill
      match = Search(r'[^<>=!\s](<)[^<>=!\s]([^>]|->)*$', line)
  if match:
    error(filename, linenum, 'whitespace/operators', 3,
          'Missing spaces around %s' % match.group(1))
  # We allow no-spaces around << and >> when used like this: 10<<20, but
  # not otherwise (particularly, not when used as streams)
  match = Search(r'[^0-9\s](<<|>>)[^0-9\s]', line)
  if match:
    error(filename, linenum, 'whitespace/operators', 3,
          'Missing spaces around %s' % match.group(1))

  # There shouldn't be space around unary operators
  match = Search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line)
  if match:
    error(filename, linenum, 'whitespace/operators', 4,
          'Extra space for operator %s' % match.group(1))

  # A pet peeve of mine: no spaces after an if, while, switch, or for
  match = Search(r' (if\(|for\(|while\(|switch\()', line)
  if match:
    error(filename, linenum, 'whitespace/parens', 5,
          'Missing space before ( in %s' % match.group(1))

  # For if/for/while/switch, the left and right parens should be
  # consistent about how many spaces are inside the parens, and
  # there should either be zero or one spaces inside the parens.
  # We don't want: "if ( foo)" or "if ( foo   )".
  # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed.
  # Groups: 1 = keyword, 2 = spaces after '(', 3 = first character of the
  # condition, 4 = spaces before ')'.
  match = Search(r'\b(if|for|while|switch)\s*'
                 r'\(([ ]*)(.).*[^ ]+([ ]*)\)\s*{\s*$',
                 line)
  if match:
    if len(match.group(2)) != len(match.group(4)):
      if not (match.group(3) == ';' and
              len(match.group(2)) == 1 + len(match.group(4)) or
              not match.group(2) and Search(r'\bfor\s*\(.*; \)', line)):
        error(filename, linenum, 'whitespace/parens', 5,
              'Mismatching spaces inside () in %s' % match.group(1))
    if not len(match.group(2)) in [0, 1]:
      error(filename, linenum, 'whitespace/parens', 5,
            'Should have zero or one spaces inside ( and ) in %s' %
            match.group(1))

  # You should always have a space after a comma (either as fn arg or operator)
  if Search(r',[^\s]', line):
    error(filename, linenum, 'whitespace/comma', 3,
          'Missing space after ,')

  # Next we will look for issues with function calls.
  CheckSpacingForFunctionCall(filename, line, linenum, error)

  # Except after an opening paren, you should have spaces before your braces.
  # And since you should never have braces at the beginning of a line, this is
  # an easy test.
  if Search(r'[^ (]{', line):
    error(filename, linenum, 'whitespace/braces', 5,
          'Missing space before {')

  # Make sure '} else {' has spaces.
  if Search(r'}else', line):
    error(filename, linenum, 'whitespace/braces', 5,
          'Missing space before else')

  # You shouldn't have spaces before your brackets, except maybe after
  # 'delete []' or 'new char * []'.
  if Search(r'\w\s+\[', line) and not Search(r'delete\s+\[', line):
    error(filename, linenum, 'whitespace/braces', 5,
          'Extra space before [')

  # You shouldn't have a space before a semicolon at the end of the line.
  # There's a special case for "for" since the style guide allows space before
  # the semicolon there.
  if Search(r':\s*;\s*$', line):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Semicolon defining empty statement. Use { } instead.')
  elif Search(r'^\s*;\s*$', line):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Line contains only semicolon. If this should be an empty statement, '
          'use { } instead.')
  elif (Search(r'\s+;\s*$', line) and
        not Search(r'\bfor\b', line)):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Extra space before last semicolon. If this should be an empty '
          'statement, use { } instead.')
1787
1788
def GetPreviousNonBlankLine(clean_lines, linenum):
  """Finds the closest non-blank elided line above the given line.

  Args:
    clean_lines: A CleansedLines instance containing the file contents.
    linenum: The number of the line to check.

  Returns:
    A (contents, line number) tuple for the nearest non-blank line before
    linenum. When every earlier line is blank, or there is no earlier line,
    the result is ('', -1).
  """
  index = linenum
  # Walk upwards one line at a time, stopping once line 0 has been examined.
  while index > 0:
    index -= 1
    candidate = clean_lines.elided[index]
    if IsBlankLine(candidate):
      continue
    return (candidate, index)
  return ('', -1)
1810
1811
def CheckBraces(filename, clean_lines, linenum, error):
  """Looks for misplaced braces (e.g. at the end of line).

  Checks, on the elided line: an opening brace alone on a line, an else that
  is not on the same line as the preceding closing brace, an else that has a
  brace on only one side, else and do clauses written on one line, and a
  semicolon after a closing brace.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  line = clean_lines.elided[linenum]        # get rid of comments and strings

  if Match(r'\s*{\s*$', line):
    # We allow an open brace to start a line in the case where someone
    # is using braces in a block to explicitly create a new scope,
    # which is commonly used to control the lifetime of
    # stack-allocated variables. We don't detect this perfectly: we
    # just don't complain if the last non-whitespace character on the
    # previous non-blank line is ';', ':', '{', or '}'.
    prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    if not Search(r'[;:}{]\s*$', prevline):
      error(filename, linenum, 'whitespace/braces', 4,
            '{ should almost always be at the end of the previous line')

  # An else clause should be on the same line as the preceding closing brace.
  # The \b keeps identifiers that merely begin with "else" (e.g. "elsewhere")
  # from being mistaken for the keyword.
  if Match(r'\s*else\b', line):
    prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    if Match(r'\s*}\s*$', prevline):
      error(filename, linenum, 'whitespace/newline', 4,
            'An else should appear on the same line as the preceding }')

  # If braces come on one side of an else, they should be on both.
  # However, we have to worry about "else if" that spans multiple lines!
  if Search(r'}\s*else[^{]*$', line) or Match(r'[^}]*else\s*{', line):
    if Search(r'}\s*else if([^{]*)$', line):       # could be multi-line if
      # find the ( after the if
      pos = line.find('else if')
      pos = line.find('(', pos)
      if pos > 0:
        (endline, _, endpos) = CloseExpression(clean_lines, linenum, pos)
        if endline[endpos:].find('{') == -1:    # must be brace after if
          error(filename, linenum, 'readability/braces', 5,
                'If an else has a brace on one side, it should have it on both')
    else:            # common case: else not followed by a multi-line if
      error(filename, linenum, 'readability/braces', 5,
            'If an else has a brace on one side, it should have it on both')

  # Likewise, an else should never have the else clause on the same line
  if Search(r'\belse [^\s{]', line) and not Search(r'\belse if\b', line):
    error(filename, linenum, 'whitespace/newline', 4,
          'Else clause should never be on same line as else (use 2 lines)')

  # In the same way, a do/while should never be on one line
  if Match(r'\s*do [^\s{]', line):
    error(filename, linenum, 'whitespace/newline', 4,
          'do/while clauses should not be on a single line')

  # Braces shouldn't be followed by a ; unless they're defining a struct
  # or initializing an array.
  # We can't tell in general, but we can for some common cases.
  prevlinenum = linenum
  while True:
    (prevline, prevlinenum) = GetPreviousNonBlankLine(clean_lines, prevlinenum)
    # Stop once there is no earlier non-blank line (prevlinenum == -1).
    # Prepending the empty line returned in that case would leave 'line'
    # unchanged, so the loop would otherwise repeat forever.
    if (prevlinenum >= 0 and
        Match(r'\s+{.*}\s*;', line) and not prevline.count(';')):
      line = prevline + line
    else:
      break
  if (Search(r'{.*}\s*;', line) and
      line.count('{') == line.count('}') and
      not Search(r'struct|class|enum|\s*=\s*{', line)):
    error(filename, linenum, 'readability/braces', 4,
          "You don't need a ; after a }")
1884
1885
def ReplaceableCheck(operator, macro, line):
  """Decides whether a plain CHECK could use a comparison-specific macro.

  For example suggest using CHECK_EQ instead of CHECK(a == b) and
  similarly for CHECK_GE, CHECK_GT, CHECK_LE, CHECK_LT, CHECK_NE.

  Args:
    operator: The C++ operator used in the CHECK.
    macro: The CHECK or EXPECT macro being called.
    line: The current source line.

  Returns:
    True if the CHECK can be replaced with a more specific one; a false
    value otherwise.
  """

  # Decimal and hex integers, strings, and chars (in that order).
  literal = r'([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')'

  # Only comparisons with something that looks like a literal on one side
  # are reported, since CHECK(x == iterator) won't compile with the specific
  # macros. This misses some replaceable CHECKs, but it's less annoying than
  # dealing with extraneous warnings.
  literal_on_left = r'\s*' + literal + r'\s*' + operator + r'[^<>].*'
  literal_on_right = r'.*[^<>]' + operator + r'\s*' + literal + r'\s*\)'
  pattern = (r'\s*' + macro + r'\((' +
             literal_on_left + '|' + literal_on_right + ')')

  candidate = Match(pattern, line)
  if not candidate:
    return candidate

  # Don't complain about CHECK(x == NULL) or similar because
  # CHECK_EQ(x, NULL) won't compile (requires a cast).
  # Also, don't complain about more complex boolean expressions
  # involving && or || such as CHECK(a == b || c == d).
  return not Search(r'NULL|&&|\|\|', line)
1919
1920
def CheckCheck(filename, clean_lines, linenum, error):
  """Suggests comparison-specific macros for plain CHECK/EXPECT calls.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  # Pick the first known macro that appears on the raw line; it decides which
  # set of replacement macros is suggested.
  raw_text = clean_lines.raw_lines[linenum]
  current_macro = ''
  for candidate in _CHECK_MACROS:
    if candidate in raw_text:
      current_macro = candidate
      break
  if not current_macro:
    # Nothing to do when the line mentions none of the macros.
    return

  code = clean_lines.elided[linenum]        # get rid of comments and strings

  # Report at most one suggestion per line: the first operator that matches.
  for operator in ('==', '!=', '>=', '>', '<=', '<'):
    if not ReplaceableCheck(operator, current_macro, code):
      continue
    suggestion = _CHECK_REPLACEMENT[current_macro][operator]
    error(filename, linenum, 'readability/check', 2,
          'Consider using %s instead of %s(a %s b)' % (
              suggestion, current_macro, operator))
    break
1951 break
1952
1953
def GetLineWidth(line):
  """Determines the width of the line in column positions.

  Args:
    line: A string, which may be a Unicode string.

  Returns:
    The width of the line in column positions, accounting for Unicode
    combining characters and wide characters. Anything that is not a
    Unicode string is measured with len().
  """
  # The 'unicode' builtin only exists on Python 2. Where it is missing, every
  # str is already Unicode text, so fall back to str instead of raising
  # NameError.
  try:
    text_type = unicode
  except NameError:
    text_type = str

  if not isinstance(line, text_type):
    return len(line)

  width = 0
  for c in unicodedata.normalize('NFC', line):
    if unicodedata.east_asian_width(c) in ('W', 'F'):
      # Wide and fullwidth characters occupy two columns.
      width += 2
    elif not unicodedata.combining(c):
      # Combining marks occupy no column of their own.
      width += 1
  return width
1974
1975
def CheckStyle(filename, clean_lines, linenum, file_extension, error):
  """Checks rules from the 'C++ style rules' section of cppguide.html.

  Most of these rules are hard to test (naming, comment style), but we
  do what we can. In particular we check for 2-space indents, line lengths,
  tab usage, spaces inside code, etc.

  Tab, indentation, trailing-whitespace and line-length checks look at the
  raw line; the multiple-statements check looks at the elided line. The
  brace, spacing and CHECK-macro checks are then run for the same line.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    file_extension: The extension (without the dot) of the filename.
    error: The function to call with any errors found.
  """

  raw_lines = clean_lines.raw_lines
  line = raw_lines[linenum]

  if line.find('\t') != -1:
    error(filename, linenum, 'whitespace/tab', 1,
          'Tab found; better to use spaces')

  # One or three blank spaces at the beginning of the line is weird; it's
  # hard to reconcile that with 2-space indents.
  # NOTE: here are the conditions rob pike used for his tests. Mine aren't
  # as sophisticated, but it may be worth becoming so: RLENGTH==initial_spaces
  # if(RLENGTH > 20) complain = 0;
  # if(match($0, " +(error|private|public|protected):")) complain = 0;
  # if(match(prev, "&& *$")) complain = 0;
  # if(match(prev, "\\|\\| *$")) complain = 0;
  # if(match(prev, "[\",=><] *$")) complain = 0;
  # if(match($0, " <<")) complain = 0;
  # if(match(prev, " +for \\(")) complain = 0;
  # if(prevodd && match(prevprev, " +for \\(")) complain = 0;
  initial_spaces = 0
  cleansed_line = clean_lines.elided[linenum]
  while initial_spaces < len(line) and line[initial_spaces] == ' ':
    initial_spaces += 1
  if line and line[-1].isspace():
    error(filename, linenum, 'whitespace/end_of_line', 4,
          'Line ends in whitespace. Consider deleting these extra spaces.')
  # There are certain situations we allow one space, notably for labels
  elif ((initial_spaces == 1 or initial_spaces == 3) and
        not Match(r'\s*\w+\s*:\s*$', cleansed_line)):
    error(filename, linenum, 'whitespace/indent', 3,
          'Weird number of spaces at line-start. '
          'Are you using a 2-space indent?')
  # Labels should always be indented at least one space.
  elif not initial_spaces and line[:2] != '//' and Search(r'[^:]:\s*$',
                                                          line):
    error(filename, linenum, 'whitespace/labels', 4,
          'Labels should always be indented at least one space. '
          'If this is a member-initializer list in a constructor, '
          'the colon should be on the line after the definition header.')

  # Check if the line is a header guard.
  is_header_guard = False
  if file_extension == 'h':
    cppvar = GetHeaderGuardCPPVariable(filename)
    # The closing guard is accepted with one or two spaces before the
    # comment; the comment-spacing check asks for two, so that spelling must
    # be recognised as a header guard as well.
    if (line.startswith('#ifndef %s' % cppvar) or
        line.startswith('#define %s' % cppvar) or
        line.startswith('#endif // %s' % cppvar) or
        line.startswith('#endif  // %s' % cppvar)):
      is_header_guard = True
  # #include lines and header guards can be long, since there's no clean way to
  # split them.
  #
  # URLs can be long too. It's possible to split these, but it makes them
  # harder to cut&paste.
  if (not line.startswith('#include') and not is_header_guard and
      not Match(r'^\s*//.*http(s?)://\S*$', line)):
    line_width = GetLineWidth(line)
    if line_width > 100:
      error(filename, linenum, 'whitespace/line_length', 4,
            'Lines should very rarely be longer than 100 characters')
    elif line_width > 80:
      error(filename, linenum, 'whitespace/line_length', 2,
            'Lines should be <= 80 characters long')

  if cleansed_line.count(';') > 1 and cleansed_line.find('for') == -1:
    # Look the previous line up once; it is only needed for lines that
    # already hold more than one ';' and no 'for'.
    prev_line = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    # for loops are allowed two ;'s (and may run over two lines).
    continues_for = (prev_line.find('for') != -1 and
                     prev_line.find(';') == -1)
    # It's ok to have many commands in a switch case that fits in 1 line
    one_line_case = ((cleansed_line.find('case ') != -1 or
                      cleansed_line.find('default:') != -1) and
                     cleansed_line.find('break;') != -1)
    if not continues_for and not one_line_case:
      error(filename, linenum, 'whitespace/newline', 4,
            'More than one command on the same line')

  # Some more style checks
  CheckBraces(filename, clean_lines, linenum, error)
  CheckSpacing(filename, clean_lines, linenum, error)
  CheckCheck(filename, clean_lines, linenum, error)
2070
2071
# Matches a quoted #include of a .h file whose path contains no '/', i.e. a
# header named without its directory.
_RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"')
# Matches a whole #include line. Group 1 is the opening delimiter ('<' or
# '"') and group 2 is the included path.
_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$')
# Matches the first component of a filename delimited by -s and _s. That is:
#  _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo'
_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
2080
2081
2082def _DropCommonSuffixes(filename):
2083 """Drops common suffixes like _test.cc or -inl.h from filename.
2084
2085 For example:
2086 >>> _DropCommonSuffixes('foo/foo-inl.h')
2087 'foo/foo'
2088 >>> _DropCommonSuffixes('foo/bar/foo.cc')
2089 'foo/bar/foo'
2090 >>> _DropCommonSuffixes('foo/foo_internal.h')
2091 'foo/foo'
2092 >>> _DropCommonSuffixes('foo/foo_unusualinternal.h')
2093 'foo/foo_unusualinternal'
2094
2095 Args:
2096 filename: The input filename.
2097
2098 Returns:
2099 The filename with the common suffix removed.
2100 """
2101 for suffix in ('test.cc', 'regtest.cc', 'unittest.cc',
2102 'inl.h', 'impl.h', 'internal.h'):
2103 if (filename.endswith(suffix) and len(filename) > len(suffix) and
2104 filename[-len(suffix) - 1] in ('-', '_')):
2105 return filename[:-len(suffix) - 1]
2106 return os.path.splitext(filename)[0]
2107
2108
2109def _IsTestFilename(filename):
2110 """Determines if the given filename has a suffix that identifies it as a test.
2111
2112 Args:
2113 filename: The input filename.
2114
2115 Returns:
2116 True if 'filename' looks like a test, False otherwise.
2117 """
2118 if (filename.endswith('_test.cc') or
2119 filename.endswith('_unittest.cc') or
2120 filename.endswith('_regtest.cc')):
2121 return True
2122 else:
2123 return False
2124
2125
def _ClassifyInclude(fileinfo, include, is_system):
  """Sorts an #include into one of the header categories.

  Args:
    fileinfo: The current file cpplint is running over. A FileInfo instance.
    include: The path to a #included file.
    is_system: True if the #include used <> rather than "".

  Returns:
    One of the _XXX_HEADER constants.

  For example:
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True)
    _C_SYS_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True)
    _CPP_SYS_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False)
    _LIKELY_MY_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'),
    ...                  'bar/foo_other_ext.h', False)
    _POSSIBLE_MY_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False)
    _OTHER_HEADER
  """
  if is_system:
    # Angle-bracket includes are C++ system headers when listed in either
    # known-header table, and C system headers otherwise.
    if include in _STL_HEADERS or include in _CPP_HEADERS:
      return _CPP_SYS_HEADER
    return _C_SYS_HEADER

  # If the target file and the include we're checking share a
  # basename when we drop common extensions, and the include
  # lives in . , then it's likely to be owned by the target file.
  target_path = _DropCommonSuffixes(fileinfo.RepositoryName())
  target_dir, target_base = os.path.split(target_path)
  include_dir, include_base = os.path.split(_DropCommonSuffixes(include))
  if target_base == include_base:
    public_dir = os.path.normpath(target_dir + '/../public')
    if include_dir == target_dir or include_dir == public_dir:
      return _LIKELY_MY_HEADER

  # If the target and include share some initial basename
  # component, it's possible the target is implementing the
  # include, so it's allowed to be first, but we'll never
  # complain if it's not there.
  target_head = _RE_FIRST_COMPONENT.match(target_base)
  include_head = _RE_FIRST_COMPONENT.match(include_base)
  if (target_head and include_head and
      target_head.group(0) == include_head.group(0)):
    return _POSSIBLE_MY_HEADER

  return _OTHER_HEADER
2184
2185
maruel@google.comfb2b8eb2009-04-23 21:03:42 +00002186
def CheckIncludeLine(filename, clean_lines, linenum, include_state, error):
  """Applies the checks that are specific to #include lines.

  Strings on #include lines are NOT removed from elided line, to make
  certain tasks easier. However, to prevent false positives, checks
  applicable to #include lines in CheckLanguage must be put here.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    include_state: An _IncludeState instance in which the headers are inserted.
    error: The function to call with any errors found.
  """
  fileinfo = FileInfo(filename)
  line = clean_lines.lines[linenum]

  # "include" should use the new style "foo/bar.h" instead of just "bar.h"
  if _RE_PATTERN_INCLUDE_NEW_STYLE.search(line):
    error(filename, linenum, 'build/include', 4,
          'Include the directory when naming .h files')

  # The include pattern is anchored at the start of the line, so one lookup
  # serves both the duplicate/order checks and the stream check below.
  match = _RE_PATTERN_INCLUDE.search(line)
  if not match:
    return

  delimiter = match.group(1)
  include = match.group(2)

  # we shouldn't include a file more than once. actually, there are a
  # handful of instances where doing so is okay, but in general it's
  # not.
  if include in include_state:
    error(filename, linenum, 'build/include', 4,
          '"%s" already included at %s:%s' %
          (include, filename, include_state[include]))
  else:
    include_state[include] = linenum

    # We want to ensure that headers appear in the right order:
    # 1) for foo.cc, foo.h  (preferred location)
    # 2) c system files
    # 3) cpp system files
    # 4) for foo.cc, foo.h  (deprecated location)
    # 5) other google headers
    #
    # We classify each include statement as one of those 5 types
    # using a number of techniques. The include_state object keeps
    # track of the highest type seen, and complains if we see a
    # lower type after that.
    header_type = _ClassifyInclude(fileinfo, include, delimiter == '<')
    order_problem = include_state.CheckNextIncludeOrder(header_type)
    if order_problem:
      error(filename, linenum, 'build/include_order', 4,
            '%s. Should be: %s.h, c system, c++ system, other.' %
            (order_problem, fileinfo.BaseName()))
    if not include_state.IsInAlphabeticalOrder(include):
      error(filename, linenum, 'build/include_alpha', 4,
            'Include "%s" not in alphabetical order' % include)

  # Look for any of the stream classes that are part of standard C++.
  # Many unit tests use cout, so we exempt them.
  if (Match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$', include) and
      not _IsTestFilename(filename)):
    error(filename, linenum, 'readability/streams', 3,
          'Streams are highly discouraged.')
2254
def CheckLanguage(filename, clean_lines, linenum, file_extension, include_state,
                  error):
  """Checks rules from the 'C++ language rules' section of cppguide.html.

  Some of these rules are hard to test (function overloading, using
  uint32 inappropriately), but we do the best we can.

  A line that matches _RE_PATTERN_INCLUDE is handed to CheckIncludeLine and
  receives none of the other checks in this function.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    file_extension: The extension (without the dot) of the filename.
    include_state: An _IncludeState instance in which the headers are inserted.
    error: The function to call with any errors found.
  """
  # If the line is empty or consists of entirely a comment, no need to
  # check it.
  line = clean_lines.elided[linenum]
  if not line:
    return

  match = _RE_PATTERN_INCLUDE.search(line)
  if match:
    CheckIncludeLine(filename, clean_lines, linenum, include_state, error)
    return

  # TODO(unknown): figure out if they're using default arguments in fn proto.

  # Check for non-const references in functions. This is tricky because &
  # is also used to take the address of something. We allow <> for templates,
  # (ignoring whatever is between the braces) and : for classes.
  # These are complicated re's. They try to capture the following:
  # paren (for fn-prototype start), typename, &, varname. For the const
  # version, we're willing for const to be before typename or after.
  # Don't check the implementation on same line.
  fnline = line.split('{', 1)[0]
  if (len(re.findall(r'\([^()]*\b(?:[\w:]|<[^()]*>)+(\s?&|&\s?)\w+', fnline)) >
      len(re.findall(r'\([^()]*\bconst\s+(?:typename\s+)?(?:struct\s+)?'
                     r'(?:[\w:]|<[^()]*>)+(\s?&|&\s?)\w+', fnline)) +
      len(re.findall(r'\([^()]*\b(?:[\w:]|<[^()]*>)+\s+const(\s?&|&\s?)[\w]+',
                     fnline))):

    # We allow non-const references in a few standard places, like functions
    # called "swap()" or iostream operators like "<<" or ">>".
    if not Search(
        r'(swap|Swap|operator[<>][<>])\s*\(\s*(?:[\w:]|<.*>)+\s*&',
        fnline):
      error(filename, linenum, 'runtime/references', 2,
            'Is this a non-const reference? '
            'If so, make const or use a pointer.')

  # Check to see if they're using an conversion function cast.
  # I just try to capture the most common basic types, though there are more.
  # Parameterless conversion functions, such as bool(), are allowed as they are
  # probably a member operator declaration or default constructor.
  match = Search(
      r'(\bnew\s+)?\b'  # Grab 'new' operator, if it's there
      r'(int|float|double|bool|char|int32|uint32|int64|uint64)\([^)]', line)
  if match:
    # gMock methods are defined using some variant of MOCK_METHODx(name, type)
    # where type may be float(), int(string), etc. Without context they are
    # virtually indistinguishable from int(x) casts.
    if (match.group(1) is None and  # If new operator, then this isn't a cast
        not Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line)):
      error(filename, linenum, 'readability/casting', 4,
            'Using deprecated casting style. '
            'Use static_cast<%s>(...) instead' %
            match.group(2))

  CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
                  'static_cast',
                  r'\((int|float|double|bool|char|u?int(16|32|64))\)',
                  error)
  # This doesn't catch all cases. Consider (const char * const)"hello".
  CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
                  'reinterpret_cast', r'\((\w+\s?\*+\s?)\)', error)

  # In addition, we look for people taking the address of a cast. This
  # is dangerous -- casts can assign to temporaries, so the pointer doesn't
  # point where you think.
  if Search(
      r'(&\([^)]+\)[\w(])|(&(static|dynamic|reinterpret)_cast\b)', line):
    error(filename, linenum, 'runtime/casting', 4,
          ('Are you taking an address of a cast? '
           'This is dangerous: could be a temp var. '
           'Take the address before doing the cast, rather than after'))

  # Check for people declaring static/global STL strings at the top level.
  # This is dangerous because the C++ language does not guarantee that
  # globals with constructors are initialized before the first access.
  match = Match(
      r'((?:|static +)(?:|const +))string +([a-zA-Z0-9_:]+)\b(.*)',
      line)
  # Make sure it's not a function.
  # Function template specialization looks like: "string foo<Type>(...".
  # Class template definitions look like: "string Foo<Type>::Method(...".
  if match and not Match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)?\s*\(([^"]|$)',
                         match.group(3)):
    error(filename, linenum, 'runtime/string', 4,
          'For a static/global string constant, use a C style string instead: '
          '"%schar %s[]".' %
          (match.group(1), match.group(2)))

  # Check that we're not using RTTI outside of testing code.
  if Search(r'\bdynamic_cast<', line) and not _IsTestFilename(filename):
    error(filename, linenum, 'runtime/rtti', 5,
          'Do not use dynamic_cast<>. If you need to cast within a class '
          "hierarchy, use static_cast<> to upcast. Google doesn't support "
          'RTTI.')

  if Search(r'\b([A-Za-z0-9_]*_)\(\1\)', line):
    error(filename, linenum, 'runtime/init', 4,
          'You seem to be initializing a member variable with itself.')

  if file_extension == 'h':
    # TODO(unknown): check that 1-arg constructors are explicit.
    #                How to tell it's a constructor?
    #                (handled in CheckForNonStandardConstructs for now)
    # TODO(unknown): check that classes have DISALLOW_EVIL_CONSTRUCTORS
    #                (level 1 error)
    pass

  # Check if people are using the verboten C basic types. The only exception
  # we regularly allow is "unsigned short port" for port.
  if Search(r'\bshort port\b', line):
    if not Search(r'\bunsigned short port\b', line):
      error(filename, linenum, 'runtime/int', 4,
            'Use "unsigned short" for ports, not "short"')
  else:
    match = Search(r'\b(short|long(?! +double)|long long)\b', line)
    if match:
      error(filename, linenum, 'runtime/int', 4,
            'Use int16/int64/etc, rather than the C type %s' % match.group(1))

  # When snprintf is used, the second argument shouldn't be a literal.
  match = Search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line)
  if match:
    error(filename, linenum, 'runtime/printf', 3,
          'If you can, use sizeof(%s) instead of %s as the 2nd arg '
          'to snprintf.' % (match.group(1), match.group(2)))

  # Check if some verboten C functions are being used.
  if Search(r'\bsprintf\b', line):
    error(filename, linenum, 'runtime/printf', 5,
          'Never use sprintf. Use snprintf instead.')
  match = Search(r'\b(strcpy|strcat)\b', line)
  if match:
    error(filename, linenum, 'runtime/printf', 4,
          'Almost always, snprintf is better than %s' % match.group(1))

  if Search(r'\bsscanf\b', line):
    error(filename, linenum, 'runtime/printf', 1,
          'sscanf can be ok, but is slow and can overflow buffers.')

  # Check if some verboten operator overloading is going on
  # TODO(unknown): catch out-of-line unary operator&:
  #   class X {};
  #   int operator&(const X& x) { return 42; }  // unary operator&
  # The trick is it's hard to tell apart from binary operator&:
  #   class Y { int operator&(const Y& x) { return 23; } }; // binary operator&
  if Search(r'\boperator\s*&\s*\(\s*\)', line):
    error(filename, linenum, 'runtime/operator', 4,
          'Unary operator& is dangerous. Do not use it.')

  # Check for suspicious usage of "if" like
  # } if (a == b) {
  if Search(r'\}\s*if\s*\(', line):
    error(filename, linenum, 'readability/braces', 4,
          'Did you mean "else if"? If not, start a new line for "if".')

  # Check for potential format string bugs like printf(foo).
  # We constrain the pattern not to pick things like DocidForPrintf(foo).
  # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
  match = re.search(r'\b((?:string)?printf)\s*\(([\w.\->()]+)\)', line, re.I)
  if match:
    error(filename, linenum, 'runtime/printf', 4,
          'Potential format string bug. Do %s("%%s", %s) instead.'
          % (match.group(1), match.group(2)))

  # Check for potential memset bugs like memset(buf, sizeof(buf), 0).
  match = Search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line)
  # The second argument is fine when it is entirely a literal: '' (a char
  # literal, as it appears in this line), a decimal number, or a hex number.
  # The alternatives are grouped so that both anchors apply to each of them;
  # otherwise any argument that merely starts with a digit (for example
  # "4 * sizeof(buf)") would be treated as a literal and never reported.
  if match and not Match(r"^(''|-?[0-9]+|0x[0-9A-Fa-f]+)\s*$", match.group(2)):
    error(filename, linenum, 'runtime/memset', 4,
          'Did you mean "memset(%s, 0, %s)"?'
          % (match.group(1), match.group(2)))

  if Search(r'\busing namespace\b', line):
    error(filename, linenum, 'build/namespaces', 5,
          'Do not use namespace using-directives. '
          'Use using-declarations instead.')

  # Detect variable-length arrays.
  match = Match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line)
  if (match and match.group(2) != 'return' and match.group(2) != 'delete' and
      match.group(3).find(']') == -1):
    # Split the size using space and arithmetic operators as delimiters.
    # If any of the resulting tokens are not compile time constants then
    # report the error.
    # Both shift operators are delimiters, so "kSize >> 1" splits into
    # constant tokens just like "kSize << 1" does.
    tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>', match.group(3))
    is_const = True
    skip_next = False
    for tok in tokens:
      if skip_next:
        skip_next = False
        continue

      if Search(r'sizeof\(.+\)', tok): continue
      if Search(r'arraysize\(\w+\)', tok): continue

      tok = tok.lstrip('(')
      tok = tok.rstrip(')')
      if not tok: continue
      if Match(r'\d+', tok): continue
      if Match(r'0[xX][0-9a-fA-F]+', tok): continue
      if Match(r'k[A-Z0-9]\w*', tok): continue
      if Match(r'(.+::)?k[A-Z0-9]\w*', tok): continue
      if Match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue
      # A catch all for tricky sizeof cases, including 'sizeof expression',
      # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
      # requires skipping the next token because we split on ' ' and '*'.
      if tok.startswith('sizeof'):
        skip_next = True
        continue
      is_const = False
      break
    if not is_const:
      error(filename, linenum, 'runtime/arrays', 1,
            'Do not use variable-length arrays. Use an appropriately named '
            "('k' followed by CamelCase) compile-time constant for the size.")

  # If DISALLOW_EVIL_CONSTRUCTORS, DISALLOW_COPY_AND_ASSIGN, or
  # DISALLOW_IMPLICIT_CONSTRUCTORS is present, then it should be the last thing
  # in the class declaration.
  match = Match(
      (r'\s*'
       r'(DISALLOW_(EVIL_CONSTRUCTORS|COPY_AND_ASSIGN|IMPLICIT_CONSTRUCTORS))'
       r'\(.*\);$'),
      line)
  if match and linenum + 1 < clean_lines.NumLines():
    next_line = clean_lines.elided[linenum + 1]
    if not Search(r'^\s*};', next_line):
      error(filename, linenum, 'readability/constructors', 3,
            match.group(1) + ' should be the last thing in the class')

  # Check for use of unnamed namespaces in header files. Registration
  # macros are typically OK, so we allow use of "namespace {" on lines
  # that end with backslashes.
  if (file_extension == 'h'
      and Search(r'\bnamespace\s*{', line)
      and line[-1] != '\\'):
    error(filename, linenum, 'build/namespaces', 4,
          'Do not use unnamed namespaces in header files. See '
          'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
          ' for more information.')
2520
2521
def CheckCStyleCast(filename, linenum, line, raw_line, cast_type, pattern,
                    error):
  """Reports C-style casts (and closely related constructs) on one line.

  The same pattern also hits sizeof(type) and unnamed function parameters,
  so those are recognised first and reported under their own categories.

  Args:
    filename: The name of the current file.
    linenum: The number of the line to check.
    line: The line of code to check.
    raw_line: The raw line of code to check, with comments.
    cast_type: The string for the C++ cast to recommend. This is either
      reinterpret_cast or static_cast, depending.
    pattern: The regular expression used to find C-style casts.
    error: The function to call with any errors found.
  """
  cast_match = Search(pattern, line)
  if not cast_match:
    return

  # Everything up to (not including) the paren that opens the suspected
  # cast. If that text ends in "sizeof", this is sizeof(int) and not a cast.
  before_paren = line[:cast_match.start(1) - 1]
  if Match(r'.*sizeof\s*$', before_paren):
    error(filename, linenum, 'runtime/sizeof', 1,
          'Using sizeof(type). Use sizeof(varname) instead if possible')
    return

  after_cast = line[cast_match.end(0):]

  # What follows the parenthesised type tells a declaration from a cast:
  #   ")"  function pointer passed as an argument, eg
  #        void foo(void (*bar)(int));
  #   ";"  plain prototype or function pointer typedef, eg
  #        void foo(int); or void foo(int) const;
  #   "="  function pointer assignment, eg
  #        void *(*foo)(int) = ...
  #
  # Right now, this will only catch cases where there's a single argument,
  # and it's unnamed. It should probably be expanded to check for multiple
  # arguments with some unnamed.
  function_match = Match(r'\s*(\)|=|(const)?\s*(;|\{|throw\(\)))', after_cast)
  if function_match:
    terminator = function_match.group(3)
    if not terminator or terminator == ';' or '/*' not in raw_line:
      error(filename, linenum, 'readability/function', 3,
            'All parameters should be named in a function')
    return

  # At this point, all that should be left is actual casts.
  error(filename, linenum, 'readability/casting', 4,
        'Using C-style cast. Use %s<%s>(...) instead' %
        (cast_type, cast_match.group(1)))
2575
2576
# Pairs of (header, names of the templates that header provides).
_HEADERS_CONTAINING_TEMPLATES = (
    ('<deque>', ('deque',)),
    ('<functional>', (
        'unary_function', 'binary_function',
        'plus', 'minus', 'multiplies', 'divides', 'modulus',
        'negate',
        'equal_to', 'not_equal_to', 'greater', 'less',
        'greater_equal', 'less_equal',
        'logical_and', 'logical_or', 'logical_not',
        'unary_negate', 'not1', 'binary_negate', 'not2',
        'bind1st', 'bind2nd',
        'pointer_to_unary_function',
        'pointer_to_binary_function',
        'ptr_fun',
        'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
        'mem_fun_ref_t',
        'const_mem_fun_t', 'const_mem_fun1_t',
        'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
        'mem_fun_ref',
    )),
    ('<limits>', ('numeric_limits',)),
    ('<list>', ('list',)),
    ('<map>', ('map', 'multimap',)),
    ('<memory>', ('allocator',)),
    ('<queue>', ('queue', 'priority_queue',)),
    ('<set>', ('set', 'multiset',)),
    ('<stack>', ('stack',)),
    ('<string>', ('char_traits', 'basic_string',)),
    ('<utility>', ('pair',)),
    ('<vector>', ('vector',)),

    # gcc extensions.
    # Note: std::hash is their hash, ::hash is our hash
    ('<hash_map>', ('hash_map', 'hash_multimap',)),
    ('<hash_set>', ('hash_set', 'hash_multiset',)),
    ('<slist>', ('slist',)),
)

# Maps a template entity to header names that are accepted as providing it,
# even though they are not the header we would recommend.
_HEADERS_ACCEPTED_BUT_NOT_PROMOTED = {
    # We can trust with reasonable confidence that map gives us pair<>, too.
    'pair<>': ('map', 'multimap', 'hash_map', 'hash_multimap')
}

_RE_PATTERN_STRING = re.compile(r'\bstring\b')

# Triples of (compiled regex, function name, header) for the <algorithm>
# functions we look for.
_re_pattern_algorithm_header = []
for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap',
                  'transform'):
  # Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or
  # type::max().
  _algorithm_call = re.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]')
  _re_pattern_algorithm_header.append(
      (_algorithm_call, _template, '<algorithm>'))

# Triples of (compiled regex, 'name<>', header), one for every template
# listed in _HEADERS_CONTAINING_TEMPLATES.
_re_pattern_templates = []
for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
  for _template in _templates:
    _template_use = re.compile(r'(\<|\b)' + _template + r'\s*\<')
    _re_pattern_templates.append((_template_use, _template + '<>', _header))
2638
2639
def FilesBelongToSameModule(filename_cc, filename_h):
  """Decides whether a .cc file and a header belong to the same module.

  Both names are reduced to a module path before comparing: the extension is
  dropped, a trailing '_unittest' or '_test' (for the .cc) or '-inl' (for the
  header) is dropped, and '/public/' and '/internal/' path components are
  collapsed to '/'. So foo.h, foo-inl.h, foo.cc, foo_test.cc and
  foo_unittest.cc in one directory all reduce to the same module, as do
  some/path/public/xyzzy and some/path/internal/xyzzy.

  The files match when the reduced .cc path ends with the reduced header
  path. Whatever precedes that shared ending in the .cc path is returned as
  well: for '/absolute/path/to/base/sysinfo.cc' including 'base/sysinfo.h' it
  is '/absolute/path/to/', which the caller can prepend to the include to open
  the header. We don't have access to the real include paths in this context,
  so this guesswork is needed.

  Known bugs: tools/base/bar.cc and base/bar.h belong to the same module
  according to this implementation. Because of this, this function gives
  some false positives. This should be sufficiently rare in practice.

  Args:
    filename_cc: is the path for the .cc file
    filename_h: is the path for the header path

  Returns:
    Tuple with a bool and a string:
    bool: True if filename_cc and filename_h belong to the same module.
    string: the additional prefix needed to open the header file.
  """
  cc_extension = '.cc'
  if not filename_cc.endswith(cc_extension):
    return (False, '')
  module_cc = filename_cc[:-len(cc_extension)]
  # At most one test suffix is removed; '_unittest' is tried first.
  for test_suffix in ('_unittest', '_test'):
    if module_cc.endswith(test_suffix):
      module_cc = module_cc[:-len(test_suffix)]
      break
  module_cc = module_cc.replace('/public/', '/').replace('/internal/', '/')

  h_extension = '.h'
  if not filename_h.endswith(h_extension):
    return (False, '')
  module_h = filename_h[:-len(h_extension)]
  inline_suffix = '-inl'
  if module_h.endswith(inline_suffix):
    module_h = module_h[:-len(inline_suffix)]
  module_h = module_h.replace('/public/', '/').replace('/internal/', '/')

  if not module_cc.endswith(module_h):
    return (False, '')
  return (True, module_cc[:-len(module_h)])
2693
2694
def UpdateIncludeState(filename, include_state, io=codecs):
  """Fill up the include_state with new includes found from the file.

  Every line of the file is stripped of comments and searched with
  _RE_PATTERN_INCLUDE; each include found is added as a key of include_state
  unless that key is already present.

  Args:
    filename: the name of the header to read.
    include_state: an _IncludeState instance in which the headers are inserted.
    io: The io factory to use to read the file. Provided for testability.

  Returns:
    True if the header could be opened and was scanned. False if opening it
    raised IOError.
  """
  try:
    headerfile = io.open(filename, 'r', 'utf8', 'replace')
  except IOError:
    return False
  try:
    linenum = 0
    for line in headerfile:
      linenum += 1
      clean_line = CleanseComments(line)
      match = _RE_PATTERN_INCLUDE.search(clean_line)
      if match:
        include = match.group(2)
        # The value formatting is cute, but not really used right now.
        # What matters here is that the key is in include_state.
        include_state.setdefault(include, '%s:%d' % (filename, linenum))
  finally:
    # Release the file handle even if scanning fails. An injected io factory
    # may hand back a plain iterable of lines with no close(), so only call
    # it when it exists.
    close = getattr(headerfile, 'close', None)
    if close is not None:
      close()
  return True
2722
2723
def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error,
                              io=codecs):
  """Reports STL headers that are used but not included.

  Every non-preprocessor line is scanned for 'string', for the <algorithm>
  functions and for the STL templates this file knows about. A header is
  reported with a single reason only: when several entities need the same
  header (say equal_to<> and less<> for <functional>), the one found last in
  the file is the one named in the warning.

  Includes made by a header of the same module count as made by this file.
  For a .cc file, nothing is reported when no such header could be loaded.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    include_state: An _IncludeState instance.
    error: The function to call with any errors found.
    io: The IO factory to use to read the header file. Provided for unittest
      injection.
  """
  # Maps header name to (line number, entity that needs the header).
  # Example: { '<functional>': (1219, 'less<>') }
  required = {}

  for linenum in xrange(clean_lines.NumLines()):
    line = clean_lines.elided[linenum]
    if not line or line[0] == '#':
      continue

    # String is special -- it is a non-templatized type in STL.
    if _RE_PATTERN_STRING.search(line):
      required['<string>'] = (linenum, 'string')

    for pattern, template, header in _re_pattern_algorithm_header:
      if pattern.search(line):
        required[header] = (linenum, template)

    # Every template pattern needs a '<', so lines without one are not
    # scanned. This is only a speed up, no semantics are changed.
    if '<' in line:
      for pattern, template, header in _re_pattern_templates:
        if pattern.search(line):
          required[header] = (linenum, template)

  # The policy is that if you #include something in foo.h you don't need to
  # include it again in foo.cc. The includes of such headers are merged into
  # a copy, so the caller's include_state is left alone.
  known_includes = include_state.copy()

  # Use the absolute path so that matching works properly.
  source_path = os.path.abspath(filename)

  # For Emacs's flymake.
  # If cpplint is invoked from Emacs's flymake, a temporary file is generated
  # by flymake and that file name might end with '_flymake.cc'. In that case,
  # restore original file name here so that the corresponding header file can
  # be found.
  # e.g. If the file name is 'foo_flymake.cc', we should search for 'foo.h'
  # instead of 'foo_flymake.h'
  flymake_suffix = '_flymake.cc'
  if source_path.endswith(flymake_suffix):
    source_path = source_path[:-len(flymake_suffix)] + '.cc'

  # Did we find the header for this file (if any) and successfully load it?
  header_found = False

  # known_includes is modified during iteration, so we iterate over a copy of
  # the keys.
  for header in known_includes.keys():  #NOLINT
    (same_module, common_path) = FilesBelongToSameModule(source_path, header)
    if not same_module:
      continue
    if UpdateIncludeState(common_path + header, known_includes, io):
      header_found = True

  # If we can't find the header file for a .cc, assume it's because we don't
  # know where to look. In that case we'll give up as we're not sure they
  # didn't include it in the .h file.
  # TODO(unknown): Do a better job of finding .h files so we are confident that
  # not having the .h file means there isn't one.
  if filename.endswith('.cc') and not header_found:
    return

  # All the lines have been processed, report the errors found.
  for needed_header, (needed_linenum, template) in required.items():
    # An entity may also be provided by some other accepted header.
    provided_elsewhere = False
    for accepted in _HEADERS_ACCEPTED_BUT_NOT_PROMOTED.get(template, ()):
      if accepted in known_includes:
        provided_elsewhere = True
        break
    if provided_elsewhere:
      continue
    if needed_header.strip('<>"') not in known_includes:
      error(filename, needed_linenum,
            'build/include_what_you_use', 4,
            'Add #include ' + needed_header + ' for ' + template)
2815
2816
def ProcessLine(filename, file_extension,
                clean_lines, line, include_state, function_state,
                class_state, error):
  """Runs the per-line checks on a single line of the file.

  The function-length check always runs. All other checks are skipped when
  the raw line contains the word NOLINT.

  Args:
    filename: Filename of the file that is being processed.
    file_extension: The extension (dot not included) of the file.
    clean_lines: The cleansed lines of the file; its raw_lines attribute
                 holds the lines as they were before cleansing.
    line: Number of line being processed.
    include_state: An _IncludeState instance in which the headers are inserted.
    function_state: A _FunctionState instance which counts function lines, etc.
    class_state: A _ClassState instance which maintains information about
                 the current stack of nested class declarations being parsed.
    error: A callable to which errors are reported. The checks in this file
           call it as error(filename, linenum, category, confidence, message).
  """
  unprocessed_lines = clean_lines.raw_lines
  CheckForFunctionLengths(filename, clean_lines, line, function_state, error)
  if not Search(r'\bNOLINT\b', unprocessed_lines[line]):
    CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error)
    CheckStyle(filename, clean_lines, line, file_extension, error)
    CheckLanguage(filename, clean_lines, line, file_extension, include_state,
                  error)
    CheckForNonStandardConstructs(filename, clean_lines, line,
                                  class_state, error)
    CheckPosixThreading(filename, clean_lines, line, error)
    CheckInvalidIncrement(filename, clean_lines, line, error)
maruel@google.comfb2b8eb2009-04-23 21:03:42 +00002848
2849
def ProcessFileData(filename, file_extension, lines, error):
  """Performs lint checks and reports any errors to the given error function.

  The whole-file checks (copyright, header guard, include-what-you-use,
  unicode replacement characters, newline at EOF) run once; ProcessLine runs
  for every cleansed line.

  Args:
    filename: Filename of the file that is being processed.
    file_extension: The extension (dot not included) of the file.
    lines: An array of strings, each representing a line of the file, with the
           last element being empty if the file is terminated with a newline.
    error: A callable to which errors are reported. The checks in this file
           call it as error(filename, linenum, category, confidence, message).
  """
  # Sentinel lines are added at both ends; the list passed in by the caller
  # is not modified.
  first_marker = '// marker so line numbers and indices both start at 1'
  last_marker = '// marker so line numbers end in a known way'
  padded_lines = [first_marker] + lines + [last_marker]

  include_state = _IncludeState()
  function_state = _FunctionState()
  class_state = _ClassState()

  CheckForCopyright(filename, padded_lines, error)

  if file_extension == 'h':
    CheckForHeaderGuard(filename, padded_lines, error)

  RemoveMultiLineComments(filename, padded_lines, error)
  clean_lines = CleansedLines(padded_lines)
  for linenum in xrange(clean_lines.NumLines()):
    ProcessLine(filename, file_extension, clean_lines, linenum,
                include_state, function_state, class_state, error)
  class_state.CheckFinished(filename, error)

  CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error)

  # We check here rather than inside ProcessLine so that we see raw
  # lines rather than "cleaned" lines.
  CheckForUnicodeReplacementCharacters(filename, padded_lines, error)

  CheckForNewlineAtEOF(filename, padded_lines, error)
2886
2887
def ProcessFile(filename, vlevel):
  """Does google-lint on a single file.

  The file is read as UTF-8 with undecodable bytes replaced. If it cannot be
  read, a message is written to stderr and the file is skipped. Files whose
  extension is not cc, h or cpp are ignored, except for stdin ("-").

  Args:
    filename: The name of the file to parse. "-" means standard input.

    vlevel: The level of errors to report. Every error of confidence
    >= verbose_level will be reported. 0 is a good default.
  """

  _SetVerboseLevel(vlevel)

  try:
    # Support the UNIX convention of using "-" for stdin. Note that
    # we are not opening the file with universal newline support
    # (which codecs doesn't support anyway), so the resulting lines do
    # contain trailing '\r' characters if we are reading a file that
    # has CRLF endings.
    # If after the split a trailing '\r' is present, it is removed
    # below. If it is not expected to be present (i.e. os.linesep !=
    # '\r\n' as in Windows), a warning is issued below if this file
    # is processed.
    if filename == '-':
      lines = codecs.StreamReaderWriter(sys.stdin,
                                        codecs.getreader('utf8'),
                                        codecs.getwriter('utf8'),
                                        'replace').read().split('\n')
    else:
      source = codecs.open(filename, 'r', 'utf8', 'replace')
      try:
        lines = source.read().split('\n')
      finally:
        # Do not leave the handle open until garbage collection.
        source.close()
  except IOError:
    sys.stderr.write(
        "Skipping input '%s': Can't open for reading\n" % filename)
    return

  carriage_return_found = False
  # Remove trailing '\r'.
  for linenum, text in enumerate(lines):
    if text.endswith('\r'):
      lines[linenum] = text.rstrip('\r')
      carriage_return_found = True

  # Note, if no dot is found, this will give the entire filename as the ext.
  file_extension = filename[filename.rfind('.') + 1:]

  # When reading from stdin, the extension is unknown, so no cpplint tests
  # should rely on the extension.
  if (filename != '-' and file_extension != 'cc' and file_extension != 'h'
      and file_extension != 'cpp'):
    sys.stderr.write('Ignoring %s; not a .cc or .h file\n' % filename)
  else:
    ProcessFileData(filename, file_extension, lines, Error)
    if carriage_return_found and os.linesep != '\r\n':
      # Use 0 for linenum since outputting only one error for potentially
      # several lines.
      Error(filename, 0, 'whitespace/newline', 1,
            'One or more unexpected \\r (^M) found; '
            'better to use only a \\n')

  sys.stderr.write('Done processing %s\n' % filename)
2949
2950
def PrintUsage(message):
  """Writes the usage text to stderr and exits the program.

  Args:
    message: The optional error message. When given, the program exits with
      it as a fatal error; otherwise it exits with status 1.
  """
  sys.stderr.write(_USAGE)
  if not message:
    sys.exit(1)
  sys.exit('\nFATAL ERROR: ' + message)
2962
2963
def PrintCategories():
  """Writes the list of all error categories to stderr, then exits with 0.

  These are the categories used to filter messages via --filter.
  """
  output = sys.stderr
  output.write(_ERROR_CATEGORIES)
  sys.exit(0)
2971
2972
def ParseArguments(args):
  """Parses the command line arguments.

  This may set the output format, verbosity level, filters and counting
  style as side-effects. Invalid options, invalid option values and a
  missing file list end the program through PrintUsage; an empty --filter
  value ends it through PrintCategories.

  Args:
    args: The command line arguments:

  Returns:
    The list of filenames to lint.
  """
  try:
    (opts, filenames) = getopt.getopt(args, '', ['help', 'output=', 'verbose=',
                                                 'counting=',
                                                 'filter='])
  except getopt.GetoptError:
    PrintUsage('Invalid arguments.')

  verbosity = _VerboseLevel()
  output_format = _OutputFormat()
  filters = ''
  counting_style = ''

  for (opt, val) in opts:
    if opt == '--help':
      PrintUsage(None)
    elif opt == '--output':
      if val not in ('emacs', 'vs7'):
        PrintUsage('The only allowed output formats are emacs and vs7.')
      output_format = val
    elif opt == '--verbose':
      # Report a malformed level as a usage error instead of letting the
      # ValueError escape as a traceback.
      try:
        verbosity = int(val)
      except ValueError:
        PrintUsage('The verbosity level must be an integer.')
    elif opt == '--filter':
      filters = val
      if not filters:
        PrintCategories()
    elif opt == '--counting':
      if val not in ('total', 'toplevel', 'detailed'):
        PrintUsage('Valid counting options are total, toplevel, and detailed')
      counting_style = val

  if not filenames:
    PrintUsage('No files were specified.')

  _SetOutputFormat(output_format)
  _SetVerboseLevel(verbosity)
  _SetFilters(filters)
  _SetCountingStyle(counting_style)

  return filenames
3023
3024
def main():
  """Lints every file named on the command line and exits.

  The exit status is 1 if any error was counted and 0 otherwise.
  """
  filenames = ParseArguments(sys.argv[1:])

  # Change stderr to write with replacement characters so we don't die
  # if we try to print something containing non-ASCII characters.
  utf8_reader = codecs.getreader('utf8')
  utf8_writer = codecs.getwriter('utf8')
  sys.stderr = codecs.StreamReaderWriter(sys.stderr, utf8_reader, utf8_writer,
                                         'replace')

  _cpplint_state.ResetErrorCounts()
  for filename in filenames:
    ProcessFile(filename, _cpplint_state.verbose_level)
  _cpplint_state.PrintErrorCounts()

  found_errors = _cpplint_state.error_count > 0
  sys.exit(found_errors)


if __name__ == '__main__':
  main()